├── LICENSE ├── README.md ├── notebooks └── generate_submissions.ipynb ├── requirements.txt └── src ├── config ├── __init__.py ├── config.py └── en_config.py ├── data_process └── create_split.py ├── dataset ├── __init__.py └── landmark_dataset.py ├── layers ├── __init__.py ├── backbone │ ├── __init__.py │ ├── efficientnet_pytorch.py │ ├── efficientnet_pytorch_utils.py │ └── resnet.py ├── loss.py ├── metric_learning.py ├── normalization.py ├── pooling.py ├── scheduler.py ├── scheduler_base.py └── tool.py ├── networks ├── __init__.py ├── efficientnet_gem_fc_face.py ├── imageclsnet.py ├── resnet_gem_fc_face.py └── superpointglue │ ├── __init__.py │ ├── matching.py │ ├── superglue.py │ └── superpoint.py ├── train.py └── utilities ├── __init__.py ├── augment_rand.py ├── augment_util.py ├── common_util.py ├── metric_util.py ├── model_util.py ├── superpointglue_util.py └── vectors_utils.py /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Instance Level Recognition 2 | 3 | This repository contains the code for the 2nd place solution to the 2020 edition of the Landmark Recognition competition hosted by Google on Kaggle: 4 | 5 | https://www.kaggle.com/c/landmark-recognition-2020/leaderboard 6 | 7 | The full solution is described here: 8 | 9 | https://www.kaggle.com/c/landmark-recognition-2020/discussion/188299 10 | 11 | ## Definition 12 | v2c (cleaned GLDv2): the cleaned version of GLDv2, with 1.6 million training images and 81k classes. All landmark test images belong to these classes. 13 | 14 | v2x: in the full GLDv2, 3.2 million images belong to the 81k classes of v2c. I define these 3.2 million images as v2x. 15 | 16 | ## Data preparation 17 | 1. Configure your local directories in CODE_DIR/src/config/config.py 18 | 19 | 2. Download the Google Landmarks Dataset v2 train, test and index sets from https://github.com/cvdfoundation/google-landmark and unpack them to DATA_DIR/images 20 | 21 | 3. Move train.csv and train_clean.csv to DATA_DIR/raw (they are provided by Kaggle and are not included in this solution) 22 | 23 | 4. Download the SuperPoint and SuperGlue models from https://github.com/magicleap/SuperPointPretrainedNetwork and https://github.com/magicleap/SuperGluePretrainedNetwork 24 | 25 | 5. Create the split files: 26 | ``` 27 | python create_split.py 28 | ``` 29 | 30 | ## Training retrieval models 31 | ### 1. Train EfficientNet B7 32 | 33 | 1.1 Train EfficientNet B7 v2c 448x448 model 34 | ``` 35 | python -m torch.distributed.launch --nproc_per_node=4 train.py --arch class_efficientnet_b7_gem_fc_arcface2_1head --save_every_epoch 0.1 --epochs 7 --img_size 448 --eval_img_size 512 --scheduler SGD --out_dir v2c_sgd_ls_aug1_norm1_0907_class_efficientnet_b7_gem_fc_arcface2_1head_i448 --loss LabelSmoothingLossV1 --aug_version 1 --split_type v2c --num_classes 81313 --gpu_id 0,1,2,3 --batch_size 4 --distributed 1 --preprocessing 1 36 | ``` 37 | 38 | 1.2. Train efficientnet_b7 v2x 448x448 model 39 | ``` 40 | python -m torch.distributed.launch --nproc_per_node=4 train.py --arch class_efficientnet_b7_gem_fc_arcface2_1head --save_every_epoch 0.1 --epochs 30 --img_size 448 --eval_img_size 512 --scheduler SGD --out_dir v2x_sgd_ls_aug3b_norm1_0918_class_efficientnet_b7_gem_fc_arcface2_1head_i448 --loss LabelSmoothingLossV1 --aug_version 1 --split_type v2x --num_classes 81313 --gpu_id 0,1,2,3 --batch_size 4 --distributed 1 --preprocessing 1 --model_file RESULT_DIR/models/v2c_sgd_ls_aug1_norm1_0907_class_efficientnet_b7_gem_fc_arcface2_1head_i448/6.70.pth 41 | ``` 42 | 43 | ### 2.
Train efficientnet_b6 44 | 45 | 2.1 Train efficientnet_b6 v2c 448x448 model 46 | ``` 47 | python -m torch.distributed.launch --nproc_per_node=4 train.py --arch class_efficientnet_b6_gem_fc_arcface2_1head --save_every_epoch 0.1 --epochs 30 --img_size 448 --eval_img_size 512 --scheduler SGD --out_dir v2c_sgd_ls_aug3b_norm1_0919_class_efficientnet_b6_gem_fc_arcface2_1head_i448 --loss LabelSmoothingLossV1 --aug_version 1 --split_type v2c --num_classes 81313 --gpu_id 0,1,2,3 --batch_size 6 --distributed 1 --preprocessing 1 48 | ``` 49 | 50 | 2.2 Train efficientnet_b6 v2x 448x448 model 51 | ``` 52 | python -m torch.distributed.launch --nproc_per_node=4 train.py --arch class_efficientnet_b6_gem_fc_arcface2_1head --save_every_epoch 0.1 --epochs 30 --img_size 448 --eval_img_size 512 --scheduler SGD --out_dir v2x_sgd_ls_aug3b_norm1_0919_class_efficientnet_b6_gem_fc_arcface2_1head_i448 --loss LabelSmoothingLossV1 --aug_version 1 --split_type v2x --num_classes 81313 --gpu_id 0,1,2,3 --batch_size 6 --distributed 1 --preprocessing 1 --model_file RESULT_DIR/models/v2c_sgd_ls_aug3b_norm1_0919_class_efficientnet_b6_gem_fc_arcface2_1head_i448/6.00.pth 53 | ``` 54 | 55 | ### 3 Train efficientnet_b5 56 | 57 | 3.1 Train efficientnet_b5 v2c 448x448 model 58 | ``` 59 | python -m torch.distributed.launch --nproc_per_node=4 train.py --arch class_efficientnet_b5_gem_fc_arcface_1head --save_every_epoch 0.1 --epochs 30 --img_size 448 --eval_img_size 512 --scheduler SGD --out_dir v2c_sgd_ls_aug3b_norm1_0918_class_efficientnet_b5_gem_fc_arcface2_1head_i448 --loss LabelSmoothingLossV1 --aug_version 1 --split_type v2c --num_classes 81313 --gpu_id 0,1,2,3 --distributed 1 --preprocessing 1 --batch_size 8 60 | ``` 61 | 62 | 3.2 Train efficientnet_b5 v2x 448x448 model 63 | ``` 64 | python -m torch.distributed.launch --nproc_per_node=4 train.py --arch class_efficientnet_b5_gem_fc_arcface_1head --save_every_epoch 0.2 --epochs 30 --img_size 448 --eval_img_size 512 --scheduler SGD --out_dir v2x_sgd_ls_aug3b_norm1_0918_class_efficientnet_b5_gem_fc_arcface2_1head_i448 --loss LabelSmoothingLossV1 --aug_version 1 --split_type v2x --num_classes 81313 --gpu_id 0,1,2,3 --batch_size 8 --distributed 1 --preprocessing 1 --model_file RESULT_DIR/models/v2c_sgd_ls_aug3b_norm1_0918_class_efficientnet_b5_gem_fc_arcface2_1head_i448/8.20.pth 65 | ``` 66 | 67 | ### 4. 
Train resnet152 67 | 68 | 4.1 Train resnet152 v2c 448x448 model 69 | ``` 70 | python -m torch.distributed.launch --nproc_per_node=4 train.py --arch class_resnet152_gem_fc_arcface_1head --save_every_epoch 0.2 --epochs 30 --img_size 448 --eval_img_size 512 --scheduler SGD --out_dir v2c_sgd_ls_aug3b_norm1_0919_class_resnet152_gem_fc_arcface_1head_i448 --loss LabelSmoothingLossV1 --aug_version 1 --split_type v2c --num_classes 81313 --gpu_id 0,1,2,3 --batch_size 10 --distributed 1 --preprocessing 1 71 | ``` 72 | 73 | 4.2 Train resnet152 v2x 448x448 model 74 | ``` 75 | python -m torch.distributed.launch --nproc_per_node=4 train.py --arch class_resnet152_gem_fc_arcface_1head --save_every_epoch 0.2 --epochs 30 --img_size 448 --eval_img_size 512 --scheduler SGD --out_dir v2x_sgd_ls_aug3b_norm1_0919_class_resnet152_gem_fc_arcface_1head_i448 --loss LabelSmoothingLossV1 --aug_version 1 --split_type v2x --num_classes 81313 --gpu_id 0,1,2,3 --batch_size 10 --distributed 1 --preprocessing 1 --model_file RESULT_DIR/models/v2c_sgd_ls_aug3b_norm1_0919_class_resnet152_gem_fc_arcface_1head_i448/7.40.pth 76 | ``` 77 | 78 | ## Generate submission 79 | Submission generation is detailed in notebooks/generate_submissions.ipynb. -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tqdm==4.15.0 2 | numpy==1.19.1 3 | pandas==0.23.1 4 | scipy==1.5.0 5 | torch==1.6.0 6 | torchvision==0.7.0 7 | mlcrate==0.2.0 8 | scikit-image==0.15.0 9 | scikit-learn==0.23.1 10 | pydegensac==0.1.2 11 | # python==3.6.10 (interpreter version, not a pip-installable package) 12 | faiss-cpu==1.6.3 -------------------------------------------------------------------------------- /src/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
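The generate_submissions notebook itself is not reproduced in this dump. As a rough, illustrative sketch only (not code from this repository), the retrieval step it builds on can be expressed with the pinned faiss-cpu package: L2-normalized embeddings from the models above are compared by inner product to find the nearest index/train images for each test image. The .npy file names and the embedding dimension below are assumptions for illustration.

```python
# Illustrative nearest-neighbour lookup between L2-normalised embeddings (not repository code).
import numpy as np
import faiss

test_feats = np.load('test_feats.npy').astype(np.float32)    # hypothetical (num_test, dim) dump
index_feats = np.load('index_feats.npy').astype(np.float32)  # hypothetical (num_index, dim) dump

index = faiss.IndexFlatIP(index_feats.shape[1])    # inner product == cosine for unit vectors
index.add(index_feats)
scores, neighbors = index.search(test_feats, 100)  # top-100 neighbours per test image
```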
14 | # ============================================================================== 15 | """Module to extract deep local features.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | # pylint: disable=unused-import 21 | # from delf.protos import aggregation_config_pb2 22 | # from delf.protos import box_pb2 23 | # from delf.protos import datum_pb2 24 | # from delf.protos import delf_config_pb2 25 | # from delf.protos import feature_pb2 26 | # from delf.python import box_io 27 | # from delf.python import datum_io 28 | # from delf.python import feature_aggregation_extractor 29 | # from delf.python import feature_aggregation_similarity 30 | # from delf.python import feature_extractor 31 | # from delf.python import feature_io 32 | # from delf.python import utils 33 | # from delf.python.examples import detector 34 | # from delf.python.examples import extractor 35 | # from delf.python import detect_to_retrieve 36 | # from delf.python import training 37 | # from delf.python.training import model 38 | # from delf.python.training import datasets 39 | # pylint: enable=unused-import 40 | -------------------------------------------------------------------------------- /src/config/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | ope = os.path.exists 3 | opj = os.path.join 4 | import numpy as np 5 | import socket 6 | import warnings 7 | warnings.filterwarnings('ignore') 8 | 9 | sk = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 10 | hostname = socket.gethostname() 11 | 12 | RESULT_DIR = '/data4/data/retrieval2020/result' 13 | DATA_DIR = '/data5/data/landmark2020' 14 | PRETRAINED_DIR = '/data5/data/pretrained' 15 | 16 | PI = np.pi 17 | INF = np.inf 18 | EPS = 1e-12 19 | NUM_CLASSES = 81313 20 | 21 | ID = 'id' 22 | TARGET = 'landmark_id' 23 | CLUSTER = 'cluster' 24 | SCALE = 'scale' 25 | CTARGET = 'landmarks' 26 | -------------------------------------------------------------------------------- /src/config/en_config.py: -------------------------------------------------------------------------------- 1 | en_m4_b7_b6_b5_r152_i800 = [ 2 | # b7, 3 | { 4 | 'is_20191st': False, 5 | 'module': 'efficientnet_gem_fc_face', 6 | 'model_name': 'class_efficientnet_b7_gem_fc_arcface2_1head', 7 | 'out_dir': 'v2x_sgd_ls_aug3b_norm1_0918_class_efficientnet_b7_gem_fc_arcface2_1head_i736', 8 | 'predict_epoch': '26.80', 9 | 'img_size': 800, 10 | 'batch_size': 4, 11 | 'num_classes': 81313, 12 | 'in_channels': 3, 13 | 'preprocessing': True, 14 | 'weight': 0.6, 15 | }, 16 | { 17 | 'is_20191st': False, 18 | 'module': 'efficientnet_gem_fc_face', 19 | 'model_name': 'class_efficientnet_b6_gem_fc_arcface2_1head', 20 | 'out_dir': 'v2x_sgd_ls_aug3b_norm1_0919_class_efficientnet_b6_gem_fc_arcface2_1head_i736', 21 | 'predict_epoch': '21.70', 22 | 'img_size': 800, 23 | 'batch_size': 4, 24 | 'num_classes': 81313, 25 | 'in_channels': 3, 26 | 'preprocessing': True, 27 | 'weight': 0.2, 28 | }, 29 | { 30 | 'is_20191st': False, 31 | 'module': 'efficientnet_gem_fc_face', 32 | 'model_name': 'class_efficientnet_b5_gem_fc_arcface2_1head', 33 | 'out_dir': 'v2x_sgd_ls_aug3b_norm1_0918_class_efficientnet_b5_gem_fc_arcface2_1head_i736', 34 | 'predict_epoch': '19.30', 35 | 'img_size': 800, 36 | 'batch_size': 4, 37 | 'num_classes': 81313, 38 | 'in_channels': 3, 39 | 'preprocessing': True, 40 | 'weight': 0.1, 41 | }, 42 | { 43 | 'is_20191st': False, 44 | 'module': 'resnet_gem_fc_face', 45 | 'model_name': 
'class_resnet152_gem_fc_arcface_1head', 46 | 'out_dir': 'v2x_sgd_ls_aug3b_norm1_0919_class_resnet152_gem_fc_arcface_1head_i736', 47 | 'predict_epoch': '17.90', 48 | 'img_size': 800, 49 | 'batch_size': 4, 50 | 'num_classes': 81313, 51 | 'in_channels': 3, 52 | 'preprocessing': True, 53 | 'weight': 0.1, 54 | }, 55 | ] -------------------------------------------------------------------------------- /src/data_process/create_split.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '..') 3 | import pandas as pd 4 | from tqdm import tqdm 5 | from config.config import * 6 | 7 | def create_whole_train_split(train_meta, split_name): 8 | train_meta = train_meta.copy() 9 | split_dir = f'{DATA_DIR}/split/{split_name}' 10 | os.makedirs(split_dir, exist_ok=True) 11 | 12 | print('train nums: %s' % train_meta.shape[0]) 13 | print('train label nums: %s' % train_meta[TARGET].nunique()) 14 | train_meta['count'] = train_meta.groupby([TARGET])[ID].transform('count') 15 | litter_image_df = train_meta[train_meta['count'] < 200] 16 | train_rest_meta = train_meta[~train_meta[ID].isin(litter_image_df[ID].values)].reset_index(drop=True) 17 | 18 | idx = 0 19 | valid_indices = np.random.choice(len(train_rest_meta), 200, replace=False) 20 | valid_split_df = train_rest_meta.loc[valid_indices] 21 | train_indices = ~train_meta[ID].isin(valid_split_df[ID].values) 22 | train_split_df = train_rest_meta[train_indices] 23 | train_split_df = pd.concat((train_split_df, litter_image_df), ignore_index=True) 24 | 25 | fname = f'{split_dir}/random_train_cv{idx}.csv' 26 | print("train: create split file: %s; "% (fname)) 27 | print(('nums: %d; label nums: %d; max label: %s')% 28 | (train_split_df.shape[0],train_split_df[TARGET].nunique(),train_split_df[TARGET].max())) 29 | train_split_df.to_csv(fname, index=False) 30 | print(train_split_df.head()) 31 | 32 | fname = f'{split_dir}/random_valid_cv{idx}.csv' 33 | print("valid: create split file: %s; "% (fname)) 34 | print(('nums: %d; label nums: %d; max label: %s') % 35 | (valid_split_df.shape[0],valid_split_df[TARGET].nunique(),valid_split_df[TARGET].max())) 36 | valid_split_df.to_csv(fname, index=False) 37 | print(valid_split_df.head()) 38 | 39 | def create_v2x_split(): 40 | train_clean_df = pd.read_csv(f'{DATA_DIR}/raw/train_clean.csv', usecols=[TARGET]) 41 | train_df = pd.read_csv(f'{DATA_DIR}/raw/train.csv', usecols=[ID, TARGET]) 42 | train_df = train_df[train_df[TARGET].isin(train_clean_df[TARGET].unique())] 43 | 44 | landmark_mapping = {l: i for i, l in enumerate(np.sort(train_df[TARGET].unique()))} 45 | train_df[TARGET] = train_df[TARGET].map(landmark_mapping) 46 | 47 | idx = 0 48 | train_split_df = pd.read_csv(f'{DATA_DIR}/split/v2c/random_train_cv{idx}.csv') 49 | valid_split_df = pd.read_csv(f'{DATA_DIR}/split/v2c/random_valid_cv{idx}.csv') 50 | _train_df = train_df.set_index(ID) 51 | assert np.array_equal(_train_df.loc[train_split_df[ID].values, TARGET], train_split_df[TARGET]) 52 | assert np.array_equal(_train_df.loc[valid_split_df[ID].values, TARGET], valid_split_df[TARGET]) 53 | del _train_df 54 | 55 | train_df = train_df[~train_df[ID].isin(valid_split_df[ID])] 56 | train_split_df = pd.merge(train_df, train_split_df, on=[ID, TARGET], how='left') 57 | 58 | split_dir = f'{DATA_DIR}/split/v2x' 59 | os.makedirs(split_dir, exist_ok=True) 60 | 61 | fname = f'{split_dir}/random_train_cv{idx}.csv' 62 | print("train: create split file: %s; "% (fname)) 63 | print(('nums: %d; label nums: %d') % 
(train_split_df.shape[0], train_split_df[TARGET].nunique())) 64 | train_split_df.to_csv(fname, index=False) 65 | print(train_split_df.head()) 66 | 67 | fname = f'{split_dir}/random_valid_cv{idx}.csv' 68 | print("valid: create split file: %s; "% (fname)) 69 | print(('nums: %d; label nums: %d') % (valid_split_df.shape[0], valid_split_df[TARGET].nunique())) 70 | valid_split_df.to_csv(fname, index=False) 71 | print(valid_split_df.head()) 72 | 73 | if __name__ == "__main__": 74 | print('%s: calling main function ... ' % os.path.basename(__file__)) 75 | train_clean_df = pd.read_csv(f'{DATA_DIR}/raw/train_clean.csv') 76 | train_clean_df['count'] = [len(row.split(' ')) for row in train_clean_df['images'].values] 77 | train_clean_df[CTARGET] = train_clean_df[TARGET] 78 | train_clean_df[TARGET] = range(len(train_clean_df)) 79 | images = [] 80 | for _, row in tqdm(train_clean_df.iterrows(), total=len(train_clean_df)): 81 | label = row[TARGET] 82 | old_label = row[CTARGET] 83 | for file_id in row['images'].split(' '): 84 | images.append((file_id, label, old_label)) 85 | 86 | dataset_df = pd.DataFrame(data=images, columns=[ID, TARGET, CTARGET]) 87 | dataset_df = dataset_df.sample(len(dataset_df), replace=False, random_state=100).reset_index(drop=True) 88 | dataset_df.to_csv(f'{DATA_DIR}/split/train2020.csv', index=False) 89 | 90 | create_whole_train_split(dataset_df, split_name='v2c') 91 | create_v2x_split() 92 | -------------------------------------------------------------------------------- /src/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
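The split files written by create_split.py above can be sanity-checked with a short snippet like the one below; it is illustrative only (not part of the repository) and reuses DATA_DIR and the 'id'/'landmark_id' column names from src/config/config.py.

```python
# Illustrative check of the v2c split produced by create_split.py (assumed paths).
import pandas as pd

DATA_DIR = '/data5/data/landmark2020'  # same value as in src/config/config.py
train_df = pd.read_csv(f'{DATA_DIR}/split/v2c/random_train_cv0.csv')
valid_df = pd.read_csv(f'{DATA_DIR}/split/v2c/random_valid_cv0.csv')

# The validation split holds 200 images drawn only from classes with >= 200 examples,
# and labels are remapped to a contiguous range of 81313 classes.
print(train_df.shape, train_df['landmark_id'].nunique())
print(valid_df.shape, valid_df['landmark_id'].nunique())
assert len(valid_df) == 200
assert train_df['landmark_id'].max() < 81313
```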
14 | # ============================================================================== 15 | """Module to extract deep local features.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | # pylint: disable=unused-import 21 | # from delf.protos import aggregation_config_pb2 22 | # from delf.protos import box_pb2 23 | # from delf.protos import datum_pb2 24 | # from delf.protos import delf_config_pb2 25 | # from delf.protos import feature_pb2 26 | # from delf.python import box_io 27 | # from delf.python import datum_io 28 | # from delf.python import feature_aggregation_extractor 29 | # from delf.python import feature_aggregation_similarity 30 | # from delf.python import feature_extractor 31 | # from delf.python import feature_io 32 | # from delf.python import utils 33 | # from delf.python.examples import detector 34 | # from delf.python.examples import extractor 35 | # from delf.python import detect_to_retrieve 36 | # from delf.python import training 37 | # from delf.python.training import model 38 | # from delf.python.training import datasets 39 | # pylint: enable=unused-import 40 | -------------------------------------------------------------------------------- /src/dataset/landmark_dataset.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '..') 3 | import torch 4 | from torch.utils.data import Dataset 5 | import pandas as pd 6 | from albumentations import Normalize 7 | 8 | from utilities.augment_util import * 9 | 10 | class RetrievalDataset(Dataset): 11 | def __init__(self, args, split_file, transform, data_type='train'): 12 | self.args = args 13 | self.img_size = (args.img_size, args.img_size) 14 | self.transform = transform 15 | self.is_train = data_type == 'train' 16 | 17 | df = pd.read_csv(split_file) 18 | self.df = df 19 | if data_type == 'valid': 20 | self.df = self.df[:200] 21 | 22 | img_dir = f'{DATA_DIR}/images/train' 23 | self.do_print('img_dir %s' % img_dir) 24 | self.img_dir = img_dir 25 | 26 | if self.is_train: 27 | self.df = self.df.sample(len(self.df), replace=False).reset_index(drop=True) 28 | dataset_df = self.df 29 | else: 30 | dataset_df = self.df 31 | 32 | self.dataset_df = dataset_df 33 | self.do_resample() 34 | 35 | def do_resample(self): 36 | dataset_df = self.dataset_df 37 | self.x = dataset_df[ID].values 38 | self.y = dataset_df[TARGET].values 39 | 40 | def do_print(self, content): 41 | if self.args.can_print: 42 | print(content) 43 | 44 | def __len__(self): 45 | return len(self.x) 46 | 47 | def get_batch_images(self, idx, img_id, label): 48 | x = [img_id] 49 | y = [label] 50 | return x, y 51 | 52 | def __getitem__(self, idx): 53 | img_id = self.x[idx] 54 | label = self.y[idx] 55 | 56 | x, y = self.get_batch_images(idx, img_id, label) 57 | images = [] 58 | for file_name in x: 59 | img_dir = self.img_dir 60 | boxes = None 61 | img_fname = f'{img_dir}/{file_name}.jpg' 62 | if not os.path.exists(img_fname): 63 | img_fname = f'{DATA_DIR}/images/test/{file_name}.jpg' 64 | image = cv2.imread(img_fname) 65 | image = image[..., ::-1] 66 | if self.transform is not None: 67 | image = self.transform(image, img_size=self.img_size, boxes=boxes) 68 | if image.shape[:2] != self.img_size: 69 | image = cv2.resize(image, self.img_size) 70 | 71 | if self.args.preprocessing == 1: 72 | norm = Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0) 73 | image = norm(image=image)['image'] 74 | else: 75 | 
image = image / 255.0 76 | image = np.transpose(image, (2, 0, 1)) 77 | image = torch.from_numpy(image).float() 78 | images.append(image) 79 | return images, y 80 | 81 | def on_epoch_end(self): 82 | if self.is_train: 83 | self.do_resample() 84 | idxes = np.random.choice(len(self.y), len(self.y), replace=False) 85 | self.x = np.array(self.x)[idxes] 86 | self.y = np.array(self.y)[idxes] 87 | 88 | def image_collate(batch): 89 | batch_size = len(batch) 90 | images = [] 91 | labels = [] 92 | for b in range(batch_size): 93 | if batch[b][0] is None: 94 | continue 95 | else: 96 | images.extend(batch[b][0]) 97 | labels.extend(batch[b][1]) 98 | images = torch.stack(images, 0) 99 | labels = torch.from_numpy(np.array(labels)) 100 | return images, labels 101 | -------------------------------------------------------------------------------- /src/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bestfitting/instance_level_recognition/683f021b4e65876835f028797ec28b0d1071bb45/src/layers/__init__.py -------------------------------------------------------------------------------- /src/layers/backbone/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bestfitting/instance_level_recognition/683f021b4e65876835f028797ec28b0d1071bb45/src/layers/backbone/__init__.py -------------------------------------------------------------------------------- /src/layers/backbone/efficientnet_pytorch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from .efficientnet_pytorch_utils import ( 6 | round_filters, 7 | round_repeats, 8 | drop_connect, 9 | get_same_padding_conv2d, 10 | get_model_params, 11 | efficientnet_params, 12 | load_pretrained_weights, 13 | Swish, 14 | MemoryEfficientSwish, 15 | ) 16 | 17 | class MBConvBlock(nn.Module): 18 | """ 19 | Mobile Inverted Residual Bottleneck Block 20 | 21 | Args: 22 | block_args (namedtuple): BlockArgs, see above 23 | global_params (namedtuple): GlobalParam, see above 24 | 25 | Attributes: 26 | has_se (bool): Whether the block contains a Squeeze and Excitation layer. 
27 | """ 28 | 29 | def __init__(self, block_args, global_params): 30 | super().__init__() 31 | self._block_args = block_args 32 | self._bn_mom = 1 - global_params.batch_norm_momentum 33 | self._bn_eps = global_params.batch_norm_epsilon 34 | self.has_se = (self._block_args.se_ratio is not None) and (0 < self._block_args.se_ratio <= 1) 35 | self.id_skip = block_args.id_skip # skip connection and drop connect 36 | 37 | # Get static or dynamic convolution depending on image size 38 | Conv2d = get_same_padding_conv2d(image_size=global_params.image_size) 39 | 40 | # Expansion phase 41 | inp = self._block_args.input_filters # number of input channels 42 | oup = self._block_args.input_filters * self._block_args.expand_ratio # number of output channels 43 | if self._block_args.expand_ratio != 1: 44 | self._expand_conv = Conv2d(in_channels=inp, out_channels=oup, kernel_size=1, bias=False) 45 | self._bn0 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) 46 | 47 | # Depthwise convolution phase 48 | k = self._block_args.kernel_size 49 | s = self._block_args.stride 50 | self._depthwise_conv = Conv2d( 51 | in_channels=oup, out_channels=oup, groups=oup, # groups makes it depthwise 52 | kernel_size=k, stride=s, bias=False) 53 | self._bn1 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) 54 | 55 | # Squeeze and Excitation layer, if desired 56 | if self.has_se: 57 | num_squeezed_channels = max(1, int(self._block_args.input_filters * self._block_args.se_ratio)) 58 | self._se_reduce = Conv2d(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1) 59 | self._se_expand = Conv2d(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1) 60 | 61 | # Output phase 62 | final_oup = self._block_args.output_filters 63 | self._project_conv = Conv2d(in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False) 64 | self._bn2 = nn.BatchNorm2d(num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps) 65 | self._swish = MemoryEfficientSwish() 66 | 67 | def forward(self, inputs, drop_connect_rate=None): 68 | """ 69 | :param inputs: input tensor 70 | :param drop_connect_rate: drop connect rate (float, between 0 and 1) 71 | :return: output of block 72 | """ 73 | 74 | # Expansion and Depthwise Convolution 75 | x = inputs 76 | if self._block_args.expand_ratio != 1: 77 | x = self._swish(self._bn0(self._expand_conv(inputs))) 78 | x = self._swish(self._bn1(self._depthwise_conv(x))) 79 | 80 | # Squeeze and Excitation 81 | if self.has_se: 82 | x_squeezed = F.adaptive_avg_pool2d(x, 1) 83 | x_squeezed = self._se_expand(self._swish(self._se_reduce(x_squeezed))) 84 | x = torch.sigmoid(x_squeezed) * x 85 | 86 | x = self._bn2(self._project_conv(x)) 87 | 88 | # Skip connection and drop connect 89 | input_filters, output_filters = self._block_args.input_filters, self._block_args.output_filters 90 | if self.id_skip and self._block_args.stride == 1 and input_filters == output_filters: 91 | if drop_connect_rate: 92 | x = drop_connect(x, p=drop_connect_rate, training=self.training) 93 | x = x + inputs # skip connection 94 | return x 95 | 96 | def set_swish(self, memory_efficient=True): 97 | """Sets swish function as memory efficient (for training) or standard (for export)""" 98 | self._swish = MemoryEfficientSwish() if memory_efficient else Swish() 99 | 100 | 101 | class EfficientNet(nn.Module): 102 | """ 103 | An EfficientNet model. 
Most easily loaded with the .from_name or .from_pretrained methods 104 | 105 | Args: 106 | blocks_args (list): A list of BlockArgs to construct blocks 107 | global_params (namedtuple): A set of GlobalParams shared between blocks 108 | 109 | Example: 110 | model = EfficientNet.from_pretrained('efficientnet-b0') 111 | 112 | """ 113 | 114 | def __init__(self, blocks_args=None, global_params=None): 115 | super().__init__() 116 | assert isinstance(blocks_args, list), 'blocks_args should be a list' 117 | assert len(blocks_args) > 0, 'block args must be greater than 0' 118 | self._global_params = global_params 119 | self._blocks_args = blocks_args 120 | 121 | # Get static or dynamic convolution depending on image size 122 | Conv2d = get_same_padding_conv2d(image_size=global_params.image_size) 123 | 124 | # Batch norm parameters 125 | bn_mom = 1 - self._global_params.batch_norm_momentum 126 | bn_eps = self._global_params.batch_norm_epsilon 127 | 128 | # Stem 129 | in_channels = 3 # rgb 130 | out_channels = round_filters(32, self._global_params) # number of output channels 131 | self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False) 132 | self._bn0 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps) 133 | 134 | # Build blocks 135 | self._blocks = nn.ModuleList([]) 136 | for block_args in self._blocks_args: 137 | 138 | # Update block input and output filters based on depth multiplier. 139 | block_args = block_args._replace( 140 | input_filters=round_filters(block_args.input_filters, self._global_params), 141 | output_filters=round_filters(block_args.output_filters, self._global_params), 142 | num_repeat=round_repeats(block_args.num_repeat, self._global_params) 143 | ) 144 | 145 | # The first block needs to take care of stride and filter size increase. 
146 | self._blocks.append(MBConvBlock(block_args, self._global_params)) 147 | if block_args.num_repeat > 1: 148 | block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) 149 | for _ in range(block_args.num_repeat - 1): 150 | self._blocks.append(MBConvBlock(block_args, self._global_params)) 151 | 152 | # Head 153 | in_channels = block_args.output_filters # output of final block 154 | out_channels = round_filters(1280, self._global_params) 155 | self._conv_head = Conv2d(in_channels, out_channels, kernel_size=1, bias=False) 156 | self._bn1 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps) 157 | 158 | # Final linear layer 159 | self._avg_pooling = nn.AdaptiveAvgPool2d(1) 160 | self._dropout = nn.Dropout(self._global_params.dropout_rate) 161 | self._fc = nn.Linear(out_channels, self._global_params.num_classes) 162 | self._swish = MemoryEfficientSwish() 163 | 164 | def set_swish(self, memory_efficient=True): 165 | """Sets swish function as memory efficient (for training) or standard (for export)""" 166 | self._swish = MemoryEfficientSwish() if memory_efficient else Swish() 167 | for block in self._blocks: 168 | block.set_swish(memory_efficient) 169 | 170 | def extract_features(self, inputs): 171 | """ Returns output of the final convolution layer """ 172 | 173 | # Stem 174 | x = self._swish(self._bn0(self._conv_stem(inputs))) 175 | 176 | # Blocks 177 | for idx, block in enumerate(self._blocks): 178 | drop_connect_rate = self._global_params.drop_connect_rate 179 | if drop_connect_rate: 180 | drop_connect_rate *= float(idx) / len(self._blocks) 181 | x = block(x, drop_connect_rate=drop_connect_rate) 182 | 183 | # Head 184 | x = self._swish(self._bn1(self._conv_head(x))) 185 | 186 | return x 187 | 188 | def forward(self, inputs): 189 | """ Calls extract_features to extract features, applies final linear layer, and returns logits. 
""" 190 | bs = inputs.size(0) 191 | # Convolution layers 192 | x = self.extract_features(inputs) 193 | 194 | # Pooling and final linear layer 195 | x = self._avg_pooling(x) 196 | x = x.view(bs, -1) 197 | x = self._dropout(x) 198 | x = self._fc(x) 199 | return x 200 | 201 | @classmethod 202 | def from_name(cls, model_name, override_params=None): 203 | cls._check_model_name_is_valid(model_name) 204 | blocks_args, global_params = get_model_params(model_name, override_params) 205 | return cls(blocks_args, global_params) 206 | 207 | @classmethod 208 | def from_pretrained(cls, model_name, model_dir, num_classes=1000, in_channels=3, can_print=False): 209 | model = cls.from_name(model_name, override_params={'num_classes': num_classes}) 210 | load_pretrained_weights(model, model_name, model_dir, load_fc=(num_classes == 1000), can_print=can_print) 211 | if in_channels != 3: 212 | Conv2d = get_same_padding_conv2d(image_size=model._global_params.image_size) 213 | out_channels = round_filters(32, model._global_params) 214 | model._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False) 215 | return model 216 | 217 | @classmethod 218 | def from_pretrained(cls, model_name, model_dir, num_classes=1000, can_print=False): 219 | model = cls.from_name(model_name, override_params={'num_classes': num_classes}) 220 | load_pretrained_weights(model, model_name, model_dir, load_fc=(num_classes == 1000), can_print=can_print) 221 | 222 | return model 223 | 224 | @classmethod 225 | def get_image_size(cls, model_name): 226 | cls._check_model_name_is_valid(model_name) 227 | _, _, res, _ = efficientnet_params(model_name) 228 | return res 229 | 230 | @classmethod 231 | def _check_model_name_is_valid(cls, model_name, also_need_pretrained_weights=False): 232 | """ Validates model name. None that pretrained weights are only available for 233 | the first four models (efficientnet-b{i} for i in 0,1,2,3) at the moment. """ 234 | num_models = 4 if also_need_pretrained_weights else 8 235 | valid_models = ['efficientnet-b' + str(i) for i in range(num_models)] 236 | if model_name not in valid_models: 237 | raise ValueError('model_name should be one of: ' + ', '.join(valid_models)) 238 | -------------------------------------------------------------------------------- /src/layers/backbone/efficientnet_pytorch_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains helper functions for building the model and for loading model parameters. 3 | These helper functions are built to mirror those in the official TensorFlow implementation. 
4 | """ 5 | 6 | import re 7 | import math 8 | import collections 9 | from functools import partial 10 | import torch 11 | from torch import nn 12 | from torch.nn import functional as F 13 | from torch.utils import model_zoo 14 | 15 | ######################################################################## 16 | ############### HELPERS FUNCTIONS FOR MODEL ARCHITECTURE ############### 17 | ######################################################################## 18 | 19 | 20 | # Parameters for the entire model (stem, all blocks, and head) 21 | GlobalParams = collections.namedtuple('GlobalParams', [ 22 | 'batch_norm_momentum', 'batch_norm_epsilon', 'dropout_rate', 23 | 'num_classes', 'width_coefficient', 'depth_coefficient', 24 | 'depth_divisor', 'min_depth', 'drop_connect_rate', 'image_size']) 25 | 26 | # Parameters for an individual model block 27 | BlockArgs = collections.namedtuple('BlockArgs', [ 28 | 'kernel_size', 'num_repeat', 'input_filters', 'output_filters', 29 | 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) 30 | 31 | # Change namedtuple defaults 32 | GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields) 33 | BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields) 34 | 35 | 36 | class SwishImplementation(torch.autograd.Function): 37 | @staticmethod 38 | def forward(ctx, i): 39 | result = i * torch.sigmoid(i) 40 | ctx.save_for_backward(i) 41 | return result 42 | 43 | @staticmethod 44 | def backward(ctx, grad_output): 45 | i = ctx.saved_variables[0] 46 | sigmoid_i = torch.sigmoid(i) 47 | return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i))) 48 | 49 | 50 | class MemoryEfficientSwish(nn.Module): 51 | def forward(self, x): 52 | return SwishImplementation.apply(x) 53 | 54 | class Swish(nn.Module): 55 | def forward(self, x): 56 | return x * torch.sigmoid(x) 57 | 58 | 59 | def round_filters(filters, global_params): 60 | """ Calculate and round number of filters based on depth multiplier. """ 61 | multiplier = global_params.width_coefficient 62 | if not multiplier: 63 | return filters 64 | divisor = global_params.depth_divisor 65 | min_depth = global_params.min_depth 66 | filters *= multiplier 67 | min_depth = min_depth or divisor 68 | new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) 69 | if new_filters < 0.9 * filters: # prevent rounding by more than 10% 70 | new_filters += divisor 71 | return int(new_filters) 72 | 73 | 74 | def round_repeats(repeats, global_params): 75 | """ Round number of filters based on depth multiplier. """ 76 | multiplier = global_params.depth_coefficient 77 | if not multiplier: 78 | return repeats 79 | return int(math.ceil(multiplier * repeats)) 80 | 81 | 82 | def drop_connect(inputs, p, training): 83 | """ Drop connect. """ 84 | if not training: return inputs 85 | batch_size = inputs.shape[0] 86 | keep_prob = 1 - p 87 | random_tensor = keep_prob 88 | random_tensor += torch.rand([batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device) 89 | binary_tensor = torch.floor(random_tensor) 90 | output = inputs / keep_prob * binary_tensor 91 | return output 92 | 93 | 94 | def get_same_padding_conv2d(image_size=None): 95 | """ Chooses static padding if you have specified an image size, and dynamic padding otherwise. 96 | Static padding is necessary for ONNX exporting of models. 
""" 97 | if image_size is None: 98 | return Conv2dDynamicSamePadding 99 | else: 100 | return partial(Conv2dStaticSamePadding, image_size=image_size) 101 | 102 | 103 | class Conv2dDynamicSamePadding(nn.Conv2d): 104 | """ 2D Convolutions like TensorFlow, for a dynamic image size """ 105 | 106 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True): 107 | super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) 108 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 109 | 110 | def forward(self, x): 111 | ih, iw = x.size()[-2:] 112 | kh, kw = self.weight.size()[-2:] 113 | sh, sw = self.stride 114 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 115 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 116 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 117 | if pad_h > 0 or pad_w > 0: 118 | x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]) 119 | return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 120 | 121 | 122 | class Conv2dStaticSamePadding(nn.Conv2d): 123 | """ 2D Convolutions like TensorFlow, for a fixed image size""" 124 | 125 | def __init__(self, in_channels, out_channels, kernel_size, image_size=None, **kwargs): 126 | super().__init__(in_channels, out_channels, kernel_size, **kwargs) 127 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 128 | 129 | # Calculate padding based on image size and save it 130 | assert image_size is not None 131 | ih, iw = image_size if type(image_size) == list else [image_size, image_size] 132 | kh, kw = self.weight.size()[-2:] 133 | sh, sw = self.stride 134 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 135 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 136 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 137 | if pad_h > 0 or pad_w > 0: 138 | self.static_padding = nn.ZeroPad2d((pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2)) 139 | else: 140 | self.static_padding = Identity() 141 | 142 | def forward(self, x): 143 | x = self.static_padding(x) 144 | x = F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 145 | return x 146 | 147 | 148 | class Identity(nn.Module): 149 | def __init__(self, ): 150 | super(Identity, self).__init__() 151 | 152 | def forward(self, input): 153 | return input 154 | 155 | 156 | ######################################################################## 157 | ############## HELPERS FUNCTIONS FOR LOADING MODEL PARAMS ############## 158 | ######################################################################## 159 | 160 | 161 | def efficientnet_params(model_name): 162 | """ Map EfficientNet model name to parameter coefficients. 
""" 163 | params_dict = { 164 | # Coefficients: width,depth,res,dropout 165 | 'efficientnet-b0': (1.0, 1.0, 224, 0.2), 166 | 'efficientnet-b1': (1.0, 1.1, 240, 0.2), 167 | 'efficientnet-b2': (1.1, 1.2, 260, 0.3), 168 | 'efficientnet-b3': (1.2, 1.4, 300, 0.3), 169 | 'efficientnet-b4': (1.4, 1.8, 380, 0.4), 170 | 'efficientnet-b5': (1.6, 2.2, 456, 0.4), 171 | 'efficientnet-b6': (1.8, 2.6, 528, 0.5), 172 | 'efficientnet-b7': (2.0, 3.1, 600, 0.5), 173 | } 174 | return params_dict[model_name] 175 | 176 | 177 | class BlockDecoder(object): 178 | """ Block Decoder for readability, straight from the official TensorFlow repository """ 179 | 180 | @staticmethod 181 | def _decode_block_string(block_string): 182 | """ Gets a block through a string notation of arguments. """ 183 | assert isinstance(block_string, str) 184 | 185 | ops = block_string.split('_') 186 | options = {} 187 | for op in ops: 188 | splits = re.split(r'(\d.*)', op) 189 | if len(splits) >= 2: 190 | key, value = splits[:2] 191 | options[key] = value 192 | 193 | # Check stride 194 | assert (('s' in options and len(options['s']) == 1) or 195 | (len(options['s']) == 2 and options['s'][0] == options['s'][1])) 196 | 197 | return BlockArgs( 198 | kernel_size=int(options['k']), 199 | num_repeat=int(options['r']), 200 | input_filters=int(options['i']), 201 | output_filters=int(options['o']), 202 | expand_ratio=int(options['e']), 203 | id_skip=('noskip' not in block_string), 204 | se_ratio=float(options['se']) if 'se' in options else None, 205 | stride=[int(options['s'][0])]) 206 | 207 | @staticmethod 208 | def _encode_block_string(block): 209 | """Encodes a block to a string.""" 210 | args = [ 211 | 'r%d' % block.num_repeat, 212 | 'k%d' % block.kernel_size, 213 | 's%d%d' % (block.strides[0], block.strides[1]), 214 | 'e%s' % block.expand_ratio, 215 | 'i%d' % block.input_filters, 216 | 'o%d' % block.output_filters 217 | ] 218 | if 0 < block.se_ratio <= 1: 219 | args.append('se%s' % block.se_ratio) 220 | if block.id_skip is False: 221 | args.append('noskip') 222 | return '_'.join(args) 223 | 224 | @staticmethod 225 | def decode(string_list): 226 | """ 227 | Decodes a list of string notations to specify blocks inside the network. 228 | 229 | :param string_list: a list of strings, each string is a notation of block 230 | :return: a list of BlockArgs namedtuples of block args 231 | """ 232 | assert isinstance(string_list, list) 233 | blocks_args = [] 234 | for block_string in string_list: 235 | blocks_args.append(BlockDecoder._decode_block_string(block_string)) 236 | return blocks_args 237 | 238 | @staticmethod 239 | def encode(blocks_args): 240 | """ 241 | Encodes a list of BlockArgs to a list of strings. 242 | 243 | :param blocks_args: a list of BlockArgs namedtuples of block args 244 | :return: a list of strings, each string is a notation of block 245 | """ 246 | block_strings = [] 247 | for block in blocks_args: 248 | block_strings.append(BlockDecoder._encode_block_string(block)) 249 | return block_strings 250 | 251 | 252 | def efficientnet(width_coefficient=None, depth_coefficient=None, dropout_rate=0.2, 253 | drop_connect_rate=0.2, image_size=None, num_classes=1000): 254 | """ Creates a efficientnet model. 
""" 255 | 256 | blocks_args = [ 257 | 'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s22_e6_i16_o24_se0.25', 258 | 'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25', 259 | 'r3_k5_s11_e6_i80_o112_se0.25', 'r4_k5_s22_e6_i112_o192_se0.25', 260 | 'r1_k3_s11_e6_i192_o320_se0.25', 261 | ] 262 | blocks_args = BlockDecoder.decode(blocks_args) 263 | 264 | global_params = GlobalParams( 265 | batch_norm_momentum=0.99, 266 | batch_norm_epsilon=1e-3, 267 | dropout_rate=dropout_rate, 268 | drop_connect_rate=drop_connect_rate, 269 | # data_format='channels_last', # removed, this is always true in PyTorch 270 | num_classes=num_classes, 271 | width_coefficient=width_coefficient, 272 | depth_coefficient=depth_coefficient, 273 | depth_divisor=8, 274 | min_depth=None, 275 | image_size=image_size, 276 | ) 277 | 278 | return blocks_args, global_params 279 | 280 | 281 | def get_model_params(model_name, override_params): 282 | """ Get the block args and global params for a given model """ 283 | if model_name.startswith('efficientnet'): 284 | w, d, s, p = efficientnet_params(model_name) 285 | # note: all models have drop connect rate = 0.2 286 | blocks_args, global_params = efficientnet( 287 | width_coefficient=w, depth_coefficient=d, dropout_rate=p, image_size=s) 288 | else: 289 | raise NotImplementedError('model name is not pre-defined: %s' % model_name) 290 | if override_params: 291 | # ValueError will be raised here if override_params has fields not included in global_params. 292 | global_params = global_params._replace(**override_params) 293 | return blocks_args, global_params 294 | 295 | 296 | url_map = { 297 | 'efficientnet-b0': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b0-355c32eb.pth', 298 | 'efficientnet-b1': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b1-f1951068.pth', 299 | 'efficientnet-b2': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b2-8bb594d6.pth', 300 | 'efficientnet-b3': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b3-5fb5a3c3.pth', 301 | 'efficientnet-b4': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b4-6ed6700e.pth', 302 | 'efficientnet-b5': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b5-b6417697.pth', 303 | 'efficientnet-b6': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b6-c76e70fd.pth', 304 | 'efficientnet-b7': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b7-dcc49843.pth', 305 | } 306 | 307 | 308 | def load_pretrained_weights(model, model_name, model_dir, load_fc=True, can_print=False): 309 | """ Loads pretrained weights, and downloads if loading for the first time. 
""" 310 | state_dict = model_zoo.load_url(url_map[model_name], model_dir) 311 | if load_fc: 312 | model.load_state_dict(state_dict) 313 | else: 314 | state_dict.pop('_fc.weight') 315 | state_dict.pop('_fc.bias') 316 | res = model.load_state_dict(state_dict, strict=False) 317 | assert set(res.missing_keys) == set(['_fc.weight', '_fc.bias']), 'issue loading pretrained weights' 318 | if can_print: 319 | print('Loaded pretrained weights for {}'.format(model_name)) 320 | -------------------------------------------------------------------------------- /src/layers/backbone/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | import torch.utils.model_zoo as model_zoo 4 | 5 | 6 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 7 | 'resnet152'] 8 | 9 | 10 | model_urls = { 11 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 12 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 13 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 14 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 15 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 16 | } 17 | 18 | 19 | def conv3x3(in_planes, out_planes, stride=1): 20 | """3x3 convolution with padding""" 21 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 22 | padding=1, bias=False) 23 | 24 | 25 | class BasicBlock(nn.Module): 26 | expansion = 1 27 | 28 | def __init__(self, inplanes, planes, stride=1, downsample=None): 29 | super(BasicBlock, self).__init__() 30 | self.conv1 = conv3x3(inplanes, planes, stride) 31 | self.bn1 = nn.BatchNorm2d(planes) 32 | self.relu = nn.ReLU(inplace=True) 33 | self.conv2 = conv3x3(planes, planes) 34 | self.bn2 = nn.BatchNorm2d(planes) 35 | self.downsample = downsample 36 | self.stride = stride 37 | 38 | def forward(self, x): 39 | residual = x 40 | 41 | out = self.conv1(x) 42 | out = self.bn1(out) 43 | out = self.relu(out) 44 | 45 | out = self.conv2(out) 46 | out = self.bn2(out) 47 | 48 | if self.downsample is not None: 49 | residual = self.downsample(x) 50 | 51 | out += residual 52 | out = self.relu(out) 53 | 54 | return out 55 | 56 | 57 | class Bottleneck(nn.Module): 58 | expansion = 4 59 | 60 | def __init__(self, inplanes, planes, stride=1, downsample=None): 61 | super(Bottleneck, self).__init__() 62 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 63 | self.bn1 = nn.BatchNorm2d(planes) 64 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 65 | padding=1, bias=False) 66 | self.bn2 = nn.BatchNorm2d(planes) 67 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 68 | self.bn3 = nn.BatchNorm2d(planes * 4) 69 | self.relu = nn.ReLU(inplace=True) 70 | self.downsample = downsample 71 | self.stride = stride 72 | 73 | def forward(self, x): 74 | residual = x 75 | 76 | out = self.conv1(x) 77 | out = self.bn1(out) 78 | out = self.relu(out) 79 | 80 | out = self.conv2(out) 81 | out = self.bn2(out) 82 | out = self.relu(out) 83 | 84 | out = self.conv3(out) 85 | out = self.bn3(out) 86 | 87 | if self.downsample is not None: 88 | residual = self.downsample(x) 89 | 90 | out += residual 91 | out = self.relu(out) 92 | 93 | return out 94 | 95 | 96 | class ResNet(nn.Module): 97 | 98 | def __init__(self, block, layers, num_classes=1000): 99 | self.inplanes = 64 100 | super(ResNet, self).__init__() 101 | self.conv1 = nn.Conv2d(3, 64, 
kernel_size=7, stride=2, padding=3, 102 | bias=False) 103 | self.bn1 = nn.BatchNorm2d(64) 104 | self.relu = nn.ReLU(inplace=True) 105 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 106 | self.layer1 = self._make_layer(block, 64, layers[0]) 107 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 108 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 109 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 110 | self.avgpool = nn.AvgPool2d(7, stride=1) 111 | self.fc = nn.Linear(512 * block.expansion, num_classes) 112 | 113 | for m in self.modules(): 114 | if isinstance(m, nn.Conv2d): 115 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 116 | m.weight.data.normal_(0, math.sqrt(2. / n)) 117 | elif isinstance(m, nn.BatchNorm2d): 118 | m.weight.data.fill_(1) 119 | m.bias.data.zero_() 120 | 121 | def _make_layer(self, block, planes, blocks, stride=1): 122 | downsample = None 123 | if stride != 1 or self.inplanes != planes * block.expansion: 124 | downsample = nn.Sequential( 125 | nn.Conv2d(self.inplanes, planes * block.expansion, 126 | kernel_size=1, stride=stride, bias=False), 127 | nn.BatchNorm2d(planes * block.expansion), 128 | ) 129 | 130 | layers = [] 131 | layers.append(block(self.inplanes, planes, stride, downsample)) 132 | self.inplanes = planes * block.expansion 133 | for i in range(1, blocks): 134 | layers.append(block(self.inplanes, planes)) 135 | 136 | return nn.Sequential(*layers) 137 | 138 | def forward(self, x): 139 | x = self.conv1(x) 140 | x = self.bn1(x) 141 | x = self.relu(x) 142 | x = self.maxpool(x) 143 | 144 | x = self.layer1(x) 145 | x = self.layer2(x) 146 | x = self.layer3(x) 147 | x = self.layer4(x) 148 | 149 | x = self.avgpool(x) 150 | x = x.view(x.size(0), -1) 151 | x = self.fc(x) 152 | 153 | return x 154 | 155 | 156 | def resnet18(pretrained=False, **kwargs): 157 | """Constructs a ResNet-18 model. 158 | 159 | Args: 160 | pretrained (bool): If True, returns a model pre-trained on ImageNet 161 | """ 162 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 163 | if pretrained: 164 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 165 | return model 166 | 167 | 168 | def resnet34(pretrained=False, **kwargs): 169 | """Constructs a ResNet-34 model. 170 | 171 | Args: 172 | pretrained (bool): If True, returns a model pre-trained on ImageNet 173 | """ 174 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 175 | if pretrained: 176 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) 177 | return model 178 | 179 | 180 | def resnet50(pretrained=False, **kwargs): 181 | """Constructs a ResNet-50 model. 182 | 183 | Args: 184 | pretrained (bool): If True, returns a model pre-trained on ImageNet 185 | """ 186 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 187 | if pretrained: 188 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 189 | return model 190 | 191 | 192 | def resnet101(pretrained=False, **kwargs): 193 | """Constructs a ResNet-101 model. 194 | 195 | Args: 196 | pretrained (bool): If True, returns a model pre-trained on ImageNet 197 | """ 198 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 199 | if pretrained: 200 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 201 | return model 202 | 203 | 204 | def resnet152(pretrained=False, **kwargs): 205 | """Constructs a ResNet-152 model. 
206 | 207 | Args: 208 | pretrained (bool): If True, returns a model pre-trained on ImageNet 209 | """ 210 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 211 | if pretrained: 212 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) 213 | return model 214 | 215 | 216 | -------------------------------------------------------------------------------- /src/layers/loss.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '../..') 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | from config.config import * 8 | 9 | class LabelSmoothingLoss(nn.Module): 10 | def __init__(self, smoothing=0.1): 11 | super(LabelSmoothingLoss, self).__init__() 12 | self.confidence = 1.0 - smoothing 13 | self.smoothing = smoothing 14 | 15 | def forward(self, logits, labels, epoch=0, **kwargs): 16 | if self.training: 17 | logits = logits.float() 18 | labels = labels.float() 19 | logprobs = torch.nn.functional.log_softmax(logits, dim=-1) 20 | 21 | nll_loss = -logprobs.gather(dim=-1, index=labels.unsqueeze(1).long()) 22 | nll_loss = nll_loss.squeeze(1) 23 | smooth_loss = -logprobs.mean(dim=-1) 24 | loss = self.confidence * nll_loss + self.smoothing * smooth_loss 25 | loss = loss.mean() 26 | else: 27 | loss = F.cross_entropy(logits, labels) 28 | return loss 29 | 30 | class LabelSmoothingLossV1(nn.modules.Module): 31 | def __init__(self): 32 | super(LabelSmoothingLossV1, self).__init__() 33 | self.classify_loss = LabelSmoothingLoss() 34 | 35 | def forward(self, logits, labels, epoch=0): 36 | out_face, feature = logits 37 | loss = self.classify_loss(out_face, labels) 38 | return loss 39 | 40 | if __name__ == "__main__": 41 | loss = LabelSmoothingLossV1() 42 | logits = Variable(torch.randn(3, NUM_CLASSES)) 43 | labels = Variable(torch.LongTensor(3).random_(NUM_CLASSES)) 44 | output = loss([logits, None, logits], labels) 45 | print(output) 46 | -------------------------------------------------------------------------------- /src/layers/metric_learning.py: -------------------------------------------------------------------------------- 1 | # from https://github.com/ronghuaiyang/arcface-pytorch/blob/master/models/metrics.py 2 | # adacos: https://github.com/4uiiurz1/pytorch-adacos/blob/master/metrics.py 3 | from __future__ import print_function 4 | from __future__ import division 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | import torch.autograd 9 | from torch.nn import Parameter 10 | import math 11 | 12 | class ArcMarginProduct(nn.Module): 13 | r"""Implement of large margin arc distance: : 14 | Args: 15 | in_features: size of each input sample 16 | out_features: size of each output sample 17 | s: norm of input feature 18 | m: margin 19 | cos(theta + m) 20 | """ 21 | def __init__(self, in_features, out_features, s=30.0, m=0.50, easy_margin=False, ls_eps=0.0): 22 | super(ArcMarginProduct, self).__init__() 23 | self.in_features = in_features 24 | self.out_features = out_features 25 | self.s = s 26 | self.m = m 27 | self.ls_eps = ls_eps # label smoothing 28 | self.W = Parameter(torch.FloatTensor(out_features, in_features)) 29 | self.reset_parameters() 30 | 31 | self.easy_margin = easy_margin 32 | self.cos_m = math.cos(m) 33 | self.sin_m = math.sin(m) 34 | self.th = math.cos(math.pi - m) 35 | self.mm = math.sin(math.pi - m) * m 36 | 37 | def reset_parameters(self): 38 | stdv = 1. 
/ math.sqrt(self.W.size(1)) 39 | self.W.data.uniform_(-stdv, stdv) 40 | 41 | def forward(self, input, label): 42 | # --------------------------- cos(theta) & phi(theta) --------------------------- 43 | cosine = F.linear(F.normalize(input), F.normalize(self.W)) 44 | if label is None: 45 | return cosine 46 | sine = torch.sqrt(1.0 - torch.pow(cosine, 2)) 47 | phi = cosine * self.cos_m - sine * self.sin_m 48 | if self.easy_margin: 49 | phi = torch.where(cosine.float() > 0, phi, cosine.float()) 50 | else: 51 | phi = torch.where(cosine.float() > self.th, phi, cosine.float() - self.mm) 52 | # --------------------------- convert label to one-hot --------------------------- 53 | # one_hot = torch.zeros(cosine.size(), requires_grad=True, device='cuda') 54 | one_hot = torch.zeros(cosine.size(), device=label.device) 55 | one_hot.scatter_(1, label.view(-1, 1).long(), 1) 56 | if self.ls_eps > 0: 57 | one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.out_features 58 | # -------------torch.where(out_i = {x_i if condition_i else y_i) ------------- 59 | output = (one_hot * phi) + ((1.0 - one_hot) * cosine) 60 | output *= self.s 61 | 62 | return output 63 | 64 | class ArcMarginProduct2(nn.Module): 65 | 66 | def __init__(self, in_features, out_features, s=30.0, m=0.50, easy_margin=False, ls_eps=0.0): 67 | super(ArcMarginProduct2, self).__init__() 68 | self.in_features = in_features 69 | self.out_features = out_features 70 | self.s = s 71 | self.m = m 72 | self.ls_eps = ls_eps # label smoothing 73 | self.W = Parameter(torch.FloatTensor(out_features, in_features)) 74 | nn.init.xavier_uniform_(self.W) 75 | 76 | self.easy_margin = easy_margin 77 | self.cos_m = math.cos(m) 78 | self.sin_m = math.sin(m) 79 | self.th = math.cos(math.pi - m) 80 | self.mm = math.sin(math.pi - m) * m 81 | 82 | def forward(self, input, label): 83 | # --------------------------- cos(theta) & phi(theta) --------------------------- 84 | cosine = F.linear(F.normalize(input), F.normalize(self.W)) 85 | if label == None: 86 | return cosine 87 | sine = torch.sqrt(1.0 - torch.pow(cosine, 2)) 88 | phi = cosine * self.cos_m - sine * self.sin_m 89 | if self.easy_margin: 90 | phi = torch.where(cosine.float() > 0, phi, cosine.float()) 91 | else: 92 | phi = torch.where(cosine.float() > self.th, phi, cosine.float() - self.mm) 93 | # --------------------------- convert label to one-hot --------------------------- 94 | # one_hot = torch.zeros(cosine.size(), requires_grad=True, device='cuda') 95 | one_hot = torch.zeros(cosine.size(), device=label.device) 96 | one_hot.scatter_(1, label.view(-1, 1).long(), 1) 97 | if self.ls_eps > 0: 98 | one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.out_features 99 | # -------------torch.where(out_i = {x_i if condition_i else y_i) ------------- 100 | output = (one_hot * phi) + ((1.0 - one_hot) * cosine) 101 | output *= self.s 102 | 103 | return output 104 | -------------------------------------------------------------------------------- /src/layers/normalization.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | # -------------------------------------- 5 | # Normalization layers 6 | # -------------------------------------- 7 | def l2n(x, eps=1e-6): 8 | return x / (torch.norm(x, p=2, dim=1, keepdim=True) + eps).expand_as(x) 9 | 10 | class L2N(nn.Module): 11 | 12 | def __init__(self, eps=1e-6): 13 | super(L2N, self).__init__() 14 | self.eps = eps 15 | 16 | def forward(self, x): 17 | return l2n(x, eps=self.eps) 18 | 19 | 
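    # Hypothetical usage sketch (names below are illustrative, not from this repo):
    # l2n / L2N scale each row of a (batch, dim) feature matrix to unit L2 norm,
    # so that cosine similarity between descriptors reduces to a plain dot product.
    #
    #   feats = torch.randn(8, 512)
    #   unit = L2N()(feats)
    #   # unit.norm(p=2, dim=1) is ~1 for every row (up to the eps term)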
def __repr__(self): 20 | return self.__class__.__name__ + '(' + 'eps=' + str(self.eps) + ')' 21 | -------------------------------------------------------------------------------- /src/layers/pooling.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | # -------------------------------------- 3 | # Pooling layers 4 | # -------------------------------------- 5 | def gem(x, p=3, eps=1e-6): 6 | return F.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1))).pow(1./p) 7 | -------------------------------------------------------------------------------- /src/layers/scheduler.py: -------------------------------------------------------------------------------- 1 | import torch.optim as optim 2 | from layers.scheduler_base import SchedulerBase 3 | 4 | class SGD(SchedulerBase): 5 | def __init__(self, model): 6 | super(SGD, self).__init__() 7 | self.model = model 8 | self._lr = 0.01 9 | self._optimizer = optim.SGD(model.parameters(), self._lr, momentum=0.9, weight_decay=1e-5) 10 | 11 | def schedule(self, epoch, epochs, **kwargs): 12 | lr = 0.01 13 | for param_group in self._optimizer.param_groups: 14 | param_group['lr'] = lr 15 | self._lr = self._optimizer.param_groups[0]['lr'] 16 | return self._optimizer, self._lr 17 | 18 | class SGD2a(SchedulerBase): 19 | def __init__(self, model): 20 | super(SGD2a, self).__init__() 21 | self.model = model 22 | self._lr = 0.005 23 | self._optimizer = optim.SGD(model.parameters(), self._lr, momentum=0.9, weight_decay=1e-5) 24 | 25 | def schedule(self, epoch, epochs, **kwargs): 26 | lr = 0.005 27 | for param_group in self._optimizer.param_groups: 28 | param_group['lr'] = lr 29 | self._lr = self._optimizer.param_groups[0]['lr'] 30 | return self._optimizer, self._lr 31 | 32 | class SGD2c(SchedulerBase): 33 | def __init__(self, model): 34 | super(SGD2c, self).__init__() 35 | self.model = model 36 | self._lr = 0.0025 37 | self._optimizer = optim.SGD(model.parameters(), self._lr, momentum=0.9, weight_decay=1e-5) 38 | 39 | def schedule(self, epoch, epochs, **kwargs): 40 | lr = 0.0025 41 | for param_group in self._optimizer.param_groups: 42 | param_group['lr'] = lr 43 | self._lr = self._optimizer.param_groups[0]['lr'] 44 | return self._optimizer, self._lr 45 | 46 | -------------------------------------------------------------------------------- /src/layers/scheduler_base.py: -------------------------------------------------------------------------------- 1 | import torch.optim as optim 2 | import torch.nn as nn 3 | class SchedulerBase(object): 4 | def __init__(self): 5 | self._is_load_best_weight = True 6 | self._is_load_best_optim = True 7 | self._is_adjust_lr = True 8 | self._lr = 0.01 9 | self._optimizer = None 10 | 11 | def schedule(self,net, epoch, epochs, **kwargs): 12 | raise Exception('Did not implemented') 13 | 14 | def is_load_best_weight(self): 15 | return self._is_load_best_weight 16 | 17 | def is_load_best_optim(self): 18 | return self._is_load_best_optim 19 | 20 | 21 | def reset(self): 22 | self._is_load_best_weight = True 23 | self._load_best_optim = True 24 | 25 | 26 | def is_adjust_lr(self): 27 | return self._is_adjust_lr 28 | 29 | def get_optimizer(self): 30 | return self._optimizer -------------------------------------------------------------------------------- /src/layers/tool.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import builtins 4 | 5 | def open(file, mode=None, encoding=None): 6 | if mode == None: 
mode = 'r' 7 | 8 | if '/' in file: 9 | if 'w' or 'a' in mode: 10 | dir = os.path.dirname(file) 11 | if not os.path.isdir(dir): os.makedirs(dir) 12 | 13 | f = builtins.open(file, mode=mode, encoding=encoding) 14 | return f 15 | 16 | # http://stackoverflow.com/questions/34950201/pycharm-print-end-r-statement-not-working 17 | class Logger(object): 18 | def __init__(self): 19 | self.terminal = sys.stdout #stdout 20 | self.file = None 21 | 22 | def open(self, file, mode=None): 23 | if mode is None: mode ='w' 24 | self.file = open(file, mode) 25 | 26 | def write(self, message, is_terminal=1, is_file=1 ): 27 | if '\r' in message: is_file=0 28 | 29 | if is_terminal == 1: 30 | self.terminal.write(message) 31 | self.terminal.flush() 32 | #time.sleep(1) 33 | 34 | if is_file == 1: 35 | self.file.write(message) 36 | self.file.flush() 37 | 38 | def flush(self): 39 | # this flush method is needed for python 3 compatibility. 40 | # this handles the flush command by doing nothing. 41 | # you might want to specify some extra behavior here. 42 | pass 43 | -------------------------------------------------------------------------------- /src/networks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bestfitting/instance_level_recognition/683f021b4e65876835f028797ec28b0d1071bb45/src/networks/__init__.py -------------------------------------------------------------------------------- /src/networks/efficientnet_gem_fc_face.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '..') 3 | 4 | from layers.scheduler import * 5 | from layers.loss import * 6 | from config.config import * 7 | from layers.pooling import gem 8 | from layers.backbone.efficientnet_pytorch import EfficientNet 9 | from layers.metric_learning import * 10 | 11 | ## net ###################################################################### 12 | class ClsClass(nn.Module): 13 | 14 | def __init__(self, 15 | args, 16 | feature_net='efficientnet_b5', 17 | loss_module='AdaCos', 18 | margin=0.0, 19 | s=30.0, 20 | ): 21 | super().__init__() 22 | num_classes = args.num_classes 23 | 24 | if feature_net == 'efficientnet_b5': 25 | self.backbone = EfficientNet.from_pretrained('efficientnet-b5', model_dir=PRETRAINED_DIR, can_print=args.can_print) 26 | feat_dim = 2048 27 | elif feature_net == 'efficientnet_b6': 28 | self.backbone = EfficientNet.from_pretrained('efficientnet-b6', model_dir=PRETRAINED_DIR, can_print=args.can_print) 29 | feat_dim = 2304 30 | elif feature_net == 'efficientnet_b7': 31 | self.backbone = EfficientNet.from_pretrained('efficientnet-b7', model_dir=PRETRAINED_DIR, can_print=args.can_print) 32 | feat_dim = 2560 33 | 34 | self.in_channels = args.in_channels 35 | self.pool = gem 36 | fc_dim = 512 37 | self.fc = nn.Linear(feat_dim, fc_dim) 38 | self.bn = nn.BatchNorm1d(fc_dim) 39 | if loss_module == 'arcface': 40 | self.face_margin_product = ArcMarginProduct(fc_dim, num_classes, s=s, m=margin) 41 | elif loss_module == 'arcface2': 42 | self.face_margin_product = ArcMarginProduct2(fc_dim, num_classes, s=s, m=margin) 43 | else: 44 | raise ValueError(loss_module) 45 | 46 | def extract_feature(self, x): 47 | x = self.backbone.extract_features(x) 48 | x = self.pool(x) 49 | x = x.view(x.size(0), -1) 50 | x = self.fc(x) 51 | x = self.bn(x) 52 | return x 53 | 54 | def forward(self, x, label, **kargs): 55 | feature = self.extract_feature(x) 56 | out_face = self.face_margin_product(feature, label) 57 | return 
out_face, feature 58 | 59 | def class_efficientnet_b5_gem_fc_arcface_1head(**kwargs): 60 | args = kwargs['args'] 61 | model = ClsClass(args, feature_net='efficientnet_b5', loss_module='arcface', s=30, margin=0.3) 62 | return model 63 | 64 | def class_efficientnet_b5_gem_fc_arcface2_1head(**kwargs): 65 | args = kwargs['args'] 66 | model = ClsClass(args, feature_net='efficientnet_b5', loss_module='arcface2', s=30, margin=0.3) 67 | return model 68 | 69 | def class_efficientnet_b6_gem_fc_arcface2_1head(**kwargs): 70 | args = kwargs['args'] 71 | model = ClsClass(args, feature_net='efficientnet_b6', loss_module='arcface2', s=30, margin=0.3) 72 | return model 73 | 74 | def class_efficientnet_b7_gem_fc_arcface2_1head(**kwargs): 75 | args = kwargs['args'] 76 | model = ClsClass(args, feature_net='efficientnet_b7', loss_module='arcface2', s=30, margin=0.3) 77 | return model 78 | -------------------------------------------------------------------------------- /src/networks/imageclsnet.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from config.config import * 3 | from networks.efficientnet_gem_fc_face import ( 4 | class_efficientnet_b5_gem_fc_arcface_1head, 5 | class_efficientnet_b5_gem_fc_arcface2_1head, 6 | class_efficientnet_b6_gem_fc_arcface2_1head, 7 | class_efficientnet_b7_gem_fc_arcface2_1head, 8 | ) 9 | from networks.resnet_gem_fc_face import class_resnet152_gem_fc_arcface_1head 10 | 11 | def init_network(params): 12 | architecture = params.get('architecture', 'class_efficientnet_b7_gem_fc_arcface2_1head') 13 | args = Namespace(**{ 14 | 'num_classes': params.get('num_classes', 81313), 15 | 'in_channels': params.get('in_channels', 3), 16 | 'can_print': params.get('can_print', False), 17 | }) 18 | net = eval(architecture)(args=args) 19 | return net 20 | -------------------------------------------------------------------------------- /src/networks/resnet_gem_fc_face.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '..') 3 | 4 | from layers.scheduler import * 5 | from layers.loss import * 6 | from layers.backbone.resnet import * 7 | from config.config import * 8 | from layers.pooling import gem 9 | from layers.metric_learning import * 10 | from utilities.model_util import load_pretrained 11 | 12 | model_names = { 13 | 'resnet18': 'resnet18-5c106cde.pth', 14 | 'resnet34': 'resnet34-333f7ec4.pth', 15 | 'resnet50': 'resnet50-19c8e357.pth', 16 | 'resnet101': 'resnet101-5d3b4d8f.pth', 17 | 'resnet152': 'resnet152-b121ed2d.pth', 18 | } 19 | ## net ###################################################################### 20 | class ResnetClass(nn.Module): 21 | 22 | def __init__(self, 23 | args, 24 | feature_net='resnet101', 25 | loss_module='arcface', 26 | s=30.0, 27 | margin=0.3, 28 | ): 29 | super().__init__() 30 | num_classes = args.num_classes 31 | 32 | if feature_net == 'resnet18': 33 | self.backbone = resnet18() 34 | self.EX = 1 35 | elif feature_net == 'resnet34': 36 | self.backbone = resnet34() 37 | self.EX = 1 38 | elif feature_net == 'resnet50': 39 | self.backbone = resnet50() 40 | self.EX = 4 41 | elif feature_net == 'resnet101': 42 | self.backbone = resnet101() 43 | self.EX = 4 44 | elif feature_net == 'resnet152': 45 | self.backbone = resnet152() 46 | self.EX = 4 47 | 48 | self.backbone = load_pretrained(self.backbone, 49 | f'{PRETRAINED_DIR}/{model_names[feature_net]}', 50 | strict=True, can_print=args.can_print) 51 | self.in_channels = 
args.in_channels 52 | 53 | self.pool = gem 54 | fc_dim = 512 55 | self.fc = nn.Linear(512 * self.EX, fc_dim) 56 | self.bn = nn.BatchNorm1d(fc_dim) 57 | 58 | if loss_module == 'arcface': 59 | self.face_margin_product = ArcMarginProduct(fc_dim, num_classes, s=s, m=margin) 60 | elif loss_module == 'arcface2': 61 | self.face_margin_product = ArcMarginProduct2(fc_dim, num_classes, s=s, m=margin) 62 | else: 63 | raise ValueError(loss_module) 64 | 65 | def extract_feature(self, x): 66 | x = self.backbone.conv1(x) 67 | x = self.backbone.bn1(x) 68 | x = self.backbone.relu(x) 69 | x = self.backbone.maxpool(x) 70 | e2 = self.backbone.layer1(x) 71 | e3 = self.backbone.layer2(e2) 72 | e4 = self.backbone.layer3(e3) 73 | e5 = self.backbone.layer4(e4) 74 | x = self.pool(e5) 75 | x = x.view(x.size(0), -1) 76 | x = self.fc(x) 77 | x = self.bn(x) 78 | return x 79 | 80 | def forward(self, x, label, **kargs): 81 | feature = self.extract_feature(x) 82 | out_face = self.face_margin_product(feature, label) 83 | return out_face, feature 84 | 85 | def class_resnet152_gem_fc_arcface_1head(**kwargs): 86 | args = kwargs['args'] 87 | model = ResnetClass(args, feature_net='resnet152', loss_module='arcface', s=30.0, margin=0.3) 88 | return model 89 | -------------------------------------------------------------------------------- /src/networks/superpointglue/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bestfitting/instance_level_recognition/683f021b4e65876835f028797ec28b0d1071bb45/src/networks/superpointglue/__init__.py -------------------------------------------------------------------------------- /src/networks/superpointglue/matching.py: -------------------------------------------------------------------------------- 1 | # %BANNER_BEGIN% 2 | # --------------------------------------------------------------------- 3 | # %COPYRIGHT_BEGIN% 4 | # 5 | # Magic Leap, Inc. ("COMPANY") CONFIDENTIAL 6 | # 7 | # Unpublished Copyright (c) 2020 8 | # Magic Leap, Inc., All Rights Reserved. 9 | # 10 | # NOTICE: All information contained herein is, and remains the property 11 | # of COMPANY. The intellectual and technical concepts contained herein 12 | # are proprietary to COMPANY and may be covered by U.S. and Foreign 13 | # Patents, patents in process, and are protected by trade secret or 14 | # copyright law. Dissemination of this information or reproduction of 15 | # this material is strictly forbidden unless prior written permission is 16 | # obtained from COMPANY. Access to the source code contained herein is 17 | # hereby forbidden to anyone except current COMPANY employees, managers 18 | # or contractors who have executed Confidentiality and Non-disclosure 19 | # agreements explicitly covering such access. 20 | # 21 | # The copyright notice above does not evidence any actual or intended 22 | # publication or disclosure of this source code, which includes 23 | # information that is confidential and/or proprietary, and is a trade 24 | # secret, of COMPANY. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION, 25 | # PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS 26 | # SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS 27 | # STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND 28 | # INTERNATIONAL TREATIES. 
THE RECEIPT OR POSSESSION OF THIS SOURCE 29 | # CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS 30 | # TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, 31 | # USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART. 32 | # 33 | # %COPYRIGHT_END% 34 | # ---------------------------------------------------------------------- 35 | # %AUTHORS_BEGIN% 36 | # 37 | # Originating Authors: Paul-Edouard Sarlin 38 | # 39 | # %AUTHORS_END% 40 | # --------------------------------------------------------------------*/ 41 | # %BANNER_END% 42 | 43 | import torch 44 | 45 | from .superpoint import SuperPoint 46 | from .superglue import SuperGlue 47 | 48 | 49 | class Matching(torch.nn.Module): 50 | """ Image Matching Frontend (SuperPoint + SuperGlue) """ 51 | def __init__(self, config={}): 52 | super().__init__() 53 | self.superpoint = SuperPoint(config.get('superpoint', {})) 54 | self.superglue = SuperGlue(config.get('superglue', {})) 55 | 56 | def do_superpoint(self, data): 57 | pred = {} 58 | 59 | # Extract SuperPoint (keypoints, scores, descriptors) if not provided 60 | if 'keypoints0' not in data: 61 | pred0 = self.superpoint({'image': data['image0']}) 62 | pred = {**pred, **{k+'0': v for k, v in pred0.items()}} 63 | else: 64 | pred = {**pred, **{'keypoints0': data['keypoints0'], 'scores0': data['scores0'], 'descriptors0': data['descriptors0']}} 65 | data.pop('keypoints0') 66 | data.pop('scores0') 67 | data.pop('descriptors0') 68 | if 'keypoints1' not in data: 69 | pred1 = self.superpoint({'image': data['image1']}) 70 | pred = {**pred, **{k+'1': v for k, v in pred1.items()}} 71 | else: 72 | pred = {**pred, **{'keypoints1': data['keypoints1'], 'scores1': data['scores1'], 'descriptors1': data['descriptors1']}} 73 | data.pop('keypoints1') 74 | data.pop('scores1') 75 | data.pop('descriptors1') 76 | 77 | return pred 78 | 79 | def do_superglue(self, data, pred): 80 | # Batch all features 81 | # We should either have i) one image per batch, or 82 | # ii) the same number of local features for all images in the batch. 83 | data = {**data, **pred} 84 | 85 | for k in data: 86 | if isinstance(data[k], (list, tuple)): 87 | data[k] = torch.stack(data[k]) 88 | 89 | # Perform the matching 90 | # print(f'data : {data.keys()}') 91 | pred = {**pred, **self.superglue(data)} 92 | 93 | return pred 94 | 95 | def forward(self, data): 96 | """ Run SuperPoint (optionally) and SuperGlue 97 | SuperPoint is skipped if ['keypoints0', 'keypoints1'] exist in input 98 | Args: 99 | data: dictionary with minimal keys: ['image0', 'image1'] 100 | """ 101 | pred = self.do_superpoint(data) 102 | pred = self.do_superglue(data, pred) 103 | return pred 104 | -------------------------------------------------------------------------------- /src/networks/superpointglue/superglue.py: -------------------------------------------------------------------------------- 1 | # %BANNER_BEGIN% 2 | # --------------------------------------------------------------------- 3 | # %COPYRIGHT_BEGIN% 4 | # 5 | # Magic Leap, Inc. ("COMPANY") CONFIDENTIAL 6 | # 7 | # Unpublished Copyright (c) 2020 8 | # Magic Leap, Inc., All Rights Reserved. 9 | # 10 | # NOTICE: All information contained herein is, and remains the property 11 | # of COMPANY. The intellectual and technical concepts contained herein 12 | # are proprietary to COMPANY and may be covered by U.S. and Foreign 13 | # Patents, patents in process, and are protected by trade secret or 14 | # copyright law. 
Dissemination of this information or reproduction of 15 | # this material is strictly forbidden unless prior written permission is 16 | # obtained from COMPANY. Access to the source code contained herein is 17 | # hereby forbidden to anyone except current COMPANY employees, managers 18 | # or contractors who have executed Confidentiality and Non-disclosure 19 | # agreements explicitly covering such access. 20 | # 21 | # The copyright notice above does not evidence any actual or intended 22 | # publication or disclosure of this source code, which includes 23 | # information that is confidential and/or proprietary, and is a trade 24 | # secret, of COMPANY. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION, 25 | # PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS 26 | # SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS 27 | # STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND 28 | # INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE 29 | # CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS 30 | # TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, 31 | # USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART. 32 | # 33 | # %COPYRIGHT_END% 34 | # ---------------------------------------------------------------------- 35 | # %AUTHORS_BEGIN% 36 | # 37 | # Originating Authors: Paul-Edouard Sarlin 38 | # 39 | # %AUTHORS_END% 40 | # --------------------------------------------------------------------*/ 41 | # %BANNER_END% 42 | 43 | from copy import deepcopy 44 | from pathlib import Path 45 | import torch 46 | from torch import nn 47 | 48 | 49 | def MLP(channels: list, do_bn=True): 50 | """ Multi-layer perceptron """ 51 | n = len(channels) 52 | layers = [] 53 | for i in range(1, n): 54 | layers.append( 55 | nn.Conv1d(channels[i - 1], channels[i], kernel_size=1, bias=True)) 56 | if i < (n-1): 57 | if do_bn: 58 | layers.append(nn.BatchNorm1d(channels[i])) 59 | layers.append(nn.ReLU()) 60 | return nn.Sequential(*layers) 61 | 62 | 63 | def normalize_keypoints(kpts, image_shape): 64 | """ Normalize keypoints locations based on image image_shape""" 65 | _, _, height, width = image_shape 66 | one = kpts.new_tensor(1) 67 | size = torch.stack([one*width, one*height])[None] 68 | center = size / 2 69 | scaling = size.max(1, keepdim=True).values * 0.7 70 | return (kpts - center[:, None, :]) / scaling[:, None, :] 71 | 72 | 73 | class KeypointEncoder(nn.Module): 74 | """ Joint encoding of visual appearance and location using MLPs""" 75 | def __init__(self, feature_dim, layers): 76 | super().__init__() 77 | self.encoder = MLP([3] + layers + [feature_dim]) 78 | nn.init.constant_(self.encoder[-1].bias, 0.0) 79 | 80 | def forward(self, kpts, scores): 81 | inputs = [kpts.transpose(1, 2), scores.unsqueeze(1)] 82 | return self.encoder(torch.cat(inputs, dim=1)) 83 | 84 | 85 | def attention(query, key, value): 86 | dim = query.shape[1] 87 | scores = torch.einsum('bdhn,bdhm->bhnm', query, key) / dim**.5 88 | prob = torch.nn.functional.softmax(scores, dim=-1) 89 | return torch.einsum('bhnm,bdhm->bdhn', prob, value), prob 90 | 91 | 92 | class MultiHeadedAttention(nn.Module): 93 | """ Multi-head attention to increase model expressivitiy """ 94 | def __init__(self, num_heads: int, d_model: int): 95 | super().__init__() 96 | assert d_model % num_heads == 0 97 | self.dim = d_model // num_heads 98 | self.num_heads = num_heads 99 | self.merge = nn.Conv1d(d_model, d_model, kernel_size=1) 100 | self.proj = nn.ModuleList([deepcopy(self.merge) for 
_ in range(3)]) 101 | 102 | def forward(self, query, key, value): 103 | batch_dim = query.size(0) 104 | query, key, value = [l(x).view(batch_dim, self.dim, self.num_heads, -1) 105 | for l, x in zip(self.proj, (query, key, value))] 106 | x, _ = attention(query, key, value) 107 | return self.merge(x.contiguous().view(batch_dim, self.dim*self.num_heads, -1)) 108 | 109 | 110 | class AttentionalPropagation(nn.Module): 111 | def __init__(self, feature_dim: int, num_heads: int): 112 | super().__init__() 113 | self.attn = MultiHeadedAttention(num_heads, feature_dim) 114 | self.mlp = MLP([feature_dim*2, feature_dim*2, feature_dim]) 115 | nn.init.constant_(self.mlp[-1].bias, 0.0) 116 | 117 | def forward(self, x, source): 118 | message = self.attn(x, source, source) 119 | return self.mlp(torch.cat([x, message], dim=1)) 120 | 121 | 122 | class AttentionalGNN(nn.Module): 123 | def __init__(self, feature_dim: int, layer_names: list): 124 | super().__init__() 125 | self.layers = nn.ModuleList([ 126 | AttentionalPropagation(feature_dim, 4) 127 | for _ in range(len(layer_names))]) 128 | self.names = layer_names 129 | 130 | def forward(self, desc0, desc1): 131 | for layer, name in zip(self.layers, self.names): 132 | if name == 'cross': 133 | src0, src1 = desc1, desc0 134 | else: # if name == 'self': 135 | src0, src1 = desc0, desc1 136 | delta0, delta1 = layer(desc0, src0), layer(desc1, src1) 137 | desc0, desc1 = (desc0 + delta0), (desc1 + delta1) 138 | return desc0, desc1 139 | 140 | 141 | def log_sinkhorn_iterations(Z, log_mu, log_nu, iters: int): 142 | """ Perform Sinkhorn Normalization in Log-space for stability""" 143 | u, v = torch.zeros_like(log_mu), torch.zeros_like(log_nu) 144 | for _ in range(iters): 145 | u = log_mu - torch.logsumexp(Z + v.unsqueeze(1), dim=2) 146 | v = log_nu - torch.logsumexp(Z + u.unsqueeze(2), dim=1) 147 | return Z + u.unsqueeze(2) + v.unsqueeze(1) 148 | 149 | 150 | def log_optimal_transport(scores, alpha, iters: int): 151 | """ Perform Differentiable Optimal Transport in Log-space for stability""" 152 | b, m, n = scores.shape 153 | one = scores.new_tensor(1) 154 | ms, ns = (m*one).to(scores), (n*one).to(scores) 155 | 156 | bins0 = alpha.expand(b, m, 1) 157 | bins1 = alpha.expand(b, 1, n) 158 | alpha = alpha.expand(b, 1, 1) 159 | 160 | couplings = torch.cat([torch.cat([scores, bins0], -1), 161 | torch.cat([bins1, alpha], -1)], 1) 162 | 163 | norm = - (ms + ns).log() 164 | log_mu = torch.cat([norm.expand(m), ns.log()[None] + norm]) 165 | log_nu = torch.cat([norm.expand(n), ms.log()[None] + norm]) 166 | log_mu, log_nu = log_mu[None].expand(b, -1), log_nu[None].expand(b, -1) 167 | 168 | Z = log_sinkhorn_iterations(couplings, log_mu, log_nu, iters) 169 | Z = Z - norm # multiply probabilities by M+N 170 | return Z 171 | 172 | 173 | def arange_like(x, dim: int): 174 | return x.new_ones(x.shape[dim]).cumsum(0) - 1 # traceable in 1.1 175 | 176 | 177 | class SuperGlue(nn.Module): 178 | """SuperGlue feature matching middle-end 179 | 180 | Given two sets of keypoints and locations, we determine the 181 | correspondences by: 182 | 1. Keypoint Encoding (normalization + visual feature and location fusion) 183 | 2. Graph Neural Network with multiple self and cross-attention layers 184 | 3. Final projection layer 185 | 4. Optimal Transport Layer (a differentiable Hungarian matching algorithm) 186 | 5. Thresholding matrix based on mutual exclusivity and a match_threshold 187 | 188 | The correspondence ids use -1 to indicate non-matching points. 
189 | 190 | Paul-Edouard Sarlin, Daniel DeTone, Tomasz Malisiewicz, and Andrew 191 | Rabinovich. SuperGlue: Learning Feature Matching with Graph Neural 192 | Networks. In CVPR, 2020. https://arxiv.org/abs/1911.11763 193 | 194 | """ 195 | default_config = { 196 | 'descriptor_dim': 256, 197 | 'weights': 'indoor', 198 | 'keypoint_encoder': [32, 64, 128, 256], 199 | 'GNN_layers': ['self', 'cross'] * 9, 200 | 'sinkhorn_iterations': 100, 201 | 'match_threshold': 0.2, 202 | } 203 | 204 | def __init__(self, config): 205 | super().__init__() 206 | self.config = {**self.default_config, **config} 207 | 208 | self.kenc = KeypointEncoder( 209 | self.config['descriptor_dim'], self.config['keypoint_encoder']) 210 | 211 | self.gnn = AttentionalGNN( 212 | self.config['descriptor_dim'], self.config['GNN_layers']) 213 | 214 | self.final_proj = nn.Conv1d( 215 | self.config['descriptor_dim'], self.config['descriptor_dim'], 216 | kernel_size=1, bias=True) 217 | 218 | bin_score = torch.nn.Parameter(torch.tensor(1.)) 219 | self.register_parameter('bin_score', bin_score) 220 | 221 | assert self.config['weights'] in ['indoor', 'outdoor'] 222 | path = Path(self.config['model_dir']) / 'weights/superglue_{}.pth'.format(self.config['weights']) 223 | self.load_state_dict(torch.load(str(path))) 224 | print('Loaded SuperGlue model (\"{}\" weights)'.format( 225 | self.config['weights'])) 226 | 227 | def forward(self, data): 228 | """Run SuperGlue on a pair of keypoints and descriptors""" 229 | desc0, desc1 = data['descriptors0'], data['descriptors1'] 230 | kpts0, kpts1 = data['keypoints0'], data['keypoints1'] 231 | 232 | if kpts0.shape[1] == 0 or kpts1.shape[1] == 0: # no keypoints 233 | shape0, shape1 = kpts0.shape[:-1], kpts1.shape[:-1] 234 | return { 235 | 'matches0': kpts0.new_full(shape0, -1, dtype=torch.int), 236 | 'matches1': kpts1.new_full(shape1, -1, dtype=torch.int), 237 | 'matching_scores0': kpts0.new_zeros(shape0), 238 | 'matching_scores1': kpts1.new_zeros(shape1), 239 | } 240 | 241 | # Keypoint normalization. 242 | kpts0 = normalize_keypoints(kpts0, data['image0'].shape) 243 | kpts1 = normalize_keypoints(kpts1, data['image1'].shape) 244 | 245 | # Keypoint MLP encoder. 246 | desc0 = desc0 + self.kenc(kpts0, data['scores0']) 247 | desc1 = desc1 + self.kenc(kpts1, data['scores1']) 248 | 249 | # Multi-layer Transformer network. 250 | desc0, desc1 = self.gnn(desc0, desc1) 251 | 252 | # Final MLP projection. 253 | mdesc0, mdesc1 = self.final_proj(desc0), self.final_proj(desc1) 254 | 255 | # Compute matching descriptor distance. 256 | scores = torch.einsum('bdn,bdm->bnm', mdesc0, mdesc1) 257 | scores = scores / self.config['descriptor_dim']**.5 258 | 259 | # Run the optimal transport. 260 | scores = log_optimal_transport( 261 | scores, self.bin_score, 262 | iters=self.config['sinkhorn_iterations']) 263 | 264 | # Get the matches with score above "match_threshold". 
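        # The block below is a mutual-nearest-neighbour check: after dropping the
        # dustbin row/column, each keypoint in image 0 picks its best partner in
        # image 1 (max over dim 2) and vice versa (max over dim 1). A pair survives
        # only if the two choices agree; its confidence is the exponentiated
        # transport score, and anything non-mutual or below match_threshold is
        # written as -1 (no match).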
265 | max0, max1 = scores[:, :-1, :-1].max(2), scores[:, :-1, :-1].max(1) 266 | indices0, indices1 = max0.indices, max1.indices 267 | mutual0 = arange_like(indices0, 1)[None] == indices1.gather(1, indices0) 268 | mutual1 = arange_like(indices1, 1)[None] == indices0.gather(1, indices1) 269 | zero = scores.new_tensor(0) 270 | mscores0 = torch.where(mutual0, max0.values.exp(), zero) 271 | mscores1 = torch.where(mutual1, mscores0.gather(1, indices1), zero) 272 | valid0 = mutual0 & (mscores0 > self.config['match_threshold']) 273 | valid1 = mutual1 & valid0.gather(1, indices1) 274 | indices0 = torch.where(valid0, indices0, indices0.new_tensor(-1)) 275 | indices1 = torch.where(valid1, indices1, indices1.new_tensor(-1)) 276 | 277 | return { 278 | 'matches0': indices0, # use -1 for invalid match 279 | 'matches1': indices1, # use -1 for invalid match 280 | 'matching_scores0': mscores0, 281 | 'matching_scores1': mscores1, 282 | } 283 | -------------------------------------------------------------------------------- /src/networks/superpointglue/superpoint.py: -------------------------------------------------------------------------------- 1 | # %BANNER_BEGIN% 2 | # --------------------------------------------------------------------- 3 | # %COPYRIGHT_BEGIN% 4 | # 5 | # Magic Leap, Inc. ("COMPANY") CONFIDENTIAL 6 | # 7 | # Unpublished Copyright (c) 2020 8 | # Magic Leap, Inc., All Rights Reserved. 9 | # 10 | # NOTICE: All information contained herein is, and remains the property 11 | # of COMPANY. The intellectual and technical concepts contained herein 12 | # are proprietary to COMPANY and may be covered by U.S. and Foreign 13 | # Patents, patents in process, and are protected by trade secret or 14 | # copyright law. Dissemination of this information or reproduction of 15 | # this material is strictly forbidden unless prior written permission is 16 | # obtained from COMPANY. Access to the source code contained herein is 17 | # hereby forbidden to anyone except current COMPANY employees, managers 18 | # or contractors who have executed Confidentiality and Non-disclosure 19 | # agreements explicitly covering such access. 20 | # 21 | # The copyright notice above does not evidence any actual or intended 22 | # publication or disclosure of this source code, which includes 23 | # information that is confidential and/or proprietary, and is a trade 24 | # secret, of COMPANY. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION, 25 | # PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS 26 | # SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS 27 | # STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND 28 | # INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE 29 | # CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS 30 | # TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, 31 | # USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART. 
32 | # 33 | # %COPYRIGHT_END% 34 | # ---------------------------------------------------------------------- 35 | # %AUTHORS_BEGIN% 36 | # 37 | # Originating Authors: Paul-Edouard Sarlin 38 | # 39 | # %AUTHORS_END% 40 | # --------------------------------------------------------------------*/ 41 | # %BANNER_END% 42 | 43 | from pathlib import Path 44 | import torch 45 | from torch import nn 46 | 47 | def simple_nms(scores, nms_radius: int): 48 | """ Fast Non-maximum suppression to remove nearby points """ 49 | assert(nms_radius >= 0) 50 | 51 | def max_pool(x): 52 | return torch.nn.functional.max_pool2d( 53 | x, kernel_size=nms_radius*2+1, stride=1, padding=nms_radius) 54 | 55 | zeros = torch.zeros_like(scores) 56 | max_mask = scores == max_pool(scores) 57 | for _ in range(2): 58 | supp_mask = max_pool(max_mask.float()) > 0 59 | supp_scores = torch.where(supp_mask, zeros, scores) 60 | new_max_mask = supp_scores == max_pool(supp_scores) 61 | max_mask = max_mask | (new_max_mask & (~supp_mask)) 62 | return torch.where(max_mask, scores, zeros) 63 | 64 | 65 | def remove_borders(keypoints, scores, border: int, height: int, width: int): 66 | """ Removes keypoints too close to the border """ 67 | mask_h = (keypoints[:, 0] >= border) & (keypoints[:, 0] < (height - border)) 68 | mask_w = (keypoints[:, 1] >= border) & (keypoints[:, 1] < (width - border)) 69 | mask = mask_h & mask_w 70 | return keypoints[mask], scores[mask] 71 | 72 | 73 | def top_k_keypoints(keypoints, scores, k: int): 74 | if k >= len(keypoints): 75 | return keypoints, scores 76 | scores, indices = torch.topk(scores, k, dim=0) 77 | return keypoints[indices], scores 78 | 79 | 80 | def sample_descriptors(keypoints, descriptors, s: int = 8): 81 | """ Interpolate descriptors at keypoint locations """ 82 | b, c, h, w = descriptors.shape 83 | keypoints = keypoints - s / 2 + 0.5 84 | keypoints /= torch.tensor([(w*s - s/2 - 0.5), (h*s - s/2 - 0.5)]).to(keypoints)[None] 85 | keypoints = keypoints*2 - 1 # normalize to (-1, 1) 86 | args = {'align_corners': True} if int(torch.__version__[2]) > 2 else {} 87 | descriptors = torch.nn.functional.grid_sample( 88 | descriptors, keypoints.view(b, 1, -1, 2), mode='bilinear', **args) 89 | descriptors = torch.nn.functional.normalize( 90 | descriptors.reshape(b, c, -1), p=2, dim=1) 91 | return descriptors 92 | 93 | 94 | class SuperPoint(nn.Module): 95 | """SuperPoint Convolutional Detector and Descriptor 96 | 97 | SuperPoint: Self-Supervised Interest Point Detection and 98 | Description. Daniel DeTone, Tomasz Malisiewicz, and Andrew 99 | Rabinovich. In CVPRW, 2019. 
https://arxiv.org/abs/1712.07629 100 | 101 | """ 102 | default_config = { 103 | 'descriptor_dim': 256, 104 | 'nms_radius': 4, 105 | 'keypoint_threshold': 0.005, 106 | 'max_keypoints': -1, 107 | 'remove_borders': 4, 108 | } 109 | 110 | def __init__(self, config): 111 | super().__init__() 112 | self.config = {**self.default_config, **config} 113 | 114 | self.relu = nn.ReLU(inplace=True) 115 | self.pool = nn.MaxPool2d(kernel_size=2, stride=2) 116 | c1, c2, c3, c4, c5 = 64, 64, 128, 128, 256 117 | 118 | self.conv1a = nn.Conv2d(1, c1, kernel_size=3, stride=1, padding=1) 119 | self.conv1b = nn.Conv2d(c1, c1, kernel_size=3, stride=1, padding=1) 120 | self.conv2a = nn.Conv2d(c1, c2, kernel_size=3, stride=1, padding=1) 121 | self.conv2b = nn.Conv2d(c2, c2, kernel_size=3, stride=1, padding=1) 122 | self.conv3a = nn.Conv2d(c2, c3, kernel_size=3, stride=1, padding=1) 123 | self.conv3b = nn.Conv2d(c3, c3, kernel_size=3, stride=1, padding=1) 124 | self.conv4a = nn.Conv2d(c3, c4, kernel_size=3, stride=1, padding=1) 125 | self.conv4b = nn.Conv2d(c4, c4, kernel_size=3, stride=1, padding=1) 126 | 127 | self.convPa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1) 128 | self.convPb = nn.Conv2d(c5, 65, kernel_size=1, stride=1, padding=0) 129 | 130 | self.convDa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1) 131 | self.convDb = nn.Conv2d( 132 | c5, self.config['descriptor_dim'], 133 | kernel_size=1, stride=1, padding=0) 134 | 135 | path = Path(self.config['model_dir']) / 'weights/superpoint_v1.pth' 136 | self.load_state_dict(torch.load(str(path))) 137 | 138 | mk = self.config['max_keypoints'] 139 | if mk == 0 or mk < -1: 140 | raise ValueError('\"max_keypoints\" must be positive or \"-1\"') 141 | 142 | print('Loaded SuperPoint model') 143 | 144 | def forward(self, data): 145 | """ Compute keypoints, scores, descriptors for image """ 146 | # Shared Encoder 147 | x = self.relu(self.conv1a(data['image'])) 148 | x = self.relu(self.conv1b(x)) 149 | x = self.pool(x) 150 | x = self.relu(self.conv2a(x)) 151 | x = self.relu(self.conv2b(x)) 152 | x = self.pool(x) 153 | x = self.relu(self.conv3a(x)) 154 | x = self.relu(self.conv3b(x)) 155 | x = self.pool(x) 156 | x = self.relu(self.conv4a(x)) 157 | x = self.relu(self.conv4b(x)) 158 | 159 | # Compute the dense keypoint scores 160 | cPa = self.relu(self.convPa(x)) 161 | scores = self.convPb(cPa) 162 | scores = torch.nn.functional.softmax(scores, 1)[:, :-1] 163 | b, _, h, w = scores.shape 164 | scores = scores.permute(0, 2, 3, 1).reshape(b, h, w, 8, 8) 165 | scores = scores.permute(0, 1, 3, 2, 4).reshape(b, h*8, w*8) 166 | scores = simple_nms(scores, self.config['nms_radius']) 167 | 168 | # Extract keypoints 169 | keypoints = [ 170 | torch.nonzero(s > self.config['keypoint_threshold']) 171 | for s in scores] 172 | scores = [s[tuple(k.t())] for s, k in zip(scores, keypoints)] 173 | 174 | # Discard keypoints near the image borders 175 | keypoints, scores = list(zip(*[ 176 | remove_borders(k, s, self.config['remove_borders'], h*8, w*8) 177 | for k, s in zip(keypoints, scores)])) 178 | 179 | # Keep the k keypoints with highest score 180 | if self.config['max_keypoints'] >= 0: 181 | keypoints, scores = list(zip(*[ 182 | top_k_keypoints(k, s, self.config['max_keypoints']) 183 | for k, s in zip(keypoints, scores)])) 184 | 185 | # Convert (h, w) to (x, y) 186 | keypoints = [torch.flip(k, [1]).float() for k in keypoints] 187 | 188 | # Compute the dense descriptors 189 | cDa = self.relu(self.convDa(x)) 190 | descriptors = self.convDb(cDa) 191 | descriptors = 
torch.nn.functional.normalize(descriptors, p=2, dim=1) 192 | 193 | # Extract descriptors 194 | descriptors = [sample_descriptors(k[None], d[None], 8)[0] 195 | for k, d in zip(keypoints, descriptors)] 196 | 197 | return { 198 | 'keypoints': keypoints, 199 | 'scores': scores, 200 | 'descriptors': descriptors, 201 | } 202 | -------------------------------------------------------------------------------- /src/train.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import argparse 3 | from timeit import default_timer as timer 4 | 5 | from torch.nn import DataParallel 6 | import torch.distributed as dist 7 | from torch.utils.data import DataLoader, RandomSampler, SequentialSampler 8 | from apex.parallel import DistributedDataParallel 9 | from apex.parallel import convert_syncbn_model 10 | 11 | from config.config import * 12 | from layers.loss import * 13 | from layers.scheduler import * 14 | from utilities.augment_util import * 15 | from layers.tool import Logger 16 | from networks.imageclsnet import init_network 17 | from dataset.landmark_dataset import RetrievalDataset, image_collate 18 | from utilities.metric_util import generate_score_by_model 19 | 20 | parser = argparse.ArgumentParser(description='PyTorch Classification') 21 | parser.add_argument('--out_dir', type=str, help='destination where trained network should be saved') 22 | parser.add_argument('--gpu_id', default='0', type=str, help='gpu id used for training (default: 0)') 23 | parser.add_argument('--arch', default='class_efficientnet_b7_gem_fc_arcface2_1head', type=str, 24 | help='model architecture (default: class_efficientnet_b7_gem_fc_arcface2_1head)') 25 | parser.add_argument('--num_classes', default=81313, type=int, help='number of classes (default: 81313)') 26 | parser.add_argument('--in_channels', default=3, type=int, help='in channels (default: 3)') 27 | parser.add_argument('--distributed', default=1, type=int, help='distributed train (default: 1)') 28 | parser.add_argument('--local_rank', type=int, default=0) 29 | parser.add_argument('--loss', default='LabelSmoothingLossV1', type=str, help='loss function (default: LabelSmoothingLossV1)') 30 | parser.add_argument('--scheduler', default='SGD', type=str, help='scheduler name (default: SGD)') 31 | parser.add_argument('--epochs', default=7, type=int, help='number epochs to train (default: 7)') 32 | parser.add_argument('--resume', default=None, type=str, help='name of the latest checkpoint (default: None)') 33 | parser.add_argument('--aug_version', default='1', type=str, help='argument version (default: 1)') 34 | parser.add_argument('--split_type', default='v2c', type=str) 35 | parser.add_argument('--batch_size', default=7, type=int, help='train mini-batch size (default: 7)') 36 | parser.add_argument('--workers', default=4, type=int, help='number of data loading workers (default: 4)') 37 | parser.add_argument('--preprocessing', type=int, default=1) 38 | parser.add_argument('--save_every_epoch', type=float, default=0.1) 39 | parser.add_argument('--img_size', default=448, type=int, help='image size (default: 448)') 40 | parser.add_argument('--eval_img_size', default=512, type=int, help='image size (default: 512)') 41 | parser.add_argument('--model_file', default=None, type=str, help='fine tune with model file (default: None)') 42 | args = parser.parse_args() 43 | 44 | def main(): 45 | args.can_print = (args.distributed and args.local_rank == 0) or (not args.distributed) 46 | 47 | log_out_dir = 
f'{RESULT_DIR}/logs/{args.out_dir}' 48 | os.makedirs(log_out_dir, exist_ok=True) 49 | if args.can_print: 50 | log = Logger() 51 | log.open(f'{log_out_dir}/log.train.txt', mode='a') 52 | else: 53 | log = None 54 | 55 | model_out_dir = f'{RESULT_DIR}/models/{args.out_dir}' 56 | if args.can_print: 57 | log.write(f'>> Creating directory if it does not exist:\n>> {model_out_dir}\n') 58 | os.makedirs(model_out_dir, exist_ok=True) 59 | 60 | # set cuda visible device 61 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id 62 | if args.distributed: 63 | torch.cuda.set_device(args.local_rank) 64 | 65 | # set random seeds 66 | torch.manual_seed(0) 67 | torch.cuda.manual_seed_all(0) 68 | np.random.seed(0) 69 | 70 | model_params = {} 71 | model_params['architecture'] = args.arch 72 | model_params['num_classes'] = args.num_classes 73 | model_params['in_channels'] = args.in_channels 74 | model_params['can_print'] = args.can_print 75 | model = init_network(model_params) 76 | 77 | # move network to gpu 78 | if args.distributed: 79 | dist.init_process_group(backend='nccl', init_method='env://') 80 | model = convert_syncbn_model(model) 81 | model.cuda() 82 | if args.distributed: 83 | model = DistributedDataParallel(model, delay_allreduce=True) 84 | else: 85 | model = DataParallel(model) 86 | 87 | # define loss function (criterion) 88 | try: 89 | criterion = eval(args.loss)().cuda() 90 | except: 91 | raise RuntimeError(f'Loss {args.loss} not available!') 92 | 93 | start_epoch = 0 94 | best_score = 0 95 | best_epoch = 0 96 | 97 | # define scheduler 98 | try: 99 | scheduler = eval(args.scheduler)(model) 100 | except: 101 | raise RuntimeError(f'Scheduler {args.scheduler} not available!') 102 | 103 | # optionally resume from a checkpoint 104 | reset_epoch = True 105 | pretrained_file = None 106 | if args.model_file: 107 | reset_epoch = True 108 | pretrained_file = args.model_file 109 | if args.resume: 110 | reset_epoch = False 111 | pretrained_file = f'{model_out_dir}/{args.resume}' 112 | if pretrained_file and os.path.isfile(pretrained_file): 113 | # load checkpoint weights and update model and optimizer 114 | if args.can_print: 115 | log.write(f'>> Loading checkpoint:\n>> {pretrained_file}\n') 116 | 117 | checkpoint = torch.load(pretrained_file) 118 | if not reset_epoch: 119 | start_epoch = checkpoint['epoch'] 120 | best_epoch = checkpoint['best_epoch'] 121 | best_score = checkpoint['best_score'] 122 | model.module.load_state_dict(checkpoint['state_dict']) 123 | if args.can_print: 124 | if reset_epoch: 125 | log.write(f'>>>> loaded checkpoint:\n>>>> {pretrained_file}\n') 126 | else: 127 | log.write(f'>>>> loaded checkpoint:\n>>>> {pretrained_file} (epoch {checkpoint["epoch"]:.2f})\n') 128 | else: 129 | if args.can_print: 130 | log.write(f'>> No checkpoint found at {pretrained_file}\n') 131 | 132 | # Data loading code 133 | train_transform = eval(f'train_multi_augment{args.aug_version}') 134 | train_split_file = f'{DATA_DIR}/split/{args.split_type}/random_train_cv0.csv' 135 | valid_split_file = f'{DATA_DIR}/split/{args.split_type}/random_valid_cv0.csv' 136 | train_dataset = RetrievalDataset( 137 | args, 138 | train_split_file, 139 | transform=train_transform, 140 | data_type='train', 141 | ) 142 | valid_dataset = RetrievalDataset( 143 | args, 144 | valid_split_file, 145 | transform=None, 146 | data_type='valid', 147 | ) 148 | if args.distributed: 149 | train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) 150 | valid_sampler = torch.utils.data.distributed.DistributedSampler(valid_dataset) 
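        # DistributedSampler hands each process its own shard of the dataset, so
        # every rank sees different samples and the effective batch size becomes
        # batch_size * world_size; the non-distributed branch below falls back to
        # plain random / sequential sampling.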
151 | else: 152 | train_sampler = RandomSampler(train_dataset) 153 | valid_sampler = SequentialSampler(valid_dataset) 154 | train_loader = DataLoader( 155 | train_dataset, 156 | sampler=train_sampler, 157 | batch_size=args.batch_size, 158 | drop_last=True, 159 | num_workers=args.workers, 160 | pin_memory=True, 161 | collate_fn=image_collate, 162 | ) 163 | valid_loader = DataLoader( 164 | valid_dataset, 165 | sampler=valid_sampler, 166 | batch_size=args.batch_size, 167 | drop_last=False, 168 | num_workers=args.workers, 169 | pin_memory=True, 170 | collate_fn=image_collate, 171 | ) 172 | 173 | train(args, train_loader, valid_loader, model, criterion, scheduler, log, best_epoch, best_score, start_epoch, model_out_dir) 174 | 175 | def train_generator(dataloader): 176 | while True: 177 | for it, (images, labels) in enumerate(dataloader, 0): 178 | if it == len(dataloader)-1: 179 | dataloader.dataset.on_epoch_end() 180 | yield images, labels 181 | 182 | def reduce_tensor(tensor: torch.Tensor): 183 | rt = tensor.clone() 184 | dist.all_reduce(rt, op=dist.reduce_op.SUM) 185 | rt /= dist.get_world_size() 186 | return rt 187 | 188 | def train(args, train_loader, valid_loader, model, criterion, scheduler, log, best_epoch, best_score, start_epoch, model_out_dir): 189 | if args.can_print: 190 | log.write('** start training here! **\n') 191 | log.write('\n') 192 | log.write('epoch iter rate | train_loss/acc | valid_loss/acc/map100 | best_epoch/score | min (valid_min) \n') 193 | log.write('--------------------------------------------------------------------------------------------------------------------\n') 194 | 195 | model.train() 196 | last_cpt_epoch = start_epoch 197 | epoch = start_epoch 198 | max_iters = args.epochs * len(train_loader) 199 | global_step = int(start_epoch * len(train_loader)) 200 | smooth_loss = 0.0 201 | smooth_acc = 0.0 202 | train_num = 0 203 | start = timer() 204 | for it, iter_data in enumerate(train_generator(train_loader)): 205 | if global_step >= max_iters: 206 | break 207 | 208 | optimizer, rate = scheduler.schedule(epoch, args.epochs, best_epoch=best_epoch) 209 | images, labels = iter_data 210 | images = Variable(images.cuda()) 211 | labels = Variable(labels.cuda()) 212 | outputs = model(images, label=labels) 213 | loss = criterion(outputs, labels, epoch=epoch) 214 | if type(outputs) == tuple: 215 | logits = outputs[0] 216 | else: 217 | logits = outputs 218 | probs = F.softmax(logits).data 219 | 220 | optimizer.zero_grad() 221 | loss.backward() 222 | optimizer.step() 223 | 224 | train_acc = (probs.argmax(dim=1) == labels).float().mean() 225 | if args.distributed: 226 | train_loss = reduce_tensor(loss.data) 227 | train_acc = reduce_tensor(train_acc) 228 | else: 229 | train_loss = loss 230 | if smooth_loss == 0.0: 231 | smooth_loss = train_loss.item() 232 | smooth_acc = train_acc.item() 233 | smooth_loss = 0.99 * smooth_loss + 0.01 * train_loss.item() 234 | smooth_acc = 0.99 * smooth_acc + 0.01 * train_acc.item() 235 | global_step += 1 236 | if args.distributed: 237 | train_num += (args.batch_size * dist.get_world_size()) 238 | else: 239 | train_num += args.batch_size 240 | epoch = start_epoch + train_num / len(train_loader.dataset) 241 | 242 | if args.can_print: 243 | print('\r%5.2f %6d %0.6f | %6.4f %6.4f | ... 
' % (epoch, global_step, rate, smooth_loss, smooth_acc), end='', flush=True) 244 | 245 | if int(epoch * 1000) > 0 and int(epoch * 1000) % int(args.save_every_epoch * 1000) == 0 \ 246 | and epoch - last_cpt_epoch > args.save_every_epoch * 0.1: 247 | need_eval = True 248 | last_cpt_epoch = epoch 249 | else: 250 | need_eval = False 251 | 252 | model_file = f'{model_out_dir}/{epoch:.2f}.pth' 253 | if need_eval and args.can_print: 254 | save_model(model, model_file, best_score, best_epoch, epoch) 255 | 256 | if need_eval: 257 | model.eval() 258 | valid_start = timer() 259 | with torch.no_grad(): 260 | valid_loss, valid_acc = validate(args, model, epoch, valid_loader, criterion) 261 | if args.can_print: 262 | valid_map100 = generate_score_by_model( 263 | model, img_size=(args.eval_img_size, args.eval_img_size), 264 | batch_size=1, preprocessing=args.preprocessing 265 | ) 266 | else: 267 | valid_map100 = 0 268 | valid_end = timer() 269 | valid_run_time = (valid_end - valid_start) / 60. 270 | 271 | if valid_map100 > best_score: 272 | best_score = valid_map100 273 | best_epoch = epoch 274 | final_model_file = f'{model_out_dir}/final.pth' 275 | save_model(model, final_model_file, best_score, best_epoch, epoch) 276 | save_model(model, model_file, best_score, best_epoch, epoch) 277 | 278 | end = timer() 279 | time = (end - start) / 60 280 | start = timer() 281 | 282 | if args.can_print: 283 | print('\r', end='', flush=True) 284 | log.write( 285 | '%5.2f %6d %0.6f | %6.4f %6.4f | %6.4f %6.4f %6.4f | %5.2f %6.4f | %3.1f min (%3.1f min) \n' % \ 286 | (epoch, global_step, rate, smooth_loss, smooth_acc, valid_loss, valid_acc, valid_map100, best_epoch, best_score, time, valid_run_time) 287 | ) 288 | model.train() 289 | 290 | def validate(args, model, epoch, valid_loader, criterion): 291 | valid_num = 0 292 | valid_loss = 0 293 | valid_acc = 0 294 | for it, iter_data in enumerate(valid_loader, 0): 295 | images, labels = iter_data 296 | 297 | images = Variable(images.cuda()) 298 | labels = Variable(labels.cuda()) 299 | 300 | outputs = model(images, label=labels) 301 | loss = criterion(outputs, labels, epoch=epoch) 302 | if type(outputs) == tuple: 303 | logits = outputs[0] 304 | else: 305 | logits = outputs 306 | probs = F.softmax(logits).data 307 | batch_size = len(images) 308 | valid_acc_batch = (probs.argmax(dim=1) == labels).float().mean() 309 | if args.distributed: 310 | valid_loss_batch = reduce_tensor(loss.data) 311 | valid_acc_batch = reduce_tensor(valid_acc_batch) 312 | else: 313 | valid_loss_batch = loss 314 | 315 | valid_num += batch_size 316 | valid_loss += batch_size * valid_loss_batch.item() 317 | valid_acc += batch_size * valid_acc_batch.item() 318 | 319 | valid_loss = valid_loss / valid_num 320 | valid_acc = valid_acc / valid_num 321 | return valid_loss, valid_acc 322 | 323 | def save_model(model, model_file, best_score, best_epoch, epoch): 324 | if type(model) == DataParallel or type(model) == DistributedDataParallel: 325 | state_dict = model.module.state_dict() 326 | else: 327 | state_dict = model.state_dict() 328 | state_dict_to_save=dict() 329 | for key in state_dict.keys(): 330 | state_dict_to_save[key] = state_dict[key].cpu() 331 | torch.save({ 332 | 'best_score': best_score, 333 | 'state_dict': state_dict_to_save, 334 | 'best_epoch': best_epoch, 335 | 'epoch': epoch, 336 | }, model_file) 337 | 338 | if __name__ == '__main__': 339 | main() 340 | -------------------------------------------------------------------------------- /src/utilities/__init__.py: 
-------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Module to extract deep local features.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | # pylint: disable=unused-import 21 | # from delf.protos import aggregation_config_pb2 22 | # from delf.protos import box_pb2 23 | # from delf.protos import datum_pb2 24 | # from delf.protos import delf_config_pb2 25 | # from delf.protos import feature_pb2 26 | # from delf.python import box_io 27 | # from delf.python import datum_io 28 | # from delf.python import feature_aggregation_extractor 29 | # from delf.python import feature_aggregation_similarity 30 | # from delf.python import feature_extractor 31 | # from delf.python import feature_io 32 | # from delf.python import utils 33 | # from delf.python.examples import detector 34 | # from delf.python.examples import extractor 35 | # from delf.python import detect_to_retrieve 36 | # from delf.python import training 37 | # from delf.python.training import model 38 | # from delf.python.training import datasets 39 | # pylint: enable=unused-import 40 | -------------------------------------------------------------------------------- /src/utilities/augment_rand.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from PIL import Image, ImageOps, ImageEnhance, ImageDraw 4 | 5 | fast_randaugment_list = [ 6 | ('Identity', 0, 1), 7 | ('AutoContrast', 0, 10), 8 | ('Block_fade', 0, 0.5), 9 | ('Brightness', 0.5, 1.5), 10 | ('Color', 0.0, 2.0), 11 | ('Contrast', 0.0, 2.0), 12 | ('Cutout', 0, 0.5), 13 | ('Rotate', -20, 20), 14 | ('ShearX', 0., 0.1), 15 | ('ShearY', 0., 0.1), 16 | ('TranslateX', 0., 0.25), 17 | ('TranslateY', 0., 0.25), 18 | ] 19 | 20 | def apply_op(image, op, severity): 21 | pil_img = Image.fromarray(image) 22 | pil_img = eval(op)(pil_img, severity) 23 | return np.asarray(pil_img) 24 | 25 | def randaugment_base(augment_list, img, n, m, div=10, prob=1.): 26 | # ops = np.random.choice(augment_list, size=n) 27 | ops_idx = np.random.choice(len(augment_list), replace=False, size=n) 28 | ops = np.array(augment_list)[ops_idx] 29 | for op, minseverity, maxseverity in ops: 30 | minseverity = float(minseverity) 31 | maxseverity = float(maxseverity) 32 | severity = (float(m) / div) * float(maxseverity - minseverity) + minseverity 33 | if np.random.random() < prob: 34 | img = apply_op(img, str(op), severity) 35 | return img 36 | 37 | def CutoutAbs(img, v): 38 | if v < 0: 39 | return img 40 | w, h = img.size 41 | x0 = np.random.uniform(w) 42 | y0 = np.random.uniform(h) 43 | 44 | x0 = int(max(0, x0 - v / 2.)) 45 | y0 = int(max(0, y0 - v / 2.)) 46 | x1 = min(w, x0 + v) 47 | y1 = min(h, y0 + 
v) 48 | 49 | xy = (x0, y0, x1, y1) 50 | color = (125, 123, 114) 51 | img = img.copy() 52 | ImageDraw.Draw(img).rectangle(xy, color) 53 | return img 54 | 55 | def do_random_block_fade(image, magnitude=0.5): 56 | size = [0.1, magnitude] 57 | height, width = image.shape[:2] 58 | 59 | # get bounding box 60 | m = image.copy() 61 | cv2.rectangle(m, (0, 0), (height, width), 1, 5) 62 | m = image < 0.5 63 | if m.sum() == 0: return image 64 | 65 | m = np.where(m) 66 | y0, y1, x0, x1 = np.min(m[0]), np.max(m[0]), np.min(m[1]), np.max(m[1]) 67 | w = x1 - x0 68 | h = y1 - y0 69 | if w * h < 10: return image 70 | 71 | ew, eh = np.random.uniform(*size, 2) 72 | ew = int(ew * w) 73 | eh = int(eh * h) 74 | 75 | ex = np.random.randint(0, w - ew) + x0 76 | ey = np.random.randint(0, h - eh) + y0 77 | 78 | image[ey:ey + eh, ex:ex + ew] *= np.random.uniform(0.1, 0.5) # 1 # 79 | image = np.clip(image, 0, 1) 80 | return image 81 | 82 | def Identity(img, _): 83 | return img 84 | 85 | def AutoContrast(img, v): 86 | return ImageOps.autocontrast(img, v) 87 | 88 | def Rotate(img, v): 89 | if np.random.random() > 0.5: 90 | v = -v 91 | return img.rotate(v) 92 | 93 | def Color(img, v): 94 | return ImageEnhance.Color(img).enhance(v) 95 | 96 | def Contrast(img, v): 97 | return ImageEnhance.Contrast(img).enhance(v) 98 | 99 | def Brightness(img, v): 100 | return ImageEnhance.Brightness(img).enhance(v) 101 | 102 | def ShearX(img, v): 103 | if np.random.random() > 0.5: 104 | v = -v 105 | return img.transform(img.size, Image.AFFINE, (1, v, 0, 0, 1, 0)) 106 | 107 | def ShearY(img, v): 108 | if np.random.random() > 0.5: 109 | v = -v 110 | return img.transform(img.size, Image.AFFINE, (1, 0, 0, v, 1, 0)) 111 | 112 | def TranslateX(img, v): 113 | if np.random.random() > 0.5: 114 | v = -v 115 | v = v * img.size[0] 116 | return img.transform(img.size, Image.AFFINE, (1, 0, v, 0, 1, 0)) 117 | 118 | def TranslateY(img, v): 119 | if np.random.random() > 0.5: 120 | v = -v 121 | v = v * img.size[1] 122 | return img.transform(img.size, Image.AFFINE, (1, 0, 0, 0, 1, v)) 123 | 124 | def Cutout(img, v): 125 | if v <= 0.: 126 | return img 127 | 128 | v = v * img.size[0] 129 | return CutoutAbs(img, v) 130 | 131 | def Block_fade(img, v): 132 | img = np.asarray(img) 133 | img = img / 255. 134 | img = do_random_block_fade(img, v) 135 | img = img * 255. 
136 | return img.astype(np.uint8) 137 | -------------------------------------------------------------------------------- /src/utilities/augment_util.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '..') 3 | from imgaug import augmenters as iaa 4 | 5 | from config.config import * 6 | from utilities.augment_rand import * 7 | import random 8 | import torchvision 9 | 10 | def train_multi_augment1(image, **kwargs): 11 | seq = iaa.SomeOf(1, [ 12 | iaa.Noop(), 13 | iaa.Fliplr(p=1), 14 | ]) 15 | image = seq.augment_images([image])[0] 16 | return image 17 | 18 | def train_multi_augment3(image, img_size=(600, 600), **kwargs): 19 | if np.random.random() < 0.5: 20 | image = random_crop_long_edge(image, img_size) 21 | else: 22 | image = cv2.resize(image, img_size) 23 | return image 24 | 25 | def train_multi_augment3b(image, img_size=(600, 600), **kwargs): 26 | image = do_rand_aug(image, fast_randaugment_list) 27 | image = train_multi_augment3(image, img_size=img_size) 28 | return image 29 | 30 | def do_rand_aug(image, randaugment_list): 31 | if np.random.random() < 0.03: 32 | if np.random.random() < 0.5: 33 | image = np.rot90(image, k=1) 34 | else: 35 | image = np.rot90(image, k=3) 36 | 37 | n = 3 38 | m = random.randint(1, 9) 39 | if image.max() <= 1: 40 | image = (image * 255).astype('uint8') 41 | image = randaugment_base(randaugment_list, image, n, m, div=10, prob=1) 42 | 43 | seq = iaa.OneOf([ 44 | iaa.Noop(), 45 | iaa.Fliplr(p=1), 46 | iaa.Affine(scale=(0.75, 1.25)), 47 | ]) 48 | image = seq.augment_images([image])[0] 49 | return image 50 | 51 | def random_crop_long_edge(img, img_size=(600,600)): 52 | if img.max() <= 1: 53 | img = (img * 255).astype('uint8') 54 | size = (min(img.shape[:2]), min(img.shape[:2])) 55 | i = (0 if size[0] == img.shape[0] 56 | else np.random.randint(low=0,high=img.shape[0] - size[0])) 57 | j = (0 if size[1] == img.shape[1] 58 | else np.random.randint(low=0,high=img.shape[1] - size[1])) 59 | img = Image.fromarray(img) 60 | img = torchvision.transforms.functional.crop(img, i, j, size[1], size[0]) 61 | img = np.asarray(img) 62 | img = cv2.resize(img, img_size) 63 | return img 64 | -------------------------------------------------------------------------------- /src/utilities/common_util.py: -------------------------------------------------------------------------------- 1 | from config.config import * 2 | 3 | osp = os.path 4 | ope = os.path.exists 5 | opj = os.path.join 6 | -------------------------------------------------------------------------------- /src/utilities/metric_util.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '..') 3 | from pathlib import Path 4 | from scipy.spatial import distance 5 | import pandas as pd 6 | import cv2 7 | import torch 8 | from layers.normalization import L2N 9 | from config.config import * 10 | from torch.autograd import Variable 11 | from torch.utils.data import Dataset, DataLoader, SequentialSampler 12 | from albumentations import Normalize 13 | 14 | def MeanAveragePrecision(predictions, retrieval_solution, max_predictions=100, save_perimg_score=False): 15 | """Computes mean average precision for retrieval prediction. 16 | 17 | Args: 18 | predictions: Dict mapping test image ID to a list of strings corresponding 19 | to index image IDs. 20 | retrieval_solution: Dict mapping test image ID to list of ground-truth image 21 | IDs. 
22 | max_predictions: Maximum number of predictions per query to take into 23 | account. For the Google Landmark Retrieval challenge, this should be set 24 | to 100. 25 | 26 | Returns: 27 | mean_ap: Mean average precision score (float). 28 | 29 | Raises: 30 | ValueError: If a test image in `predictions` is not included in 31 | `retrieval_solutions`. 32 | """ 33 | # Compute number of test images. 34 | num_test_images = len(retrieval_solution.keys()) 35 | 36 | # Loop over predictions for each query and compute mAP. 37 | mean_ap = 0.0 38 | score_map = {} 39 | for key, prediction in predictions.items(): 40 | if key not in retrieval_solution: 41 | raise ValueError('Test image %s is not part of retrieval_solution' % key) 42 | 43 | # Loop over predicted images, keeping track of those which were already 44 | # used (duplicates are skipped). 45 | ap = 0.0 46 | already_predicted = set() 47 | num_expected_retrieved = min(len(retrieval_solution[key]), max_predictions) 48 | num_correct = 0 49 | for i in range(min(len(prediction), max_predictions)): 50 | if prediction[i] not in already_predicted: 51 | if prediction[i] in retrieval_solution[key]: 52 | num_correct += 1 53 | ap += num_correct / (i + 1) 54 | already_predicted.add(prediction[i]) 55 | 56 | ap /= num_expected_retrieved 57 | mean_ap += ap 58 | score_map[key] = ap 59 | 60 | mean_ap /= num_test_images 61 | if save_perimg_score: 62 | return mean_ap, score_map 63 | else: 64 | return mean_ap 65 | 66 | def generate_score_by_model(model, img_size=None, scale=None, selected_num=200, batch_size=1, preprocessing=False): 67 | # assert (img_size is None and scale is not None) or (img_size is not None and scale is None) 68 | K = 100 69 | QUERY_IMAGE_DIR = f'{DATA_DIR}/images/test' 70 | INDEX_IMAGE_DIR = f'{DATA_DIR}/images/index' 71 | solution_df = pd.read_csv(f'{DATA_DIR}/raw/retrieval_solution_v2.1.csv') 72 | private_df = solution_df[solution_df['Usage'] == 'Private'] 73 | private_solution = {} 74 | private_test_img_ids = [] 75 | private_index_img_ids = [] 76 | 77 | for i, row in private_df.iterrows(): 78 | private_solution[row['id']] = row['images'].split(' ') 79 | private_test_img_ids.append(row['id']) 80 | private_index_img_ids.extend(row['images'].split(' ')) 81 | if len(private_test_img_ids) >= selected_num: 82 | break 83 | 84 | private_test_img_ids = list(set(private_test_img_ids)) 85 | private_index_img_ids = list(set(private_index_img_ids)) 86 | 87 | class _TestDataset(Dataset): 88 | def __init__(self, image_paths, img_size=None): 89 | self.image_paths = image_paths 90 | self.img_size = img_size 91 | 92 | def __len__(self): 93 | return len(self.image_paths) 94 | 95 | def __getitem__(self, idx): 96 | image_path = self.image_paths[idx] 97 | image = cv2.imread(str(image_path)) 98 | image = image[..., ::-1] 99 | image = cv2.resize(image, self.img_size) 100 | if preprocessing == 1: 101 | norm = Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0) 102 | image = norm(image=image)['image'] 103 | else: 104 | image = image / 255.0 105 | image = np.transpose(image, (2, 0, 1)) 106 | image_tensor = torch.from_numpy(image).float() 107 | return image_tensor 108 | 109 | def _create_dataset(image_paths, img_size, batch_size): 110 | dataset = _TestDataset(image_paths, img_size) 111 | data_loader = DataLoader( 112 | dataset, 113 | sampler=SequentialSampler(dataset), 114 | batch_size=batch_size, 115 | drop_last=False, 116 | num_workers=4, 117 | pin_memory=True, 118 | ) 119 | return data_loader 120 | 121 | def 
_get_embedding(image_tensor): 122 | image_tensor = Variable(image_tensor.cuda()) 123 | embedding = model.module.extract_feature(image_tensor) 124 | embedding = L2N()(embedding) 125 | return embedding 126 | 127 | def _get_id(image_path: Path): 128 | return int(image_path.name.split('.')[0], 16) 129 | 130 | def _get_embeddings(image_root_dir: str): 131 | if image_root_dir.count('test') > 0: 132 | image_paths = [Path(f'{image_root_dir}/{img_id}.jpg') for img_id in private_test_img_ids] 133 | else: 134 | image_paths = [Path(f'{image_root_dir}/{img_id}.jpg') for img_id in private_index_img_ids] 135 | dataloader = _create_dataset(image_paths, img_size, batch_size) 136 | embeddings = [] 137 | for image_tensor in dataloader: 138 | embedding = _get_embedding(image_tensor) 139 | embedding = embedding.cpu().detach().numpy() 140 | embeddings.extend(embedding) 141 | ids = [_get_id(image_path) for image_path in image_paths] 142 | return ids, embeddings 143 | 144 | def _to_hex(image_id: int) -> str: 145 | return '{0:0{1}x}'.format(image_id, 16) 146 | 147 | def _get_metrics(predictions, solution): 148 | relevant_predictions = {} 149 | 150 | for key in solution.keys(): 151 | if key in predictions: 152 | relevant_predictions[key] = predictions[key] 153 | 154 | # Mean average precision. 155 | mean_average_precision = MeanAveragePrecision(relevant_predictions, solution, max_predictions=K) 156 | 157 | return mean_average_precision 158 | 159 | query_ids, query_embeddings = _get_embeddings(QUERY_IMAGE_DIR) 160 | index_ids, index_embeddings = _get_embeddings(INDEX_IMAGE_DIR) 161 | distances = distance.cdist(np.array(query_embeddings), np.array(index_embeddings), 'euclidean') 162 | predicted_positions = np.argpartition(distances, K, axis=1)[:, :K] 163 | 164 | predictions = {} 165 | for i, query_id in enumerate(query_ids): 166 | nearest = [(index_ids[j], distances[i, j]) for j in predicted_positions[i]] 167 | nearest.sort(key=lambda x: x[1]) 168 | prediction = [_to_hex(index_id) for index_id, d in nearest] 169 | predictions[_to_hex(query_id)] = prediction 170 | score = _get_metrics(predictions, private_solution) 171 | return score 172 | -------------------------------------------------------------------------------- /src/utilities/model_util.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '..') 3 | import pandas as pd 4 | import torch 5 | from torch.nn import DataParallel 6 | from apex.parallel import DistributedDataParallel 7 | 8 | from config.config import * 9 | 10 | def load_pretrained_state_dict(net, load_state_dict, strict=False, can_print=True, extend_W=None, num_classes=-1): 11 | if 'epoch' in load_state_dict and can_print: 12 | epoch = load_state_dict['epoch'] 13 | print(f'load epoch:{epoch:.2f}') 14 | if 'state_dict' in load_state_dict: 15 | load_state_dict = load_state_dict['state_dict'] 16 | elif 'model_state_dict' in load_state_dict: 17 | load_state_dict = load_state_dict['model_state_dict'] 18 | elif 'model' in load_state_dict: 19 | load_state_dict = load_state_dict['model'] 20 | if type(net) == DataParallel or type(net) == DistributedDataParallel: 21 | state_dict = net.module.state_dict() 22 | else: 23 | state_dict = net.state_dict() 24 | 25 | new_load_state_dict = dict() 26 | for key in load_state_dict.keys(): 27 | if key.startswith('module.'): 28 | dst_key = key.replace('module.', '') 29 | else: 30 | dst_key = key 31 | new_load_state_dict[dst_key] = load_state_dict[key] 32 | load_state_dict = new_load_state_dict 33 | 34 | if 
extend_W is not None: 35 | extend_key = 'face_margin_product.W' 36 | if extend_key not in load_state_dict: 37 | raise Exception(f'{extend_key} is not in load_state_dict') 38 | if extend_W == 'nolandmark': 39 | old_landmarks = np.arange(load_state_dict[extend_key].size(0)) 40 | new_landmarks = np.concatenate([old_landmarks, [np.max(old_landmarks) + 1]]) 41 | else: 42 | arr = np.load(f'{DATA_DIR}/input/landmarks_mapping_{extend_W}.npz', allow_pickle=True) 43 | old_landmarks = arr['old_landmarks'] 44 | new_landmarks = arr['new_landmarks'] 45 | if load_state_dict[extend_key].size(0) != num_classes: 46 | print(f'{extend_key} shape: {len(old_landmarks)} -> {len(new_landmarks)}') 47 | load_state_dict = extend_model_weight(load_state_dict, old_landmarks, new_landmarks, key=extend_key) 48 | 49 | for key in list(load_state_dict.keys()): 50 | if key not in state_dict: 51 | if key == 'maxpool.1.filt': 52 | state_dict['maxpool.0.filt'] = load_state_dict[key] 53 | state_dict['maxpool.2.filt'] = load_state_dict[key] 54 | print('weight maxpool.1.filt --> maxpool.0.filt AND maxpool.2.filt') 55 | elif strict: 56 | raise Exception(f'not in {key}') 57 | if can_print: 58 | print('not in', key) 59 | continue 60 | if load_state_dict[key].size() != state_dict[key].size(): 61 | if strict: 62 | raise Exception(f'size not the same {key}') 63 | if ('last_linear' in key or 'attention' in key) and (load_state_dict[key].size()[1:] == state_dict[key].size()[1:]): 64 | min_channel = min(state_dict[key].size(0), load_state_dict[key].size(0)) 65 | state_dict[key][:min_channel] = load_state_dict[key][:min_channel] 66 | elif can_print: 67 | print('size not the same', key) 68 | continue 69 | state_dict[key] = load_state_dict[key] 70 | if type(net) == DataParallel or type(net) == DistributedDataParallel: 71 | net.module.load_state_dict(state_dict) 72 | else: 73 | net.load_state_dict(state_dict) 74 | return net 75 | 76 | def extend_model_weight(state_dict, old_landmarks, new_landmarks, key='face_margin_product.W'): 77 | old_landmarks = np.sort(old_landmarks) 78 | new_landmarks = np.sort(new_landmarks) 79 | 80 | W = state_dict[key] 81 | assert W.size(0) == len(old_landmarks) 82 | assert len(W.size()) == 2 83 | new_W = torch.zeros((len(new_landmarks), W.size(1)), dtype=W.dtype) 84 | new_W[:, :] = W.mean(dim=0) 85 | 86 | old_ts = pd.Series(index=old_landmarks, data=np.arange(len(old_landmarks))) 87 | new_ts = pd.Series(index=new_landmarks, data=np.arange(len(new_landmarks))) 88 | intersection_landmarks = np.sort(list(set.intersection(set(old_landmarks), set(new_landmarks)))) 89 | new_W[new_ts[intersection_landmarks].tolist(), :] = W[old_ts[intersection_landmarks].tolist(), :] 90 | 91 | state_dict[key] = new_W 92 | return state_dict 93 | 94 | def load_pretrained(net, pretrained_file, strict=False,can_print=True): 95 | if can_print: 96 | print(f'load pretrained file: {pretrained_file}') 97 | load_state_dict = torch.load(pretrained_file) 98 | net = load_pretrained_state_dict(net, load_state_dict, strict=strict, can_print=can_print) 99 | return net 100 | -------------------------------------------------------------------------------- /src/utilities/superpointglue_util.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import torch 4 | 5 | def process_resize(w, h, resize): 6 | assert (len(resize) > 0 and len(resize) <= 2) 7 | if len(resize) == 1 and resize[0] > -1: 8 | scale = resize[0] / max(h, w) 9 | w_new, h_new = int(round(w * scale)), int(round(h * scale)) 
10 | elif len(resize) == 1 and resize[0] == -1: 11 | w_new, h_new = w, h 12 | else: # len(resize) == 2: 13 | w_new, h_new = resize[0], resize[1] 14 | 15 | # Issue warning if resolution is too small or too large. 16 | if max(w_new, h_new) < 160: 17 | print('Warning: input resolution is very small, results may vary') 18 | elif max(w_new, h_new) > 2000: 19 | print('Warning: input resolution is very large, results may vary') 20 | 21 | return w_new, h_new 22 | 23 | def frame2tensor(frame, device='cuda'): 24 | return torch.from_numpy(frame / 255.).float()[None, None].to(device) 25 | 26 | def read_image(path, resize, rotation, resize_float, device='cuda'): 27 | image = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE) 28 | if image is None: 29 | return None, None, None 30 | w, h = image.shape[1], image.shape[0] 31 | w_new, h_new = process_resize(w, h, resize) 32 | scales = (float(w) / float(w_new), float(h) / float(h_new)) 33 | 34 | if resize_float: 35 | image = cv2.resize(image.astype('float32'), (w_new, h_new)) 36 | else: 37 | image = cv2.resize(image, (w_new, h_new)).astype('float32') 38 | 39 | if rotation != 0: 40 | image = np.rot90(image, k=rotation) 41 | if rotation % 2: 42 | scales = scales[::-1] 43 | 44 | inp = frame2tensor(image, device=device) 45 | return image, inp, scales 46 | -------------------------------------------------------------------------------- /src/utilities/vectors_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def ivecs_read(fname): 4 | a = np.fromfile(fname, dtype='int32') 5 | d = a[0] 6 | return a.reshape(-1, d + 1)[:, 1:].copy() 7 | 8 | def fvecs_read(fname): 9 | return ivecs_read(fname).view('float32') 10 | --------------------------------------------------------------------------------
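The MeanAveragePrecision routine in src/utilities/metric_util.py above sums, for each query, the precision at every rank where a previously unseen ground-truth index image appears, normalizes by min(number of ground-truth images, max_predictions), and then averages over queries. The snippet below is a minimal, self-contained sketch of that computation on hypothetical IDs ('q1', 'a', 'b', 'x' are made up for illustration); it mirrors the logic of the repository function rather than importing or calling it:

# Toy re-computation of the per-query average precision used by
# MeanAveragePrecision; IDs are hypothetical and max_predictions is fixed at 100.
retrieval_solution = {'q1': ['a', 'b']}    # query -> ground-truth index image IDs
predictions = {'q1': ['a', 'x', 'b']}      # query -> ranked retrieved index image IDs

mean_ap = 0.0
for key, prediction in predictions.items():
    num_expected = min(len(retrieval_solution[key]), 100)
    num_correct, ap, seen = 0, 0.0, set()
    for i, p in enumerate(prediction[:100]):
        if p not in seen:
            if p in retrieval_solution[key]:
                num_correct += 1
                ap += num_correct / (i + 1)   # precision at rank i+1
            seen.add(p)
    mean_ap += ap / num_expected
mean_ap /= len(retrieval_solution)
print(round(mean_ap, 4))  # (1/1 + 2/3) / 2 = 0.8333

With one correct hit at rank 1 and the second at rank 3, the query's average precision is (1/1 + 2/3) / 2, which is the score generate_score_by_model reports (as valid_map100 in train.py) when averaged over the selected private test queries.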