├── LICENSE ├── README.md ├── notebooks └── generate_submissions.ipynb ├── requirements.txt └── src ├── config ├── __init__.py ├── config.py └── en_config.py ├── data_process └── create_split.py ├── dataset ├── __init__.py └── landmark_dataset.py ├── layers ├── __init__.py ├── backbone │ ├── __init__.py │ ├── efficientnet_pytorch.py │ ├── efficientnet_pytorch_utils.py │ └── resnet.py ├── loss.py ├── metric_learning.py ├── normalization.py ├── pooling.py ├── scheduler.py ├── scheduler_base.py └── tool.py ├── networks ├── __init__.py ├── efficientnet_gem_fc_face.py ├── imageclsnet.py ├── resnet_gem_fc_face.py └── superpointglue │ ├── __init__.py │ ├── matching.py │ ├── superglue.py │ └── superpoint.py ├── train.py └── utilities ├── __init__.py ├── augment_rand.py ├── augment_util.py ├── common_util.py ├── metric_util.py ├── model_util.py ├── superpointglue_util.py └── vectors_utils.py /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Instance Level Recognition 2 | 3 | This repository contains the code for the 2nd place solution to the 2020 edition of the Landmark Recognition competition hosted by Google on Kaggle: 4 | 5 | https://www.kaggle.com/c/landmark-recognition-2020/leaderboard 6 | 7 | The full solution is described here: 8 | 9 | https://www.kaggle.com/c/landmark-recognition-2020/discussion/188299 10 | 11 | ## Definition 12 | v2c (cleaned GLDv2): the cleaned version of GLDv2, with 1.6 million training images and 81k classes. All landmark test images belong to these classes. 13 | 14 | v2x: in the full GLDv2, 3.2 million images belong to the 81k classes of v2c. I define these 3.2 million images as v2x. 15 | 16 | ## Data preparation 17 | 1. Configure your local directories in CODE_DIR/src/config/config.py 18 | 19 | 2. Download the Google Landmarks Dataset v2 train, test and index sets from https://github.com/cvdfoundation/google-landmark and unpack them to DATA_DIR/images 20 | 21 | 3. Move train.csv and train_clean.csv to DATA_DIR/raw (they are provided by Kaggle and are not included in this solution) 22 | 23 | 4. Download the SuperPoint and SuperGlue models from https://github.com/magicleap/SuperPointPretrainedNetwork and https://github.com/magicleap/SuperGluePretrainedNetwork 24 | 25 | 5. Create the split files: 26 | ``` 27 | python create_split.py 28 | ``` 29 | 30 | ## Training retrieval models 31 | ### 1. Train EfficientNet B7 32 | 33 | 1.1 Train EfficientNet B7 v2c 448x448 model 34 | ``` 35 | python -m torch.distributed.launch --nproc_per_node=4 train.py --arch class_efficientnet_b7_gem_fc_arcface2_1head --save_every_epoch 0.1 --epochs 7 --img_size 448 --eval_img_size 512 --scheduler SGD --out_dir v2c_sgd_ls_aug1_norm1_0907_class_efficientnet_b7_gem_fc_arcface2_1head_i448 --loss LabelSmoothingLossV1 --aug_version 1 --split_type v2c --num_classes 81313 --gpu_id 0,1,2,3 --batch_size 4 --distributed 1 --preprocessing 1 36 | ``` 37 | 38 | 1.2. Train efficientnet_b7 v2x 448x448 model 39 | ``` 40 | python -m torch.distributed.launch --nproc_per_node=4 train.py --arch class_efficientnet_b7_gem_fc_arcface2_1head --save_every_epoch 0.1 --epochs 30 --img_size 448 --eval_img_size 512 --scheduler SGD --out_dir v2x_sgd_ls_aug3b_norm1_0918_class_efficientnet_b7_gem_fc_arcface2_1head_i448 --loss LabelSmoothingLossV1 --aug_version 1 --split_type v2x --num_classes 81313 --gpu_id 0,1,2,3 --batch_size 4 --distributed 1 --preprocessing 1 --model_file RESULT_DIR/models/v2c_sgd_ls_aug1_norm1_0907_class_efficientnet_b7_gem_fc_arcface2_1head_i448/6.70.pth 41 | ``` 42 | 43 | ### 2.
Train efficientnet_b6 44 | 45 | 2.1 Train efficientnet_b6 v2c 448x448 model 46 | ``` 47 | python -m torch.distributed.launch --nproc_per_node=4 train.py --arch class_efficientnet_b6_gem_fc_arcface2_1head --save_every_epoch 0.1 --epochs 30 --img_size 448 --eval_img_size 512 --scheduler SGD --out_dir v2c_sgd_ls_aug3b_norm1_0919_class_efficientnet_b6_gem_fc_arcface2_1head_i448 --loss LabelSmoothingLossV1 --aug_version 1 --split_type v2c --num_classes 81313 --gpu_id 0,1,2,3 --batch_size 6 --distributed 1 --preprocessing 1 48 | ``` 49 | 50 | 2.2 Train efficientnet_b6 v2x 448x448 model 51 | ``` 52 | python -m torch.distributed.launch --nproc_per_node=4 train.py --arch class_efficientnet_b6_gem_fc_arcface2_1head --save_every_epoch 0.1 --epochs 30 --img_size 448 --eval_img_size 512 --scheduler SGD --out_dir v2x_sgd_ls_aug3b_norm1_0919_class_efficientnet_b6_gem_fc_arcface2_1head_i448 --loss LabelSmoothingLossV1 --aug_version 1 --split_type v2x --num_classes 81313 --gpu_id 0,1,2,3 --batch_size 6 --distributed 1 --preprocessing 1 --model_file RESULT_DIR/models/v2c_sgd_ls_aug3b_norm1_0919_class_efficientnet_b6_gem_fc_arcface2_1head_i448/6.00.pth 53 | ``` 54 | 55 | ### 3 Train efficientnet_b5 56 | 57 | 3.1 Train efficientnet_b5 v2c 448x448 model 58 | ``` 59 | python -m torch.distributed.launch --nproc_per_node=4 train.py --arch class_efficientnet_b5_gem_fc_arcface_1head --save_every_epoch 0.1 --epochs 30 --img_size 448 --eval_img_size 512 --scheduler SGD --out_dir v2c_sgd_ls_aug3b_norm1_0918_class_efficientnet_b5_gem_fc_arcface2_1head_i448 --loss LabelSmoothingLossV1 --aug_version 1 --split_type v2c --num_classes 81313 --gpu_id 0,1,2,3 --distributed 1 --preprocessing 1 --batch_size 8 60 | ``` 61 | 62 | 3.2 Train efficientnet_b5 v2x 448x448 model 63 | ``` 64 | python -m torch.distributed.launch --nproc_per_node=4 train.py --arch class_efficientnet_b5_gem_fc_arcface_1head --save_every_epoch 0.2 --epochs 30 --img_size 448 --eval_img_size 512 --scheduler SGD --out_dir v2x_sgd_ls_aug3b_norm1_0918_class_efficientnet_b5_gem_fc_arcface2_1head_i448 --loss LabelSmoothingLossV1 --aug_version 1 --split_type v2x --num_classes 81313 --gpu_id 0,1,2,3 --batch_size 8 --distributed 1 --preprocessing 1 --model_file RESULT_DIR/models/v2c_sgd_ls_aug3b_norm1_0918_class_efficientnet_b5_gem_fc_arcface2_1head_i448/8.20.pth 65 | ``` 66 | 67 | ### 4. 
Train resnet152 67 | 68 | 4.1 Train resnet152 v2c 448x448 model 69 | ``` 70 | python -m torch.distributed.launch --nproc_per_node=4 train.py --arch class_resnet152_gem_fc_arcface_1head --save_every_epoch 0.2 --epochs 30 --img_size 448 --eval_img_size 512 --scheduler SGD --out_dir v2c_sgd_ls_aug3b_norm1_0919_class_resnet152_gem_fc_arcface_1head_i448 --loss LabelSmoothingLossV1 --aug_version 1 --split_type v2c --num_classes 81313 --gpu_id 0,1,2,3 --batch_size 10 --distributed 1 --preprocessing 1 71 | ``` 72 | 73 | 4.2 Train resnet152 v2x 448x448 model 74 | ``` 75 | python -m torch.distributed.launch --nproc_per_node=4 train.py --arch class_resnet152_gem_fc_arcface_1head --save_every_epoch 0.2 --epochs 30 --img_size 448 --eval_img_size 512 --scheduler SGD --out_dir v2x_sgd_ls_aug3b_norm1_0919_class_resnet152_gem_fc_arcface_1head_i448 --loss LabelSmoothingLossV1 --aug_version 1 --split_type v2x --num_classes 81313 --gpu_id 0,1,2,3 --batch_size 10 --distributed 1 --preprocessing 1 --model_file RESULT_DIR/models/v2c_sgd_ls_aug3b_norm1_0919_class_resnet152_gem_fc_arcface_1head_i448/7.40.pth 76 | ``` 77 | 78 | ## Generate submission 79 | Submission generation is detailed in notebooks/generate_submissions.ipynb. -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tqdm==4.15.0 2 | numpy==1.19.1 3 | pandas==0.23.1 4 | scipy==1.5.0 5 | torch==1.6.0 6 | torchvision==0.7.0 7 | mlcrate==0.2.0 8 | scikit-image==0.15.0 9 | scikit-learn==0.23.1 10 | pydegensac==0.1.2 11 | # python==3.6.10 (interpreter version, not a pip-installable package) 12 | faiss-cpu==1.6.3 -------------------------------------------------------------------------------- /src/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
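The generate_submissions notebook itself is not reproduced in this dump. As a rough, illustrative sketch only (not code from this repository), the retrieval step it builds on can be expressed with the pinned faiss-cpu package: L2-normalized embeddings from the models above are compared by inner product to find the nearest index/train images for each test image. The .npy file names and the embedding dimension below are assumptions for illustration.

```python
# Illustrative nearest-neighbour lookup between L2-normalised embeddings (not repository code).
import numpy as np
import faiss

test_feats = np.load('test_feats.npy').astype(np.float32)    # hypothetical (num_test, dim) dump
index_feats = np.load('index_feats.npy').astype(np.float32)  # hypothetical (num_index, dim) dump

index = faiss.IndexFlatIP(index_feats.shape[1])    # inner product == cosine for unit vectors
index.add(index_feats)
scores, neighbors = index.search(test_feats, 100)  # top-100 neighbours per test image
```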
14 | # ============================================================================== 15 | """Module to extract deep local features.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | # pylint: disable=unused-import 21 | # from delf.protos import aggregation_config_pb2 22 | # from delf.protos import box_pb2 23 | # from delf.protos import datum_pb2 24 | # from delf.protos import delf_config_pb2 25 | # from delf.protos import feature_pb2 26 | # from delf.python import box_io 27 | # from delf.python import datum_io 28 | # from delf.python import feature_aggregation_extractor 29 | # from delf.python import feature_aggregation_similarity 30 | # from delf.python import feature_extractor 31 | # from delf.python import feature_io 32 | # from delf.python import utils 33 | # from delf.python.examples import detector 34 | # from delf.python.examples import extractor 35 | # from delf.python import detect_to_retrieve 36 | # from delf.python import training 37 | # from delf.python.training import model 38 | # from delf.python.training import datasets 39 | # pylint: enable=unused-import 40 | -------------------------------------------------------------------------------- /src/config/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | ope = os.path.exists 3 | opj = os.path.join 4 | import numpy as np 5 | import socket 6 | import warnings 7 | warnings.filterwarnings('ignore') 8 | 9 | sk = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 10 | hostname = socket.gethostname() 11 | 12 | RESULT_DIR = '/data4/data/retrieval2020/result' 13 | DATA_DIR = '/data5/data/landmark2020' 14 | PRETRAINED_DIR = '/data5/data/pretrained' 15 | 16 | PI = np.pi 17 | INF = np.inf 18 | EPS = 1e-12 19 | NUM_CLASSES = 81313 20 | 21 | ID = 'id' 22 | TARGET = 'landmark_id' 23 | CLUSTER = 'cluster' 24 | SCALE = 'scale' 25 | CTARGET = 'landmarks' 26 | -------------------------------------------------------------------------------- /src/config/en_config.py: -------------------------------------------------------------------------------- 1 | en_m4_b7_b6_b5_r152_i800 = [ 2 | # b7, 3 | { 4 | 'is_20191st': False, 5 | 'module': 'efficientnet_gem_fc_face', 6 | 'model_name': 'class_efficientnet_b7_gem_fc_arcface2_1head', 7 | 'out_dir': 'v2x_sgd_ls_aug3b_norm1_0918_class_efficientnet_b7_gem_fc_arcface2_1head_i736', 8 | 'predict_epoch': '26.80', 9 | 'img_size': 800, 10 | 'batch_size': 4, 11 | 'num_classes': 81313, 12 | 'in_channels': 3, 13 | 'preprocessing': True, 14 | 'weight': 0.6, 15 | }, 16 | { 17 | 'is_20191st': False, 18 | 'module': 'efficientnet_gem_fc_face', 19 | 'model_name': 'class_efficientnet_b6_gem_fc_arcface2_1head', 20 | 'out_dir': 'v2x_sgd_ls_aug3b_norm1_0919_class_efficientnet_b6_gem_fc_arcface2_1head_i736', 21 | 'predict_epoch': '21.70', 22 | 'img_size': 800, 23 | 'batch_size': 4, 24 | 'num_classes': 81313, 25 | 'in_channels': 3, 26 | 'preprocessing': True, 27 | 'weight': 0.2, 28 | }, 29 | { 30 | 'is_20191st': False, 31 | 'module': 'efficientnet_gem_fc_face', 32 | 'model_name': 'class_efficientnet_b5_gem_fc_arcface2_1head', 33 | 'out_dir': 'v2x_sgd_ls_aug3b_norm1_0918_class_efficientnet_b5_gem_fc_arcface2_1head_i736', 34 | 'predict_epoch': '19.30', 35 | 'img_size': 800, 36 | 'batch_size': 4, 37 | 'num_classes': 81313, 38 | 'in_channels': 3, 39 | 'preprocessing': True, 40 | 'weight': 0.1, 41 | }, 42 | { 43 | 'is_20191st': False, 44 | 'module': 'resnet_gem_fc_face', 45 | 'model_name': 
'class_resnet152_gem_fc_arcface_1head', 46 | 'out_dir': 'v2x_sgd_ls_aug3b_norm1_0919_class_resnet152_gem_fc_arcface_1head_i736', 47 | 'predict_epoch': '17.90', 48 | 'img_size': 800, 49 | 'batch_size': 4, 50 | 'num_classes': 81313, 51 | 'in_channels': 3, 52 | 'preprocessing': True, 53 | 'weight': 0.1, 54 | }, 55 | ] -------------------------------------------------------------------------------- /src/data_process/create_split.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '..') 3 | import pandas as pd 4 | from tqdm import tqdm 5 | from config.config import * 6 | 7 | def create_whole_train_split(train_meta, split_name): 8 | train_meta = train_meta.copy() 9 | split_dir = f'{DATA_DIR}/split/{split_name}' 10 | os.makedirs(split_dir, exist_ok=True) 11 | 12 | print('train nums: %s' % train_meta.shape[0]) 13 | print('train label nums: %s' % train_meta[TARGET].nunique()) 14 | train_meta['count'] = train_meta.groupby([TARGET])[ID].transform('count') 15 | litter_image_df = train_meta[train_meta['count'] < 200] 16 | train_rest_meta = train_meta[~train_meta[ID].isin(litter_image_df[ID].values)].reset_index(drop=True) 17 | 18 | idx = 0 19 | valid_indices = np.random.choice(len(train_rest_meta), 200, replace=False) 20 | valid_split_df = train_rest_meta.loc[valid_indices] 21 | train_indices = ~train_meta[ID].isin(valid_split_df[ID].values) 22 | train_split_df = train_rest_meta[train_indices] 23 | train_split_df = pd.concat((train_split_df, litter_image_df), ignore_index=True) 24 | 25 | fname = f'{split_dir}/random_train_cv{idx}.csv' 26 | print("train: create split file: %s; "% (fname)) 27 | print(('nums: %d; label nums: %d; max label: %s')% 28 | (train_split_df.shape[0],train_split_df[TARGET].nunique(),train_split_df[TARGET].max())) 29 | train_split_df.to_csv(fname, index=False) 30 | print(train_split_df.head()) 31 | 32 | fname = f'{split_dir}/random_valid_cv{idx}.csv' 33 | print("valid: create split file: %s; "% (fname)) 34 | print(('nums: %d; label nums: %d; max label: %s') % 35 | (valid_split_df.shape[0],valid_split_df[TARGET].nunique(),valid_split_df[TARGET].max())) 36 | valid_split_df.to_csv(fname, index=False) 37 | print(valid_split_df.head()) 38 | 39 | def create_v2x_split(): 40 | train_clean_df = pd.read_csv(f'{DATA_DIR}/raw/train_clean.csv', usecols=[TARGET]) 41 | train_df = pd.read_csv(f'{DATA_DIR}/raw/train.csv', usecols=[ID, TARGET]) 42 | train_df = train_df[train_df[TARGET].isin(train_clean_df[TARGET].unique())] 43 | 44 | landmark_mapping = {l: i for i, l in enumerate(np.sort(train_df[TARGET].unique()))} 45 | train_df[TARGET] = train_df[TARGET].map(landmark_mapping) 46 | 47 | idx = 0 48 | train_split_df = pd.read_csv(f'{DATA_DIR}/split/v2c/random_train_cv{idx}.csv') 49 | valid_split_df = pd.read_csv(f'{DATA_DIR}/split/v2c/random_valid_cv{idx}.csv') 50 | _train_df = train_df.set_index(ID) 51 | assert np.array_equal(_train_df.loc[train_split_df[ID].values, TARGET], train_split_df[TARGET]) 52 | assert np.array_equal(_train_df.loc[valid_split_df[ID].values, TARGET], valid_split_df[TARGET]) 53 | del _train_df 54 | 55 | train_df = train_df[~train_df[ID].isin(valid_split_df[ID])] 56 | train_split_df = pd.merge(train_df, train_split_df, on=[ID, TARGET], how='left') 57 | 58 | split_dir = f'{DATA_DIR}/split/v2x' 59 | os.makedirs(split_dir, exist_ok=True) 60 | 61 | fname = f'{split_dir}/random_train_cv{idx}.csv' 62 | print("train: create split file: %s; "% (fname)) 63 | print(('nums: %d; label nums: %d') % 
(train_split_df.shape[0], train_split_df[TARGET].nunique())) 64 | train_split_df.to_csv(fname, index=False) 65 | print(train_split_df.head()) 66 | 67 | fname = f'{split_dir}/random_valid_cv{idx}.csv' 68 | print("valid: create split file: %s; "% (fname)) 69 | print(('nums: %d; label nums: %d') % (valid_split_df.shape[0], valid_split_df[TARGET].nunique())) 70 | valid_split_df.to_csv(fname, index=False) 71 | print(valid_split_df.head()) 72 | 73 | if __name__ == "__main__": 74 | print('%s: calling main function ... ' % os.path.basename(__file__)) 75 | train_clean_df = pd.read_csv(f'{DATA_DIR}/raw/train_clean.csv') 76 | train_clean_df['count'] = [len(row.split(' ')) for row in train_clean_df['images'].values] 77 | train_clean_df[CTARGET] = train_clean_df[TARGET] 78 | train_clean_df[TARGET] = range(len(train_clean_df)) 79 | images = [] 80 | for _, row in tqdm(train_clean_df.iterrows(), total=len(train_clean_df)): 81 | label = row[TARGET] 82 | old_label = row[CTARGET] 83 | for file_id in row['images'].split(' '): 84 | images.append((file_id, label, old_label)) 85 | 86 | dataset_df = pd.DataFrame(data=images, columns=[ID, TARGET, CTARGET]) 87 | dataset_df = dataset_df.sample(len(dataset_df), replace=False, random_state=100).reset_index(drop=True) 88 | dataset_df.to_csv(f'{DATA_DIR}/split/train2020.csv', index=False) 89 | 90 | create_whole_train_split(dataset_df, split_name='v2c') 91 | create_v2x_split() 92 | -------------------------------------------------------------------------------- /src/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
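The split files written by create_split.py above can be sanity-checked with a short snippet like the one below; it is illustrative only (not part of the repository) and reuses DATA_DIR and the 'id'/'landmark_id' column names from src/config/config.py.

```python
# Illustrative check of the v2c split produced by create_split.py (assumed paths).
import pandas as pd

DATA_DIR = '/data5/data/landmark2020'  # same value as in src/config/config.py
train_df = pd.read_csv(f'{DATA_DIR}/split/v2c/random_train_cv0.csv')
valid_df = pd.read_csv(f'{DATA_DIR}/split/v2c/random_valid_cv0.csv')

# The validation split holds 200 images drawn only from classes with >= 200 examples,
# and labels are remapped to a contiguous range of 81313 classes.
print(train_df.shape, train_df['landmark_id'].nunique())
print(valid_df.shape, valid_df['landmark_id'].nunique())
assert len(valid_df) == 200
assert train_df['landmark_id'].max() < 81313
```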
14 | # ============================================================================== 15 | """Module to extract deep local features.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | # pylint: disable=unused-import 21 | # from delf.protos import aggregation_config_pb2 22 | # from delf.protos import box_pb2 23 | # from delf.protos import datum_pb2 24 | # from delf.protos import delf_config_pb2 25 | # from delf.protos import feature_pb2 26 | # from delf.python import box_io 27 | # from delf.python import datum_io 28 | # from delf.python import feature_aggregation_extractor 29 | # from delf.python import feature_aggregation_similarity 30 | # from delf.python import feature_extractor 31 | # from delf.python import feature_io 32 | # from delf.python import utils 33 | # from delf.python.examples import detector 34 | # from delf.python.examples import extractor 35 | # from delf.python import detect_to_retrieve 36 | # from delf.python import training 37 | # from delf.python.training import model 38 | # from delf.python.training import datasets 39 | # pylint: enable=unused-import 40 | -------------------------------------------------------------------------------- /src/dataset/landmark_dataset.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '..') 3 | import torch 4 | from torch.utils.data import Dataset 5 | import pandas as pd 6 | from albumentations import Normalize 7 | 8 | from utilities.augment_util import * 9 | 10 | class RetrievalDataset(Dataset): 11 | def __init__(self, args, split_file, transform, data_type='train'): 12 | self.args = args 13 | self.img_size = (args.img_size, args.img_size) 14 | self.transform = transform 15 | self.is_train = data_type == 'train' 16 | 17 | df = pd.read_csv(split_file) 18 | self.df = df 19 | if data_type == 'valid': 20 | self.df = self.df[:200] 21 | 22 | img_dir = f'{DATA_DIR}/images/train' 23 | self.do_print('img_dir %s' % img_dir) 24 | self.img_dir = img_dir 25 | 26 | if self.is_train: 27 | self.df = self.df.sample(len(self.df), replace=False).reset_index(drop=True) 28 | dataset_df = self.df 29 | else: 30 | dataset_df = self.df 31 | 32 | self.dataset_df = dataset_df 33 | self.do_resample() 34 | 35 | def do_resample(self): 36 | dataset_df = self.dataset_df 37 | self.x = dataset_df[ID].values 38 | self.y = dataset_df[TARGET].values 39 | 40 | def do_print(self, content): 41 | if self.args.can_print: 42 | print(content) 43 | 44 | def __len__(self): 45 | return len(self.x) 46 | 47 | def get_batch_images(self, idx, img_id, label): 48 | x = [img_id] 49 | y = [label] 50 | return x, y 51 | 52 | def __getitem__(self, idx): 53 | img_id = self.x[idx] 54 | label = self.y[idx] 55 | 56 | x, y = self.get_batch_images(idx, img_id, label) 57 | images = [] 58 | for file_name in x: 59 | img_dir = self.img_dir 60 | boxes = None 61 | img_fname = f'{img_dir}/{file_name}.jpg' 62 | if not os.path.exists(img_fname): 63 | img_fname = f'{DATA_DIR}/images/test/{file_name}.jpg' 64 | image = cv2.imread(img_fname) 65 | image = image[..., ::-1] 66 | if self.transform is not None: 67 | image = self.transform(image, img_size=self.img_size, boxes=boxes) 68 | if image.shape[:2] != self.img_size: 69 | image = cv2.resize(image, self.img_size) 70 | 71 | if self.args.preprocessing == 1: 72 | norm = Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0) 73 | image = norm(image=image)['image'] 74 | else: 75 | 
image = image / 255.0 76 | image = np.transpose(image, (2, 0, 1)) 77 | image = torch.from_numpy(image).float() 78 | images.append(image) 79 | return images, y 80 | 81 | def on_epoch_end(self): 82 | if self.is_train: 83 | self.do_resample() 84 | idxes = np.random.choice(len(self.y), len(self.y), replace=False) 85 | self.x = np.array(self.x)[idxes] 86 | self.y = np.array(self.y)[idxes] 87 | 88 | def image_collate(batch): 89 | batch_size = len(batch) 90 | images = [] 91 | labels = [] 92 | for b in range(batch_size): 93 | if batch[b][0] is None: 94 | continue 95 | else: 96 | images.extend(batch[b][0]) 97 | labels.extend(batch[b][1]) 98 | images = torch.stack(images, 0) 99 | labels = torch.from_numpy(np.array(labels)) 100 | return images, labels 101 | -------------------------------------------------------------------------------- /src/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bestfitting/instance_level_recognition/683f021b4e65876835f028797ec28b0d1071bb45/src/layers/__init__.py -------------------------------------------------------------------------------- /src/layers/backbone/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bestfitting/instance_level_recognition/683f021b4e65876835f028797ec28b0d1071bb45/src/layers/backbone/__init__.py -------------------------------------------------------------------------------- /src/layers/backbone/efficientnet_pytorch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from .efficientnet_pytorch_utils import ( 6 | round_filters, 7 | round_repeats, 8 | drop_connect, 9 | get_same_padding_conv2d, 10 | get_model_params, 11 | efficientnet_params, 12 | load_pretrained_weights, 13 | Swish, 14 | MemoryEfficientSwish, 15 | ) 16 | 17 | class MBConvBlock(nn.Module): 18 | """ 19 | Mobile Inverted Residual Bottleneck Block 20 | 21 | Args: 22 | block_args (namedtuple): BlockArgs, see above 23 | global_params (namedtuple): GlobalParam, see above 24 | 25 | Attributes: 26 | has_se (bool): Whether the block contains a Squeeze and Excitation layer. 
27 | """ 28 | 29 | def __init__(self, block_args, global_params): 30 | super().__init__() 31 | self._block_args = block_args 32 | self._bn_mom = 1 - global_params.batch_norm_momentum 33 | self._bn_eps = global_params.batch_norm_epsilon 34 | self.has_se = (self._block_args.se_ratio is not None) and (0 < self._block_args.se_ratio <= 1) 35 | self.id_skip = block_args.id_skip # skip connection and drop connect 36 | 37 | # Get static or dynamic convolution depending on image size 38 | Conv2d = get_same_padding_conv2d(image_size=global_params.image_size) 39 | 40 | # Expansion phase 41 | inp = self._block_args.input_filters # number of input channels 42 | oup = self._block_args.input_filters * self._block_args.expand_ratio # number of output channels 43 | if self._block_args.expand_ratio != 1: 44 | self._expand_conv = Conv2d(in_channels=inp, out_channels=oup, kernel_size=1, bias=False) 45 | self._bn0 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) 46 | 47 | # Depthwise convolution phase 48 | k = self._block_args.kernel_size 49 | s = self._block_args.stride 50 | self._depthwise_conv = Conv2d( 51 | in_channels=oup, out_channels=oup, groups=oup, # groups makes it depthwise 52 | kernel_size=k, stride=s, bias=False) 53 | self._bn1 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) 54 | 55 | # Squeeze and Excitation layer, if desired 56 | if self.has_se: 57 | num_squeezed_channels = max(1, int(self._block_args.input_filters * self._block_args.se_ratio)) 58 | self._se_reduce = Conv2d(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1) 59 | self._se_expand = Conv2d(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1) 60 | 61 | # Output phase 62 | final_oup = self._block_args.output_filters 63 | self._project_conv = Conv2d(in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False) 64 | self._bn2 = nn.BatchNorm2d(num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps) 65 | self._swish = MemoryEfficientSwish() 66 | 67 | def forward(self, inputs, drop_connect_rate=None): 68 | """ 69 | :param inputs: input tensor 70 | :param drop_connect_rate: drop connect rate (float, between 0 and 1) 71 | :return: output of block 72 | """ 73 | 74 | # Expansion and Depthwise Convolution 75 | x = inputs 76 | if self._block_args.expand_ratio != 1: 77 | x = self._swish(self._bn0(self._expand_conv(inputs))) 78 | x = self._swish(self._bn1(self._depthwise_conv(x))) 79 | 80 | # Squeeze and Excitation 81 | if self.has_se: 82 | x_squeezed = F.adaptive_avg_pool2d(x, 1) 83 | x_squeezed = self._se_expand(self._swish(self._se_reduce(x_squeezed))) 84 | x = torch.sigmoid(x_squeezed) * x 85 | 86 | x = self._bn2(self._project_conv(x)) 87 | 88 | # Skip connection and drop connect 89 | input_filters, output_filters = self._block_args.input_filters, self._block_args.output_filters 90 | if self.id_skip and self._block_args.stride == 1 and input_filters == output_filters: 91 | if drop_connect_rate: 92 | x = drop_connect(x, p=drop_connect_rate, training=self.training) 93 | x = x + inputs # skip connection 94 | return x 95 | 96 | def set_swish(self, memory_efficient=True): 97 | """Sets swish function as memory efficient (for training) or standard (for export)""" 98 | self._swish = MemoryEfficientSwish() if memory_efficient else Swish() 99 | 100 | 101 | class EfficientNet(nn.Module): 102 | """ 103 | An EfficientNet model. 
Most easily loaded with the .from_name or .from_pretrained methods 104 | 105 | Args: 106 | blocks_args (list): A list of BlockArgs to construct blocks 107 | global_params (namedtuple): A set of GlobalParams shared between blocks 108 | 109 | Example: 110 | model = EfficientNet.from_pretrained('efficientnet-b0') 111 | 112 | """ 113 | 114 | def __init__(self, blocks_args=None, global_params=None): 115 | super().__init__() 116 | assert isinstance(blocks_args, list), 'blocks_args should be a list' 117 | assert len(blocks_args) > 0, 'block args must be greater than 0' 118 | self._global_params = global_params 119 | self._blocks_args = blocks_args 120 | 121 | # Get static or dynamic convolution depending on image size 122 | Conv2d = get_same_padding_conv2d(image_size=global_params.image_size) 123 | 124 | # Batch norm parameters 125 | bn_mom = 1 - self._global_params.batch_norm_momentum 126 | bn_eps = self._global_params.batch_norm_epsilon 127 | 128 | # Stem 129 | in_channels = 3 # rgb 130 | out_channels = round_filters(32, self._global_params) # number of output channels 131 | self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False) 132 | self._bn0 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps) 133 | 134 | # Build blocks 135 | self._blocks = nn.ModuleList([]) 136 | for block_args in self._blocks_args: 137 | 138 | # Update block input and output filters based on depth multiplier. 139 | block_args = block_args._replace( 140 | input_filters=round_filters(block_args.input_filters, self._global_params), 141 | output_filters=round_filters(block_args.output_filters, self._global_params), 142 | num_repeat=round_repeats(block_args.num_repeat, self._global_params) 143 | ) 144 | 145 | # The first block needs to take care of stride and filter size increase. 
146 | self._blocks.append(MBConvBlock(block_args, self._global_params)) 147 | if block_args.num_repeat > 1: 148 | block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) 149 | for _ in range(block_args.num_repeat - 1): 150 | self._blocks.append(MBConvBlock(block_args, self._global_params)) 151 | 152 | # Head 153 | in_channels = block_args.output_filters # output of final block 154 | out_channels = round_filters(1280, self._global_params) 155 | self._conv_head = Conv2d(in_channels, out_channels, kernel_size=1, bias=False) 156 | self._bn1 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps) 157 | 158 | # Final linear layer 159 | self._avg_pooling = nn.AdaptiveAvgPool2d(1) 160 | self._dropout = nn.Dropout(self._global_params.dropout_rate) 161 | self._fc = nn.Linear(out_channels, self._global_params.num_classes) 162 | self._swish = MemoryEfficientSwish() 163 | 164 | def set_swish(self, memory_efficient=True): 165 | """Sets swish function as memory efficient (for training) or standard (for export)""" 166 | self._swish = MemoryEfficientSwish() if memory_efficient else Swish() 167 | for block in self._blocks: 168 | block.set_swish(memory_efficient) 169 | 170 | def extract_features(self, inputs): 171 | """ Returns output of the final convolution layer """ 172 | 173 | # Stem 174 | x = self._swish(self._bn0(self._conv_stem(inputs))) 175 | 176 | # Blocks 177 | for idx, block in enumerate(self._blocks): 178 | drop_connect_rate = self._global_params.drop_connect_rate 179 | if drop_connect_rate: 180 | drop_connect_rate *= float(idx) / len(self._blocks) 181 | x = block(x, drop_connect_rate=drop_connect_rate) 182 | 183 | # Head 184 | x = self._swish(self._bn1(self._conv_head(x))) 185 | 186 | return x 187 | 188 | def forward(self, inputs): 189 | """ Calls extract_features to extract features, applies final linear layer, and returns logits. 
""" 190 | bs = inputs.size(0) 191 | # Convolution layers 192 | x = self.extract_features(inputs) 193 | 194 | # Pooling and final linear layer 195 | x = self._avg_pooling(x) 196 | x = x.view(bs, -1) 197 | x = self._dropout(x) 198 | x = self._fc(x) 199 | return x 200 | 201 | @classmethod 202 | def from_name(cls, model_name, override_params=None): 203 | cls._check_model_name_is_valid(model_name) 204 | blocks_args, global_params = get_model_params(model_name, override_params) 205 | return cls(blocks_args, global_params) 206 | 207 | @classmethod 208 | def from_pretrained(cls, model_name, model_dir, num_classes=1000, in_channels=3, can_print=False): 209 | model = cls.from_name(model_name, override_params={'num_classes': num_classes}) 210 | load_pretrained_weights(model, model_name, model_dir, load_fc=(num_classes == 1000), can_print=can_print) 211 | if in_channels != 3: 212 | Conv2d = get_same_padding_conv2d(image_size=model._global_params.image_size) 213 | out_channels = round_filters(32, model._global_params) 214 | model._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False) 215 | return model 216 | 217 | @classmethod 218 | def from_pretrained(cls, model_name, model_dir, num_classes=1000, can_print=False): 219 | model = cls.from_name(model_name, override_params={'num_classes': num_classes}) 220 | load_pretrained_weights(model, model_name, model_dir, load_fc=(num_classes == 1000), can_print=can_print) 221 | 222 | return model 223 | 224 | @classmethod 225 | def get_image_size(cls, model_name): 226 | cls._check_model_name_is_valid(model_name) 227 | _, _, res, _ = efficientnet_params(model_name) 228 | return res 229 | 230 | @classmethod 231 | def _check_model_name_is_valid(cls, model_name, also_need_pretrained_weights=False): 232 | """ Validates model name. None that pretrained weights are only available for 233 | the first four models (efficientnet-b{i} for i in 0,1,2,3) at the moment. """ 234 | num_models = 4 if also_need_pretrained_weights else 8 235 | valid_models = ['efficientnet-b' + str(i) for i in range(num_models)] 236 | if model_name not in valid_models: 237 | raise ValueError('model_name should be one of: ' + ', '.join(valid_models)) 238 | -------------------------------------------------------------------------------- /src/layers/backbone/efficientnet_pytorch_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains helper functions for building the model and for loading model parameters. 3 | These helper functions are built to mirror those in the official TensorFlow implementation. 
4 | """ 5 | 6 | import re 7 | import math 8 | import collections 9 | from functools import partial 10 | import torch 11 | from torch import nn 12 | from torch.nn import functional as F 13 | from torch.utils import model_zoo 14 | 15 | ######################################################################## 16 | ############### HELPERS FUNCTIONS FOR MODEL ARCHITECTURE ############### 17 | ######################################################################## 18 | 19 | 20 | # Parameters for the entire model (stem, all blocks, and head) 21 | GlobalParams = collections.namedtuple('GlobalParams', [ 22 | 'batch_norm_momentum', 'batch_norm_epsilon', 'dropout_rate', 23 | 'num_classes', 'width_coefficient', 'depth_coefficient', 24 | 'depth_divisor', 'min_depth', 'drop_connect_rate', 'image_size']) 25 | 26 | # Parameters for an individual model block 27 | BlockArgs = collections.namedtuple('BlockArgs', [ 28 | 'kernel_size', 'num_repeat', 'input_filters', 'output_filters', 29 | 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) 30 | 31 | # Change namedtuple defaults 32 | GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields) 33 | BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields) 34 | 35 | 36 | class SwishImplementation(torch.autograd.Function): 37 | @staticmethod 38 | def forward(ctx, i): 39 | result = i * torch.sigmoid(i) 40 | ctx.save_for_backward(i) 41 | return result 42 | 43 | @staticmethod 44 | def backward(ctx, grad_output): 45 | i = ctx.saved_variables[0] 46 | sigmoid_i = torch.sigmoid(i) 47 | return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i))) 48 | 49 | 50 | class MemoryEfficientSwish(nn.Module): 51 | def forward(self, x): 52 | return SwishImplementation.apply(x) 53 | 54 | class Swish(nn.Module): 55 | def forward(self, x): 56 | return x * torch.sigmoid(x) 57 | 58 | 59 | def round_filters(filters, global_params): 60 | """ Calculate and round number of filters based on depth multiplier. """ 61 | multiplier = global_params.width_coefficient 62 | if not multiplier: 63 | return filters 64 | divisor = global_params.depth_divisor 65 | min_depth = global_params.min_depth 66 | filters *= multiplier 67 | min_depth = min_depth or divisor 68 | new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) 69 | if new_filters < 0.9 * filters: # prevent rounding by more than 10% 70 | new_filters += divisor 71 | return int(new_filters) 72 | 73 | 74 | def round_repeats(repeats, global_params): 75 | """ Round number of filters based on depth multiplier. """ 76 | multiplier = global_params.depth_coefficient 77 | if not multiplier: 78 | return repeats 79 | return int(math.ceil(multiplier * repeats)) 80 | 81 | 82 | def drop_connect(inputs, p, training): 83 | """ Drop connect. """ 84 | if not training: return inputs 85 | batch_size = inputs.shape[0] 86 | keep_prob = 1 - p 87 | random_tensor = keep_prob 88 | random_tensor += torch.rand([batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device) 89 | binary_tensor = torch.floor(random_tensor) 90 | output = inputs / keep_prob * binary_tensor 91 | return output 92 | 93 | 94 | def get_same_padding_conv2d(image_size=None): 95 | """ Chooses static padding if you have specified an image size, and dynamic padding otherwise. 96 | Static padding is necessary for ONNX exporting of models. 
""" 97 | if image_size is None: 98 | return Conv2dDynamicSamePadding 99 | else: 100 | return partial(Conv2dStaticSamePadding, image_size=image_size) 101 | 102 | 103 | class Conv2dDynamicSamePadding(nn.Conv2d): 104 | """ 2D Convolutions like TensorFlow, for a dynamic image size """ 105 | 106 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True): 107 | super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) 108 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 109 | 110 | def forward(self, x): 111 | ih, iw = x.size()[-2:] 112 | kh, kw = self.weight.size()[-2:] 113 | sh, sw = self.stride 114 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 115 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 116 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 117 | if pad_h > 0 or pad_w > 0: 118 | x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]) 119 | return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 120 | 121 | 122 | class Conv2dStaticSamePadding(nn.Conv2d): 123 | """ 2D Convolutions like TensorFlow, for a fixed image size""" 124 | 125 | def __init__(self, in_channels, out_channels, kernel_size, image_size=None, **kwargs): 126 | super().__init__(in_channels, out_channels, kernel_size, **kwargs) 127 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 128 | 129 | # Calculate padding based on image size and save it 130 | assert image_size is not None 131 | ih, iw = image_size if type(image_size) == list else [image_size, image_size] 132 | kh, kw = self.weight.size()[-2:] 133 | sh, sw = self.stride 134 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 135 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 136 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 137 | if pad_h > 0 or pad_w > 0: 138 | self.static_padding = nn.ZeroPad2d((pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2)) 139 | else: 140 | self.static_padding = Identity() 141 | 142 | def forward(self, x): 143 | x = self.static_padding(x) 144 | x = F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 145 | return x 146 | 147 | 148 | class Identity(nn.Module): 149 | def __init__(self, ): 150 | super(Identity, self).__init__() 151 | 152 | def forward(self, input): 153 | return input 154 | 155 | 156 | ######################################################################## 157 | ############## HELPERS FUNCTIONS FOR LOADING MODEL PARAMS ############## 158 | ######################################################################## 159 | 160 | 161 | def efficientnet_params(model_name): 162 | """ Map EfficientNet model name to parameter coefficients. 
""" 163 | params_dict = { 164 | # Coefficients: width,depth,res,dropout 165 | 'efficientnet-b0': (1.0, 1.0, 224, 0.2), 166 | 'efficientnet-b1': (1.0, 1.1, 240, 0.2), 167 | 'efficientnet-b2': (1.1, 1.2, 260, 0.3), 168 | 'efficientnet-b3': (1.2, 1.4, 300, 0.3), 169 | 'efficientnet-b4': (1.4, 1.8, 380, 0.4), 170 | 'efficientnet-b5': (1.6, 2.2, 456, 0.4), 171 | 'efficientnet-b6': (1.8, 2.6, 528, 0.5), 172 | 'efficientnet-b7': (2.0, 3.1, 600, 0.5), 173 | } 174 | return params_dict[model_name] 175 | 176 | 177 | class BlockDecoder(object): 178 | """ Block Decoder for readability, straight from the official TensorFlow repository """ 179 | 180 | @staticmethod 181 | def _decode_block_string(block_string): 182 | """ Gets a block through a string notation of arguments. """ 183 | assert isinstance(block_string, str) 184 | 185 | ops = block_string.split('_') 186 | options = {} 187 | for op in ops: 188 | splits = re.split(r'(\d.*)', op) 189 | if len(splits) >= 2: 190 | key, value = splits[:2] 191 | options[key] = value 192 | 193 | # Check stride 194 | assert (('s' in options and len(options['s']) == 1) or 195 | (len(options['s']) == 2 and options['s'][0] == options['s'][1])) 196 | 197 | return BlockArgs( 198 | kernel_size=int(options['k']), 199 | num_repeat=int(options['r']), 200 | input_filters=int(options['i']), 201 | output_filters=int(options['o']), 202 | expand_ratio=int(options['e']), 203 | id_skip=('noskip' not in block_string), 204 | se_ratio=float(options['se']) if 'se' in options else None, 205 | stride=[int(options['s'][0])]) 206 | 207 | @staticmethod 208 | def _encode_block_string(block): 209 | """Encodes a block to a string.""" 210 | args = [ 211 | 'r%d' % block.num_repeat, 212 | 'k%d' % block.kernel_size, 213 | 's%d%d' % (block.strides[0], block.strides[1]), 214 | 'e%s' % block.expand_ratio, 215 | 'i%d' % block.input_filters, 216 | 'o%d' % block.output_filters 217 | ] 218 | if 0 < block.se_ratio <= 1: 219 | args.append('se%s' % block.se_ratio) 220 | if block.id_skip is False: 221 | args.append('noskip') 222 | return '_'.join(args) 223 | 224 | @staticmethod 225 | def decode(string_list): 226 | """ 227 | Decodes a list of string notations to specify blocks inside the network. 228 | 229 | :param string_list: a list of strings, each string is a notation of block 230 | :return: a list of BlockArgs namedtuples of block args 231 | """ 232 | assert isinstance(string_list, list) 233 | blocks_args = [] 234 | for block_string in string_list: 235 | blocks_args.append(BlockDecoder._decode_block_string(block_string)) 236 | return blocks_args 237 | 238 | @staticmethod 239 | def encode(blocks_args): 240 | """ 241 | Encodes a list of BlockArgs to a list of strings. 242 | 243 | :param blocks_args: a list of BlockArgs namedtuples of block args 244 | :return: a list of strings, each string is a notation of block 245 | """ 246 | block_strings = [] 247 | for block in blocks_args: 248 | block_strings.append(BlockDecoder._encode_block_string(block)) 249 | return block_strings 250 | 251 | 252 | def efficientnet(width_coefficient=None, depth_coefficient=None, dropout_rate=0.2, 253 | drop_connect_rate=0.2, image_size=None, num_classes=1000): 254 | """ Creates a efficientnet model. 
""" 255 | 256 | blocks_args = [ 257 | 'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s22_e6_i16_o24_se0.25', 258 | 'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25', 259 | 'r3_k5_s11_e6_i80_o112_se0.25', 'r4_k5_s22_e6_i112_o192_se0.25', 260 | 'r1_k3_s11_e6_i192_o320_se0.25', 261 | ] 262 | blocks_args = BlockDecoder.decode(blocks_args) 263 | 264 | global_params = GlobalParams( 265 | batch_norm_momentum=0.99, 266 | batch_norm_epsilon=1e-3, 267 | dropout_rate=dropout_rate, 268 | drop_connect_rate=drop_connect_rate, 269 | # data_format='channels_last', # removed, this is always true in PyTorch 270 | num_classes=num_classes, 271 | width_coefficient=width_coefficient, 272 | depth_coefficient=depth_coefficient, 273 | depth_divisor=8, 274 | min_depth=None, 275 | image_size=image_size, 276 | ) 277 | 278 | return blocks_args, global_params 279 | 280 | 281 | def get_model_params(model_name, override_params): 282 | """ Get the block args and global params for a given model """ 283 | if model_name.startswith('efficientnet'): 284 | w, d, s, p = efficientnet_params(model_name) 285 | # note: all models have drop connect rate = 0.2 286 | blocks_args, global_params = efficientnet( 287 | width_coefficient=w, depth_coefficient=d, dropout_rate=p, image_size=s) 288 | else: 289 | raise NotImplementedError('model name is not pre-defined: %s' % model_name) 290 | if override_params: 291 | # ValueError will be raised here if override_params has fields not included in global_params. 292 | global_params = global_params._replace(**override_params) 293 | return blocks_args, global_params 294 | 295 | 296 | url_map = { 297 | 'efficientnet-b0': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b0-355c32eb.pth', 298 | 'efficientnet-b1': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b1-f1951068.pth', 299 | 'efficientnet-b2': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b2-8bb594d6.pth', 300 | 'efficientnet-b3': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b3-5fb5a3c3.pth', 301 | 'efficientnet-b4': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b4-6ed6700e.pth', 302 | 'efficientnet-b5': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b5-b6417697.pth', 303 | 'efficientnet-b6': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b6-c76e70fd.pth', 304 | 'efficientnet-b7': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b7-dcc49843.pth', 305 | } 306 | 307 | 308 | def load_pretrained_weights(model, model_name, model_dir, load_fc=True, can_print=False): 309 | """ Loads pretrained weights, and downloads if loading for the first time. 
""" 310 | state_dict = model_zoo.load_url(url_map[model_name], model_dir) 311 | if load_fc: 312 | model.load_state_dict(state_dict) 313 | else: 314 | state_dict.pop('_fc.weight') 315 | state_dict.pop('_fc.bias') 316 | res = model.load_state_dict(state_dict, strict=False) 317 | assert set(res.missing_keys) == set(['_fc.weight', '_fc.bias']), 'issue loading pretrained weights' 318 | if can_print: 319 | print('Loaded pretrained weights for {}'.format(model_name)) 320 | -------------------------------------------------------------------------------- /src/layers/backbone/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | import torch.utils.model_zoo as model_zoo 4 | 5 | 6 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 7 | 'resnet152'] 8 | 9 | 10 | model_urls = { 11 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 12 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 13 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 14 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 15 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 16 | } 17 | 18 | 19 | def conv3x3(in_planes, out_planes, stride=1): 20 | """3x3 convolution with padding""" 21 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 22 | padding=1, bias=False) 23 | 24 | 25 | class BasicBlock(nn.Module): 26 | expansion = 1 27 | 28 | def __init__(self, inplanes, planes, stride=1, downsample=None): 29 | super(BasicBlock, self).__init__() 30 | self.conv1 = conv3x3(inplanes, planes, stride) 31 | self.bn1 = nn.BatchNorm2d(planes) 32 | self.relu = nn.ReLU(inplace=True) 33 | self.conv2 = conv3x3(planes, planes) 34 | self.bn2 = nn.BatchNorm2d(planes) 35 | self.downsample = downsample 36 | self.stride = stride 37 | 38 | def forward(self, x): 39 | residual = x 40 | 41 | out = self.conv1(x) 42 | out = self.bn1(out) 43 | out = self.relu(out) 44 | 45 | out = self.conv2(out) 46 | out = self.bn2(out) 47 | 48 | if self.downsample is not None: 49 | residual = self.downsample(x) 50 | 51 | out += residual 52 | out = self.relu(out) 53 | 54 | return out 55 | 56 | 57 | class Bottleneck(nn.Module): 58 | expansion = 4 59 | 60 | def __init__(self, inplanes, planes, stride=1, downsample=None): 61 | super(Bottleneck, self).__init__() 62 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 63 | self.bn1 = nn.BatchNorm2d(planes) 64 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 65 | padding=1, bias=False) 66 | self.bn2 = nn.BatchNorm2d(planes) 67 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 68 | self.bn3 = nn.BatchNorm2d(planes * 4) 69 | self.relu = nn.ReLU(inplace=True) 70 | self.downsample = downsample 71 | self.stride = stride 72 | 73 | def forward(self, x): 74 | residual = x 75 | 76 | out = self.conv1(x) 77 | out = self.bn1(out) 78 | out = self.relu(out) 79 | 80 | out = self.conv2(out) 81 | out = self.bn2(out) 82 | out = self.relu(out) 83 | 84 | out = self.conv3(out) 85 | out = self.bn3(out) 86 | 87 | if self.downsample is not None: 88 | residual = self.downsample(x) 89 | 90 | out += residual 91 | out = self.relu(out) 92 | 93 | return out 94 | 95 | 96 | class ResNet(nn.Module): 97 | 98 | def __init__(self, block, layers, num_classes=1000): 99 | self.inplanes = 64 100 | super(ResNet, self).__init__() 101 | self.conv1 = nn.Conv2d(3, 64, 
kernel_size=7, stride=2, padding=3, 102 | bias=False) 103 | self.bn1 = nn.BatchNorm2d(64) 104 | self.relu = nn.ReLU(inplace=True) 105 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 106 | self.layer1 = self._make_layer(block, 64, layers[0]) 107 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 108 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 109 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 110 | self.avgpool = nn.AvgPool2d(7, stride=1) 111 | self.fc = nn.Linear(512 * block.expansion, num_classes) 112 | 113 | for m in self.modules(): 114 | if isinstance(m, nn.Conv2d): 115 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 116 | m.weight.data.normal_(0, math.sqrt(2. / n)) 117 | elif isinstance(m, nn.BatchNorm2d): 118 | m.weight.data.fill_(1) 119 | m.bias.data.zero_() 120 | 121 | def _make_layer(self, block, planes, blocks, stride=1): 122 | downsample = None 123 | if stride != 1 or self.inplanes != planes * block.expansion: 124 | downsample = nn.Sequential( 125 | nn.Conv2d(self.inplanes, planes * block.expansion, 126 | kernel_size=1, stride=stride, bias=False), 127 | nn.BatchNorm2d(planes * block.expansion), 128 | ) 129 | 130 | layers = [] 131 | layers.append(block(self.inplanes, planes, stride, downsample)) 132 | self.inplanes = planes * block.expansion 133 | for i in range(1, blocks): 134 | layers.append(block(self.inplanes, planes)) 135 | 136 | return nn.Sequential(*layers) 137 | 138 | def forward(self, x): 139 | x = self.conv1(x) 140 | x = self.bn1(x) 141 | x = self.relu(x) 142 | x = self.maxpool(x) 143 | 144 | x = self.layer1(x) 145 | x = self.layer2(x) 146 | x = self.layer3(x) 147 | x = self.layer4(x) 148 | 149 | x = self.avgpool(x) 150 | x = x.view(x.size(0), -1) 151 | x = self.fc(x) 152 | 153 | return x 154 | 155 | 156 | def resnet18(pretrained=False, **kwargs): 157 | """Constructs a ResNet-18 model. 158 | 159 | Args: 160 | pretrained (bool): If True, returns a model pre-trained on ImageNet 161 | """ 162 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 163 | if pretrained: 164 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 165 | return model 166 | 167 | 168 | def resnet34(pretrained=False, **kwargs): 169 | """Constructs a ResNet-34 model. 170 | 171 | Args: 172 | pretrained (bool): If True, returns a model pre-trained on ImageNet 173 | """ 174 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 175 | if pretrained: 176 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) 177 | return model 178 | 179 | 180 | def resnet50(pretrained=False, **kwargs): 181 | """Constructs a ResNet-50 model. 182 | 183 | Args: 184 | pretrained (bool): If True, returns a model pre-trained on ImageNet 185 | """ 186 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 187 | if pretrained: 188 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 189 | return model 190 | 191 | 192 | def resnet101(pretrained=False, **kwargs): 193 | """Constructs a ResNet-101 model. 194 | 195 | Args: 196 | pretrained (bool): If True, returns a model pre-trained on ImageNet 197 | """ 198 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 199 | if pretrained: 200 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 201 | return model 202 | 203 | 204 | def resnet152(pretrained=False, **kwargs): 205 | """Constructs a ResNet-152 model. 
206 | 207 | Args: 208 | pretrained (bool): If True, returns a model pre-trained on ImageNet 209 | """ 210 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 211 | if pretrained: 212 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) 213 | return model 214 | 215 | 216 | -------------------------------------------------------------------------------- /src/layers/loss.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '../..') 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | from config.config import * 8 | 9 | class LabelSmoothingLoss(nn.Module): 10 | def __init__(self, smoothing=0.1): 11 | super(LabelSmoothingLoss, self).__init__() 12 | self.confidence = 1.0 - smoothing 13 | self.smoothing = smoothing 14 | 15 | def forward(self, logits, labels, epoch=0, **kwargs): 16 | if self.training: 17 | logits = logits.float() 18 | labels = labels.float() 19 | logprobs = torch.nn.functional.log_softmax(logits, dim=-1) 20 | 21 | nll_loss = -logprobs.gather(dim=-1, index=labels.unsqueeze(1).long()) 22 | nll_loss = nll_loss.squeeze(1) 23 | smooth_loss = -logprobs.mean(dim=-1) 24 | loss = self.confidence * nll_loss + self.smoothing * smooth_loss 25 | loss = loss.mean() 26 | else: 27 | loss = F.cross_entropy(logits, labels) 28 | return loss 29 | 30 | class LabelSmoothingLossV1(nn.modules.Module): 31 | def __init__(self): 32 | super(LabelSmoothingLossV1, self).__init__() 33 | self.classify_loss = LabelSmoothingLoss() 34 | 35 | def forward(self, logits, labels, epoch=0): 36 | out_face, feature = logits 37 | loss = self.classify_loss(out_face, labels) 38 | return loss 39 | 40 | if __name__ == "__main__": 41 | loss = LabelSmoothingLossV1() 42 | logits = Variable(torch.randn(3, NUM_CLASSES)) 43 | labels = Variable(torch.LongTensor(3).random_(NUM_CLASSES)) 44 | output = loss([logits, None, logits], labels) 45 | print(output) 46 | -------------------------------------------------------------------------------- /src/layers/metric_learning.py: -------------------------------------------------------------------------------- 1 | # from https://github.com/ronghuaiyang/arcface-pytorch/blob/master/models/metrics.py 2 | # adacos: https://github.com/4uiiurz1/pytorch-adacos/blob/master/metrics.py 3 | from __future__ import print_function 4 | from __future__ import division 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | import torch.autograd 9 | from torch.nn import Parameter 10 | import math 11 | 12 | class ArcMarginProduct(nn.Module): 13 | r"""Implement of large margin arc distance: : 14 | Args: 15 | in_features: size of each input sample 16 | out_features: size of each output sample 17 | s: norm of input feature 18 | m: margin 19 | cos(theta + m) 20 | """ 21 | def __init__(self, in_features, out_features, s=30.0, m=0.50, easy_margin=False, ls_eps=0.0): 22 | super(ArcMarginProduct, self).__init__() 23 | self.in_features = in_features 24 | self.out_features = out_features 25 | self.s = s 26 | self.m = m 27 | self.ls_eps = ls_eps # label smoothing 28 | self.W = Parameter(torch.FloatTensor(out_features, in_features)) 29 | self.reset_parameters() 30 | 31 | self.easy_margin = easy_margin 32 | self.cos_m = math.cos(m) 33 | self.sin_m = math.sin(m) 34 | self.th = math.cos(math.pi - m) 35 | self.mm = math.sin(math.pi - m) * m 36 | 37 | def reset_parameters(self): 38 | stdv = 1. 
/ math.sqrt(self.W.size(1)) 39 | self.W.data.uniform_(-stdv, stdv) 40 | 41 | def forward(self, input, label): 42 | # --------------------------- cos(theta) & phi(theta) --------------------------- 43 | cosine = F.linear(F.normalize(input), F.normalize(self.W)) 44 | if label is None: 45 | return cosine 46 | sine = torch.sqrt(1.0 - torch.pow(cosine, 2)) 47 | phi = cosine * self.cos_m - sine * self.sin_m 48 | if self.easy_margin: 49 | phi = torch.where(cosine.float() > 0, phi, cosine.float()) 50 | else: 51 | phi = torch.where(cosine.float() > self.th, phi, cosine.float() - self.mm) 52 | # --------------------------- convert label to one-hot --------------------------- 53 | # one_hot = torch.zeros(cosine.size(), requires_grad=True, device='cuda') 54 | one_hot = torch.zeros(cosine.size(), device=label.device) 55 | one_hot.scatter_(1, label.view(-1, 1).long(), 1) 56 | if self.ls_eps > 0: 57 | one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.out_features 58 | # -------------torch.where(out_i = {x_i if condition_i else y_i) ------------- 59 | output = (one_hot * phi) + ((1.0 - one_hot) * cosine) 60 | output *= self.s 61 | 62 | return output 63 | 64 | class ArcMarginProduct2(nn.Module): 65 | 66 | def __init__(self, in_features, out_features, s=30.0, m=0.50, easy_margin=False, ls_eps=0.0): 67 | super(ArcMarginProduct2, self).__init__() 68 | self.in_features = in_features 69 | self.out_features = out_features 70 | self.s = s 71 | self.m = m 72 | self.ls_eps = ls_eps # label smoothing 73 | self.W = Parameter(torch.FloatTensor(out_features, in_features)) 74 | nn.init.xavier_uniform_(self.W) 75 | 76 | self.easy_margin = easy_margin 77 | self.cos_m = math.cos(m) 78 | self.sin_m = math.sin(m) 79 | self.th = math.cos(math.pi - m) 80 | self.mm = math.sin(math.pi - m) * m 81 | 82 | def forward(self, input, label): 83 | # --------------------------- cos(theta) & phi(theta) --------------------------- 84 | cosine = F.linear(F.normalize(input), F.normalize(self.W)) 85 | if label == None: 86 | return cosine 87 | sine = torch.sqrt(1.0 - torch.pow(cosine, 2)) 88 | phi = cosine * self.cos_m - sine * self.sin_m 89 | if self.easy_margin: 90 | phi = torch.where(cosine.float() > 0, phi, cosine.float()) 91 | else: 92 | phi = torch.where(cosine.float() > self.th, phi, cosine.float() - self.mm) 93 | # --------------------------- convert label to one-hot --------------------------- 94 | # one_hot = torch.zeros(cosine.size(), requires_grad=True, device='cuda') 95 | one_hot = torch.zeros(cosine.size(), device=label.device) 96 | one_hot.scatter_(1, label.view(-1, 1).long(), 1) 97 | if self.ls_eps > 0: 98 | one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.out_features 99 | # -------------torch.where(out_i = {x_i if condition_i else y_i) ------------- 100 | output = (one_hot * phi) + ((1.0 - one_hot) * cosine) 101 | output *= self.s 102 | 103 | return output 104 | -------------------------------------------------------------------------------- /src/layers/normalization.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | # -------------------------------------- 5 | # Normalization layers 6 | # -------------------------------------- 7 | def l2n(x, eps=1e-6): 8 | return x / (torch.norm(x, p=2, dim=1, keepdim=True) + eps).expand_as(x) 9 | 10 | class L2N(nn.Module): 11 | 12 | def __init__(self, eps=1e-6): 13 | super(L2N, self).__init__() 14 | self.eps = eps 15 | 16 | def forward(self, x): 17 | return l2n(x, eps=self.eps) 18 | 19 | 
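    # Hypothetical usage sketch (names below are illustrative, not from this repo):
    # l2n / L2N scale each row of a (batch, dim) feature matrix to unit L2 norm,
    # so that cosine similarity between descriptors reduces to a plain dot product.
    #
    #   feats = torch.randn(8, 512)
    #   unit = L2N()(feats)
    #   # unit.norm(p=2, dim=1) is ~1 for every row (up to the eps term)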
def __repr__(self): 20 | return self.__class__.__name__ + '(' + 'eps=' + str(self.eps) + ')' 21 | -------------------------------------------------------------------------------- /src/layers/pooling.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | # -------------------------------------- 3 | # Pooling layers 4 | # -------------------------------------- 5 | def gem(x, p=3, eps=1e-6): 6 | return F.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1))).pow(1./p) 7 | -------------------------------------------------------------------------------- /src/layers/scheduler.py: -------------------------------------------------------------------------------- 1 | import torch.optim as optim 2 | from layers.scheduler_base import SchedulerBase 3 | 4 | class SGD(SchedulerBase): 5 | def __init__(self, model): 6 | super(SGD, self).__init__() 7 | self.model = model 8 | self._lr = 0.01 9 | self._optimizer = optim.SGD(model.parameters(), self._lr, momentum=0.9, weight_decay=1e-5) 10 | 11 | def schedule(self, epoch, epochs, **kwargs): 12 | lr = 0.01 13 | for param_group in self._optimizer.param_groups: 14 | param_group['lr'] = lr 15 | self._lr = self._optimizer.param_groups[0]['lr'] 16 | return self._optimizer, self._lr 17 | 18 | class SGD2a(SchedulerBase): 19 | def __init__(self, model): 20 | super(SGD2a, self).__init__() 21 | self.model = model 22 | self._lr = 0.005 23 | self._optimizer = optim.SGD(model.parameters(), self._lr, momentum=0.9, weight_decay=1e-5) 24 | 25 | def schedule(self, epoch, epochs, **kwargs): 26 | lr = 0.005 27 | for param_group in self._optimizer.param_groups: 28 | param_group['lr'] = lr 29 | self._lr = self._optimizer.param_groups[0]['lr'] 30 | return self._optimizer, self._lr 31 | 32 | class SGD2c(SchedulerBase): 33 | def __init__(self, model): 34 | super(SGD2c, self).__init__() 35 | self.model = model 36 | self._lr = 0.0025 37 | self._optimizer = optim.SGD(model.parameters(), self._lr, momentum=0.9, weight_decay=1e-5) 38 | 39 | def schedule(self, epoch, epochs, **kwargs): 40 | lr = 0.0025 41 | for param_group in self._optimizer.param_groups: 42 | param_group['lr'] = lr 43 | self._lr = self._optimizer.param_groups[0]['lr'] 44 | return self._optimizer, self._lr 45 | 46 | -------------------------------------------------------------------------------- /src/layers/scheduler_base.py: -------------------------------------------------------------------------------- 1 | import torch.optim as optim 2 | import torch.nn as nn 3 | class SchedulerBase(object): 4 | def __init__(self): 5 | self._is_load_best_weight = True 6 | self._is_load_best_optim = True 7 | self._is_adjust_lr = True 8 | self._lr = 0.01 9 | self._optimizer = None 10 | 11 | def schedule(self,net, epoch, epochs, **kwargs): 12 | raise Exception('Did not implemented') 13 | 14 | def is_load_best_weight(self): 15 | return self._is_load_best_weight 16 | 17 | def is_load_best_optim(self): 18 | return self._is_load_best_optim 19 | 20 | 21 | def reset(self): 22 | self._is_load_best_weight = True 23 | self._load_best_optim = True 24 | 25 | 26 | def is_adjust_lr(self): 27 | return self._is_adjust_lr 28 | 29 | def get_optimizer(self): 30 | return self._optimizer -------------------------------------------------------------------------------- /src/layers/tool.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import builtins 4 | 5 | def open(file, mode=None, encoding=None): 6 | if mode == None: 
mode = 'r' 7 | 8 | if '/' in file: 9 | if 'w' or 'a' in mode: 10 | dir = os.path.dirname(file) 11 | if not os.path.isdir(dir): os.makedirs(dir) 12 | 13 | f = builtins.open(file, mode=mode, encoding=encoding) 14 | return f 15 | 16 | # http://stackoverflow.com/questions/34950201/pycharm-print-end-r-statement-not-working 17 | class Logger(object): 18 | def __init__(self): 19 | self.terminal = sys.stdout #stdout 20 | self.file = None 21 | 22 | def open(self, file, mode=None): 23 | if mode is None: mode ='w' 24 | self.file = open(file, mode) 25 | 26 | def write(self, message, is_terminal=1, is_file=1 ): 27 | if '\r' in message: is_file=0 28 | 29 | if is_terminal == 1: 30 | self.terminal.write(message) 31 | self.terminal.flush() 32 | #time.sleep(1) 33 | 34 | if is_file == 1: 35 | self.file.write(message) 36 | self.file.flush() 37 | 38 | def flush(self): 39 | # this flush method is needed for python 3 compatibility. 40 | # this handles the flush command by doing nothing. 41 | # you might want to specify some extra behavior here. 42 | pass 43 | -------------------------------------------------------------------------------- /src/networks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bestfitting/instance_level_recognition/683f021b4e65876835f028797ec28b0d1071bb45/src/networks/__init__.py -------------------------------------------------------------------------------- /src/networks/efficientnet_gem_fc_face.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '..') 3 | 4 | from layers.scheduler import * 5 | from layers.loss import * 6 | from config.config import * 7 | from layers.pooling import gem 8 | from layers.backbone.efficientnet_pytorch import EfficientNet 9 | from layers.metric_learning import * 10 | 11 | ## net ###################################################################### 12 | class ClsClass(nn.Module): 13 | 14 | def __init__(self, 15 | args, 16 | feature_net='efficientnet_b5', 17 | loss_module='AdaCos', 18 | margin=0.0, 19 | s=30.0, 20 | ): 21 | super().__init__() 22 | num_classes = args.num_classes 23 | 24 | if feature_net == 'efficientnet_b5': 25 | self.backbone = EfficientNet.from_pretrained('efficientnet-b5', model_dir=PRETRAINED_DIR, can_print=args.can_print) 26 | feat_dim = 2048 27 | elif feature_net == 'efficientnet_b6': 28 | self.backbone = EfficientNet.from_pretrained('efficientnet-b6', model_dir=PRETRAINED_DIR, can_print=args.can_print) 29 | feat_dim = 2304 30 | elif feature_net == 'efficientnet_b7': 31 | self.backbone = EfficientNet.from_pretrained('efficientnet-b7', model_dir=PRETRAINED_DIR, can_print=args.can_print) 32 | feat_dim = 2560 33 | 34 | self.in_channels = args.in_channels 35 | self.pool = gem 36 | fc_dim = 512 37 | self.fc = nn.Linear(feat_dim, fc_dim) 38 | self.bn = nn.BatchNorm1d(fc_dim) 39 | if loss_module == 'arcface': 40 | self.face_margin_product = ArcMarginProduct(fc_dim, num_classes, s=s, m=margin) 41 | elif loss_module == 'arcface2': 42 | self.face_margin_product = ArcMarginProduct2(fc_dim, num_classes, s=s, m=margin) 43 | else: 44 | raise ValueError(loss_module) 45 | 46 | def extract_feature(self, x): 47 | x = self.backbone.extract_features(x) 48 | x = self.pool(x) 49 | x = x.view(x.size(0), -1) 50 | x = self.fc(x) 51 | x = self.bn(x) 52 | return x 53 | 54 | def forward(self, x, label, **kargs): 55 | feature = self.extract_feature(x) 56 | out_face = self.face_margin_product(feature, label) 57 | return 
out_face, feature 58 | 59 | def class_efficientnet_b5_gem_fc_arcface_1head(**kwargs): 60 | args = kwargs['args'] 61 | model = ClsClass(args, feature_net='efficientnet_b5', loss_module='arcface', s=30, margin=0.3) 62 | return model 63 | 64 | def class_efficientnet_b5_gem_fc_arcface2_1head(**kwargs): 65 | args = kwargs['args'] 66 | model = ClsClass(args, feature_net='efficientnet_b5', loss_module='arcface2', s=30, margin=0.3) 67 | return model 68 | 69 | def class_efficientnet_b6_gem_fc_arcface2_1head(**kwargs): 70 | args = kwargs['args'] 71 | model = ClsClass(args, feature_net='efficientnet_b6', loss_module='arcface2', s=30, margin=0.3) 72 | return model 73 | 74 | def class_efficientnet_b7_gem_fc_arcface2_1head(**kwargs): 75 | args = kwargs['args'] 76 | model = ClsClass(args, feature_net='efficientnet_b7', loss_module='arcface2', s=30, margin=0.3) 77 | return model 78 | -------------------------------------------------------------------------------- /src/networks/imageclsnet.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from config.config import * 3 | from networks.efficientnet_gem_fc_face import ( 4 | class_efficientnet_b5_gem_fc_arcface_1head, 5 | class_efficientnet_b5_gem_fc_arcface2_1head, 6 | class_efficientnet_b6_gem_fc_arcface2_1head, 7 | class_efficientnet_b7_gem_fc_arcface2_1head, 8 | ) 9 | from networks.resnet_gem_fc_face import class_resnet152_gem_fc_arcface_1head 10 | 11 | def init_network(params): 12 | architecture = params.get('architecture', 'class_efficientnet_b7_gem_fc_arcface2_1head') 13 | args = Namespace(**{ 14 | 'num_classes': params.get('num_classes', 81313), 15 | 'in_channels': params.get('in_channels', 3), 16 | 'can_print': params.get('can_print', False), 17 | }) 18 | net = eval(architecture)(args=args) 19 | return net 20 | -------------------------------------------------------------------------------- /src/networks/resnet_gem_fc_face.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '..') 3 | 4 | from layers.scheduler import * 5 | from layers.loss import * 6 | from layers.backbone.resnet import * 7 | from config.config import * 8 | from layers.pooling import gem 9 | from layers.metric_learning import * 10 | from utilities.model_util import load_pretrained 11 | 12 | model_names = { 13 | 'resnet18': 'resnet18-5c106cde.pth', 14 | 'resnet34': 'resnet34-333f7ec4.pth', 15 | 'resnet50': 'resnet50-19c8e357.pth', 16 | 'resnet101': 'resnet101-5d3b4d8f.pth', 17 | 'resnet152': 'resnet152-b121ed2d.pth', 18 | } 19 | ## net ###################################################################### 20 | class ResnetClass(nn.Module): 21 | 22 | def __init__(self, 23 | args, 24 | feature_net='resnet101', 25 | loss_module='arcface', 26 | s=30.0, 27 | margin=0.3, 28 | ): 29 | super().__init__() 30 | num_classes = args.num_classes 31 | 32 | if feature_net == 'resnet18': 33 | self.backbone = resnet18() 34 | self.EX = 1 35 | elif feature_net == 'resnet34': 36 | self.backbone = resnet34() 37 | self.EX = 1 38 | elif feature_net == 'resnet50': 39 | self.backbone = resnet50() 40 | self.EX = 4 41 | elif feature_net == 'resnet101': 42 | self.backbone = resnet101() 43 | self.EX = 4 44 | elif feature_net == 'resnet152': 45 | self.backbone = resnet152() 46 | self.EX = 4 47 | 48 | self.backbone = load_pretrained(self.backbone, 49 | f'{PRETRAINED_DIR}/{model_names[feature_net]}', 50 | strict=True, can_print=args.can_print) 51 | self.in_channels = 
args.in_channels 52 | 53 | self.pool = gem 54 | fc_dim = 512 55 | self.fc = nn.Linear(512 * self.EX, fc_dim) 56 | self.bn = nn.BatchNorm1d(fc_dim) 57 | 58 | if loss_module == 'arcface': 59 | self.face_margin_product = ArcMarginProduct(fc_dim, num_classes, s=s, m=margin) 60 | elif loss_module == 'arcface2': 61 | self.face_margin_product = ArcMarginProduct2(fc_dim, num_classes, s=s, m=margin) 62 | else: 63 | raise ValueError(loss_module) 64 | 65 | def extract_feature(self, x): 66 | x = self.backbone.conv1(x) 67 | x = self.backbone.bn1(x) 68 | x = self.backbone.relu(x) 69 | x = self.backbone.maxpool(x) 70 | e2 = self.backbone.layer1(x) 71 | e3 = self.backbone.layer2(e2) 72 | e4 = self.backbone.layer3(e3) 73 | e5 = self.backbone.layer4(e4) 74 | x = self.pool(e5) 75 | x = x.view(x.size(0), -1) 76 | x = self.fc(x) 77 | x = self.bn(x) 78 | return x 79 | 80 | def forward(self, x, label, **kargs): 81 | feature = self.extract_feature(x) 82 | out_face = self.face_margin_product(feature, label) 83 | return out_face, feature 84 | 85 | def class_resnet152_gem_fc_arcface_1head(**kwargs): 86 | args = kwargs['args'] 87 | model = ResnetClass(args, feature_net='resnet152', loss_module='arcface', s=30.0, margin=0.3) 88 | return model 89 | -------------------------------------------------------------------------------- /src/networks/superpointglue/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bestfitting/instance_level_recognition/683f021b4e65876835f028797ec28b0d1071bb45/src/networks/superpointglue/__init__.py -------------------------------------------------------------------------------- /src/networks/superpointglue/matching.py: -------------------------------------------------------------------------------- 1 | # %BANNER_BEGIN% 2 | # --------------------------------------------------------------------- 3 | # %COPYRIGHT_BEGIN% 4 | # 5 | # Magic Leap, Inc. ("COMPANY") CONFIDENTIAL 6 | # 7 | # Unpublished Copyright (c) 2020 8 | # Magic Leap, Inc., All Rights Reserved. 9 | # 10 | # NOTICE: All information contained herein is, and remains the property 11 | # of COMPANY. The intellectual and technical concepts contained herein 12 | # are proprietary to COMPANY and may be covered by U.S. and Foreign 13 | # Patents, patents in process, and are protected by trade secret or 14 | # copyright law. Dissemination of this information or reproduction of 15 | # this material is strictly forbidden unless prior written permission is 16 | # obtained from COMPANY. Access to the source code contained herein is 17 | # hereby forbidden to anyone except current COMPANY employees, managers 18 | # or contractors who have executed Confidentiality and Non-disclosure 19 | # agreements explicitly covering such access. 20 | # 21 | # The copyright notice above does not evidence any actual or intended 22 | # publication or disclosure of this source code, which includes 23 | # information that is confidential and/or proprietary, and is a trade 24 | # secret, of COMPANY. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION, 25 | # PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS 26 | # SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS 27 | # STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND 28 | # INTERNATIONAL TREATIES. 
THE RECEIPT OR POSSESSION OF THIS SOURCE 29 | # CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS 30 | # TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, 31 | # USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART. 32 | # 33 | # %COPYRIGHT_END% 34 | # ---------------------------------------------------------------------- 35 | # %AUTHORS_BEGIN% 36 | # 37 | # Originating Authors: Paul-Edouard Sarlin 38 | # 39 | # %AUTHORS_END% 40 | # --------------------------------------------------------------------*/ 41 | # %BANNER_END% 42 | 43 | import torch 44 | 45 | from .superpoint import SuperPoint 46 | from .superglue import SuperGlue 47 | 48 | 49 | class Matching(torch.nn.Module): 50 | """ Image Matching Frontend (SuperPoint + SuperGlue) """ 51 | def __init__(self, config={}): 52 | super().__init__() 53 | self.superpoint = SuperPoint(config.get('superpoint', {})) 54 | self.superglue = SuperGlue(config.get('superglue', {})) 55 | 56 | def do_superpoint(self, data): 57 | pred = {} 58 | 59 | # Extract SuperPoint (keypoints, scores, descriptors) if not provided 60 | if 'keypoints0' not in data: 61 | pred0 = self.superpoint({'image': data['image0']}) 62 | pred = {**pred, **{k+'0': v for k, v in pred0.items()}} 63 | else: 64 | pred = {**pred, **{'keypoints0': data['keypoints0'], 'scores0': data['scores0'], 'descriptors0': data['descriptors0']}} 65 | data.pop('keypoints0') 66 | data.pop('scores0') 67 | data.pop('descriptors0') 68 | if 'keypoints1' not in data: 69 | pred1 = self.superpoint({'image': data['image1']}) 70 | pred = {**pred, **{k+'1': v for k, v in pred1.items()}} 71 | else: 72 | pred = {**pred, **{'keypoints1': data['keypoints1'], 'scores1': data['scores1'], 'descriptors1': data['descriptors1']}} 73 | data.pop('keypoints1') 74 | data.pop('scores1') 75 | data.pop('descriptors1') 76 | 77 | return pred 78 | 79 | def do_superglue(self, data, pred): 80 | # Batch all features 81 | # We should either have i) one image per batch, or 82 | # ii) the same number of local features for all images in the batch. 83 | data = {**data, **pred} 84 | 85 | for k in data: 86 | if isinstance(data[k], (list, tuple)): 87 | data[k] = torch.stack(data[k]) 88 | 89 | # Perform the matching 90 | # print(f'data : {data.keys()}') 91 | pred = {**pred, **self.superglue(data)} 92 | 93 | return pred 94 | 95 | def forward(self, data): 96 | """ Run SuperPoint (optionally) and SuperGlue 97 | SuperPoint is skipped if ['keypoints0', 'keypoints1'] exist in input 98 | Args: 99 | data: dictionary with minimal keys: ['image0', 'image1'] 100 | """ 101 | pred = self.do_superpoint(data) 102 | pred = self.do_superglue(data, pred) 103 | return pred 104 | -------------------------------------------------------------------------------- /src/networks/superpointglue/superglue.py: -------------------------------------------------------------------------------- 1 | # %BANNER_BEGIN% 2 | # --------------------------------------------------------------------- 3 | # %COPYRIGHT_BEGIN% 4 | # 5 | # Magic Leap, Inc. ("COMPANY") CONFIDENTIAL 6 | # 7 | # Unpublished Copyright (c) 2020 8 | # Magic Leap, Inc., All Rights Reserved. 9 | # 10 | # NOTICE: All information contained herein is, and remains the property 11 | # of COMPANY. The intellectual and technical concepts contained herein 12 | # are proprietary to COMPANY and may be covered by U.S. and Foreign 13 | # Patents, patents in process, and are protected by trade secret or 14 | # copyright law. 
Dissemination of this information or reproduction of 15 | # this material is strictly forbidden unless prior written permission is 16 | # obtained from COMPANY. Access to the source code contained herein is 17 | # hereby forbidden to anyone except current COMPANY employees, managers 18 | # or contractors who have executed Confidentiality and Non-disclosure 19 | # agreements explicitly covering such access. 20 | # 21 | # The copyright notice above does not evidence any actual or intended 22 | # publication or disclosure of this source code, which includes 23 | # information that is confidential and/or proprietary, and is a trade 24 | # secret, of COMPANY. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION, 25 | # PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS 26 | # SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS 27 | # STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND 28 | # INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE 29 | # CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS 30 | # TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, 31 | # USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART. 32 | # 33 | # %COPYRIGHT_END% 34 | # ---------------------------------------------------------------------- 35 | # %AUTHORS_BEGIN% 36 | # 37 | # Originating Authors: Paul-Edouard Sarlin 38 | # 39 | # %AUTHORS_END% 40 | # --------------------------------------------------------------------*/ 41 | # %BANNER_END% 42 | 43 | from copy import deepcopy 44 | from pathlib import Path 45 | import torch 46 | from torch import nn 47 | 48 | 49 | def MLP(channels: list, do_bn=True): 50 | """ Multi-layer perceptron """ 51 | n = len(channels) 52 | layers = [] 53 | for i in range(1, n): 54 | layers.append( 55 | nn.Conv1d(channels[i - 1], channels[i], kernel_size=1, bias=True)) 56 | if i < (n-1): 57 | if do_bn: 58 | layers.append(nn.BatchNorm1d(channels[i])) 59 | layers.append(nn.ReLU()) 60 | return nn.Sequential(*layers) 61 | 62 | 63 | def normalize_keypoints(kpts, image_shape): 64 | """ Normalize keypoints locations based on image image_shape""" 65 | _, _, height, width = image_shape 66 | one = kpts.new_tensor(1) 67 | size = torch.stack([one*width, one*height])[None] 68 | center = size / 2 69 | scaling = size.max(1, keepdim=True).values * 0.7 70 | return (kpts - center[:, None, :]) / scaling[:, None, :] 71 | 72 | 73 | class KeypointEncoder(nn.Module): 74 | """ Joint encoding of visual appearance and location using MLPs""" 75 | def __init__(self, feature_dim, layers): 76 | super().__init__() 77 | self.encoder = MLP([3] + layers + [feature_dim]) 78 | nn.init.constant_(self.encoder[-1].bias, 0.0) 79 | 80 | def forward(self, kpts, scores): 81 | inputs = [kpts.transpose(1, 2), scores.unsqueeze(1)] 82 | return self.encoder(torch.cat(inputs, dim=1)) 83 | 84 | 85 | def attention(query, key, value): 86 | dim = query.shape[1] 87 | scores = torch.einsum('bdhn,bdhm->bhnm', query, key) / dim**.5 88 | prob = torch.nn.functional.softmax(scores, dim=-1) 89 | return torch.einsum('bhnm,bdhm->bdhn', prob, value), prob 90 | 91 | 92 | class MultiHeadedAttention(nn.Module): 93 | """ Multi-head attention to increase model expressivitiy """ 94 | def __init__(self, num_heads: int, d_model: int): 95 | super().__init__() 96 | assert d_model % num_heads == 0 97 | self.dim = d_model // num_heads 98 | self.num_heads = num_heads 99 | self.merge = nn.Conv1d(d_model, d_model, kernel_size=1) 100 | self.proj = nn.ModuleList([deepcopy(self.merge) for 
_ in range(3)]) 101 | 102 | def forward(self, query, key, value): 103 | batch_dim = query.size(0) 104 | query, key, value = [l(x).view(batch_dim, self.dim, self.num_heads, -1) 105 | for l, x in zip(self.proj, (query, key, value))] 106 | x, _ = attention(query, key, value) 107 | return self.merge(x.contiguous().view(batch_dim, self.dim*self.num_heads, -1)) 108 | 109 | 110 | class AttentionalPropagation(nn.Module): 111 | def __init__(self, feature_dim: int, num_heads: int): 112 | super().__init__() 113 | self.attn = MultiHeadedAttention(num_heads, feature_dim) 114 | self.mlp = MLP([feature_dim*2, feature_dim*2, feature_dim]) 115 | nn.init.constant_(self.mlp[-1].bias, 0.0) 116 | 117 | def forward(self, x, source): 118 | message = self.attn(x, source, source) 119 | return self.mlp(torch.cat([x, message], dim=1)) 120 | 121 | 122 | class AttentionalGNN(nn.Module): 123 | def __init__(self, feature_dim: int, layer_names: list): 124 | super().__init__() 125 | self.layers = nn.ModuleList([ 126 | AttentionalPropagation(feature_dim, 4) 127 | for _ in range(len(layer_names))]) 128 | self.names = layer_names 129 | 130 | def forward(self, desc0, desc1): 131 | for layer, name in zip(self.layers, self.names): 132 | if name == 'cross': 133 | src0, src1 = desc1, desc0 134 | else: # if name == 'self': 135 | src0, src1 = desc0, desc1 136 | delta0, delta1 = layer(desc0, src0), layer(desc1, src1) 137 | desc0, desc1 = (desc0 + delta0), (desc1 + delta1) 138 | return desc0, desc1 139 | 140 | 141 | def log_sinkhorn_iterations(Z, log_mu, log_nu, iters: int): 142 | """ Perform Sinkhorn Normalization in Log-space for stability""" 143 | u, v = torch.zeros_like(log_mu), torch.zeros_like(log_nu) 144 | for _ in range(iters): 145 | u = log_mu - torch.logsumexp(Z + v.unsqueeze(1), dim=2) 146 | v = log_nu - torch.logsumexp(Z + u.unsqueeze(2), dim=1) 147 | return Z + u.unsqueeze(2) + v.unsqueeze(1) 148 | 149 | 150 | def log_optimal_transport(scores, alpha, iters: int): 151 | """ Perform Differentiable Optimal Transport in Log-space for stability""" 152 | b, m, n = scores.shape 153 | one = scores.new_tensor(1) 154 | ms, ns = (m*one).to(scores), (n*one).to(scores) 155 | 156 | bins0 = alpha.expand(b, m, 1) 157 | bins1 = alpha.expand(b, 1, n) 158 | alpha = alpha.expand(b, 1, 1) 159 | 160 | couplings = torch.cat([torch.cat([scores, bins0], -1), 161 | torch.cat([bins1, alpha], -1)], 1) 162 | 163 | norm = - (ms + ns).log() 164 | log_mu = torch.cat([norm.expand(m), ns.log()[None] + norm]) 165 | log_nu = torch.cat([norm.expand(n), ms.log()[None] + norm]) 166 | log_mu, log_nu = log_mu[None].expand(b, -1), log_nu[None].expand(b, -1) 167 | 168 | Z = log_sinkhorn_iterations(couplings, log_mu, log_nu, iters) 169 | Z = Z - norm # multiply probabilities by M+N 170 | return Z 171 | 172 | 173 | def arange_like(x, dim: int): 174 | return x.new_ones(x.shape[dim]).cumsum(0) - 1 # traceable in 1.1 175 | 176 | 177 | class SuperGlue(nn.Module): 178 | """SuperGlue feature matching middle-end 179 | 180 | Given two sets of keypoints and locations, we determine the 181 | correspondences by: 182 | 1. Keypoint Encoding (normalization + visual feature and location fusion) 183 | 2. Graph Neural Network with multiple self and cross-attention layers 184 | 3. Final projection layer 185 | 4. Optimal Transport Layer (a differentiable Hungarian matching algorithm) 186 | 5. Thresholding matrix based on mutual exclusivity and a match_threshold 187 | 188 | The correspondence ids use -1 to indicate non-matching points. 
189 | 190 | Paul-Edouard Sarlin, Daniel DeTone, Tomasz Malisiewicz, and Andrew 191 | Rabinovich. SuperGlue: Learning Feature Matching with Graph Neural 192 | Networks. In CVPR, 2020. https://arxiv.org/abs/1911.11763 193 | 194 | """ 195 | default_config = { 196 | 'descriptor_dim': 256, 197 | 'weights': 'indoor', 198 | 'keypoint_encoder': [32, 64, 128, 256], 199 | 'GNN_layers': ['self', 'cross'] * 9, 200 | 'sinkhorn_iterations': 100, 201 | 'match_threshold': 0.2, 202 | } 203 | 204 | def __init__(self, config): 205 | super().__init__() 206 | self.config = {**self.default_config, **config} 207 | 208 | self.kenc = KeypointEncoder( 209 | self.config['descriptor_dim'], self.config['keypoint_encoder']) 210 | 211 | self.gnn = AttentionalGNN( 212 | self.config['descriptor_dim'], self.config['GNN_layers']) 213 | 214 | self.final_proj = nn.Conv1d( 215 | self.config['descriptor_dim'], self.config['descriptor_dim'], 216 | kernel_size=1, bias=True) 217 | 218 | bin_score = torch.nn.Parameter(torch.tensor(1.)) 219 | self.register_parameter('bin_score', bin_score) 220 | 221 | assert self.config['weights'] in ['indoor', 'outdoor'] 222 | path = Path(self.config['model_dir']) / 'weights/superglue_{}.pth'.format(self.config['weights']) 223 | self.load_state_dict(torch.load(str(path))) 224 | print('Loaded SuperGlue model (\"{}\" weights)'.format( 225 | self.config['weights'])) 226 | 227 | def forward(self, data): 228 | """Run SuperGlue on a pair of keypoints and descriptors""" 229 | desc0, desc1 = data['descriptors0'], data['descriptors1'] 230 | kpts0, kpts1 = data['keypoints0'], data['keypoints1'] 231 | 232 | if kpts0.shape[1] == 0 or kpts1.shape[1] == 0: # no keypoints 233 | shape0, shape1 = kpts0.shape[:-1], kpts1.shape[:-1] 234 | return { 235 | 'matches0': kpts0.new_full(shape0, -1, dtype=torch.int), 236 | 'matches1': kpts1.new_full(shape1, -1, dtype=torch.int), 237 | 'matching_scores0': kpts0.new_zeros(shape0), 238 | 'matching_scores1': kpts1.new_zeros(shape1), 239 | } 240 | 241 | # Keypoint normalization. 242 | kpts0 = normalize_keypoints(kpts0, data['image0'].shape) 243 | kpts1 = normalize_keypoints(kpts1, data['image1'].shape) 244 | 245 | # Keypoint MLP encoder. 246 | desc0 = desc0 + self.kenc(kpts0, data['scores0']) 247 | desc1 = desc1 + self.kenc(kpts1, data['scores1']) 248 | 249 | # Multi-layer Transformer network. 250 | desc0, desc1 = self.gnn(desc0, desc1) 251 | 252 | # Final MLP projection. 253 | mdesc0, mdesc1 = self.final_proj(desc0), self.final_proj(desc1) 254 | 255 | # Compute matching descriptor distance. 256 | scores = torch.einsum('bdn,bdm->bnm', mdesc0, mdesc1) 257 | scores = scores / self.config['descriptor_dim']**.5 258 | 259 | # Run the optimal transport. 260 | scores = log_optimal_transport( 261 | scores, self.bin_score, 262 | iters=self.config['sinkhorn_iterations']) 263 | 264 | # Get the matches with score above "match_threshold". 
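        # The block below is a mutual-nearest-neighbour check: after dropping the
        # dustbin row/column, each keypoint in image 0 picks its best partner in
        # image 1 (max over dim 2) and vice versa (max over dim 1). A pair survives
        # only if the two choices agree; its confidence is the exponentiated
        # transport score, and anything non-mutual or below match_threshold is
        # written as -1 (no match).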
265 | max0, max1 = scores[:, :-1, :-1].max(2), scores[:, :-1, :-1].max(1) 266 | indices0, indices1 = max0.indices, max1.indices 267 | mutual0 = arange_like(indices0, 1)[None] == indices1.gather(1, indices0) 268 | mutual1 = arange_like(indices1, 1)[None] == indices0.gather(1, indices1) 269 | zero = scores.new_tensor(0) 270 | mscores0 = torch.where(mutual0, max0.values.exp(), zero) 271 | mscores1 = torch.where(mutual1, mscores0.gather(1, indices1), zero) 272 | valid0 = mutual0 & (mscores0 > self.config['match_threshold']) 273 | valid1 = mutual1 & valid0.gather(1, indices1) 274 | indices0 = torch.where(valid0, indices0, indices0.new_tensor(-1)) 275 | indices1 = torch.where(valid1, indices1, indices1.new_tensor(-1)) 276 | 277 | return { 278 | 'matches0': indices0, # use -1 for invalid match 279 | 'matches1': indices1, # use -1 for invalid match 280 | 'matching_scores0': mscores0, 281 | 'matching_scores1': mscores1, 282 | } 283 | -------------------------------------------------------------------------------- /src/networks/superpointglue/superpoint.py: -------------------------------------------------------------------------------- 1 | # %BANNER_BEGIN% 2 | # --------------------------------------------------------------------- 3 | # %COPYRIGHT_BEGIN% 4 | # 5 | # Magic Leap, Inc. ("COMPANY") CONFIDENTIAL 6 | # 7 | # Unpublished Copyright (c) 2020 8 | # Magic Leap, Inc., All Rights Reserved. 9 | # 10 | # NOTICE: All information contained herein is, and remains the property 11 | # of COMPANY. The intellectual and technical concepts contained herein 12 | # are proprietary to COMPANY and may be covered by U.S. and Foreign 13 | # Patents, patents in process, and are protected by trade secret or 14 | # copyright law. Dissemination of this information or reproduction of 15 | # this material is strictly forbidden unless prior written permission is 16 | # obtained from COMPANY. Access to the source code contained herein is 17 | # hereby forbidden to anyone except current COMPANY employees, managers 18 | # or contractors who have executed Confidentiality and Non-disclosure 19 | # agreements explicitly covering such access. 20 | # 21 | # The copyright notice above does not evidence any actual or intended 22 | # publication or disclosure of this source code, which includes 23 | # information that is confidential and/or proprietary, and is a trade 24 | # secret, of COMPANY. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION, 25 | # PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS 26 | # SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS 27 | # STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND 28 | # INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE 29 | # CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS 30 | # TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, 31 | # USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART. 
32 | # 33 | # %COPYRIGHT_END% 34 | # ---------------------------------------------------------------------- 35 | # %AUTHORS_BEGIN% 36 | # 37 | # Originating Authors: Paul-Edouard Sarlin 38 | # 39 | # %AUTHORS_END% 40 | # --------------------------------------------------------------------*/ 41 | # %BANNER_END% 42 | 43 | from pathlib import Path 44 | import torch 45 | from torch import nn 46 | 47 | def simple_nms(scores, nms_radius: int): 48 | """ Fast Non-maximum suppression to remove nearby points """ 49 | assert(nms_radius >= 0) 50 | 51 | def max_pool(x): 52 | return torch.nn.functional.max_pool2d( 53 | x, kernel_size=nms_radius*2+1, stride=1, padding=nms_radius) 54 | 55 | zeros = torch.zeros_like(scores) 56 | max_mask = scores == max_pool(scores) 57 | for _ in range(2): 58 | supp_mask = max_pool(max_mask.float()) > 0 59 | supp_scores = torch.where(supp_mask, zeros, scores) 60 | new_max_mask = supp_scores == max_pool(supp_scores) 61 | max_mask = max_mask | (new_max_mask & (~supp_mask)) 62 | return torch.where(max_mask, scores, zeros) 63 | 64 | 65 | def remove_borders(keypoints, scores, border: int, height: int, width: int): 66 | """ Removes keypoints too close to the border """ 67 | mask_h = (keypoints[:, 0] >= border) & (keypoints[:, 0] < (height - border)) 68 | mask_w = (keypoints[:, 1] >= border) & (keypoints[:, 1] < (width - border)) 69 | mask = mask_h & mask_w 70 | return keypoints[mask], scores[mask] 71 | 72 | 73 | def top_k_keypoints(keypoints, scores, k: int): 74 | if k >= len(keypoints): 75 | return keypoints, scores 76 | scores, indices = torch.topk(scores, k, dim=0) 77 | return keypoints[indices], scores 78 | 79 | 80 | def sample_descriptors(keypoints, descriptors, s: int = 8): 81 | """ Interpolate descriptors at keypoint locations """ 82 | b, c, h, w = descriptors.shape 83 | keypoints = keypoints - s / 2 + 0.5 84 | keypoints /= torch.tensor([(w*s - s/2 - 0.5), (h*s - s/2 - 0.5)]).to(keypoints)[None] 85 | keypoints = keypoints*2 - 1 # normalize to (-1, 1) 86 | args = {'align_corners': True} if int(torch.__version__[2]) > 2 else {} 87 | descriptors = torch.nn.functional.grid_sample( 88 | descriptors, keypoints.view(b, 1, -1, 2), mode='bilinear', **args) 89 | descriptors = torch.nn.functional.normalize( 90 | descriptors.reshape(b, c, -1), p=2, dim=1) 91 | return descriptors 92 | 93 | 94 | class SuperPoint(nn.Module): 95 | """SuperPoint Convolutional Detector and Descriptor 96 | 97 | SuperPoint: Self-Supervised Interest Point Detection and 98 | Description. Daniel DeTone, Tomasz Malisiewicz, and Andrew 99 | Rabinovich. In CVPRW, 2019. 
https://arxiv.org/abs/1712.07629 100 | 101 | """ 102 | default_config = { 103 | 'descriptor_dim': 256, 104 | 'nms_radius': 4, 105 | 'keypoint_threshold': 0.005, 106 | 'max_keypoints': -1, 107 | 'remove_borders': 4, 108 | } 109 | 110 | def __init__(self, config): 111 | super().__init__() 112 | self.config = {**self.default_config, **config} 113 | 114 | self.relu = nn.ReLU(inplace=True) 115 | self.pool = nn.MaxPool2d(kernel_size=2, stride=2) 116 | c1, c2, c3, c4, c5 = 64, 64, 128, 128, 256 117 | 118 | self.conv1a = nn.Conv2d(1, c1, kernel_size=3, stride=1, padding=1) 119 | self.conv1b = nn.Conv2d(c1, c1, kernel_size=3, stride=1, padding=1) 120 | self.conv2a = nn.Conv2d(c1, c2, kernel_size=3, stride=1, padding=1) 121 | self.conv2b = nn.Conv2d(c2, c2, kernel_size=3, stride=1, padding=1) 122 | self.conv3a = nn.Conv2d(c2, c3, kernel_size=3, stride=1, padding=1) 123 | self.conv3b = nn.Conv2d(c3, c3, kernel_size=3, stride=1, padding=1) 124 | self.conv4a = nn.Conv2d(c3, c4, kernel_size=3, stride=1, padding=1) 125 | self.conv4b = nn.Conv2d(c4, c4, kernel_size=3, stride=1, padding=1) 126 | 127 | self.convPa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1) 128 | self.convPb = nn.Conv2d(c5, 65, kernel_size=1, stride=1, padding=0) 129 | 130 | self.convDa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1) 131 | self.convDb = nn.Conv2d( 132 | c5, self.config['descriptor_dim'], 133 | kernel_size=1, stride=1, padding=0) 134 | 135 | path = Path(self.config['model_dir']) / 'weights/superpoint_v1.pth' 136 | self.load_state_dict(torch.load(str(path))) 137 | 138 | mk = self.config['max_keypoints'] 139 | if mk == 0 or mk < -1: 140 | raise ValueError('\"max_keypoints\" must be positive or \"-1\"') 141 | 142 | print('Loaded SuperPoint model') 143 | 144 | def forward(self, data): 145 | """ Compute keypoints, scores, descriptors for image """ 146 | # Shared Encoder 147 | x = self.relu(self.conv1a(data['image'])) 148 | x = self.relu(self.conv1b(x)) 149 | x = self.pool(x) 150 | x = self.relu(self.conv2a(x)) 151 | x = self.relu(self.conv2b(x)) 152 | x = self.pool(x) 153 | x = self.relu(self.conv3a(x)) 154 | x = self.relu(self.conv3b(x)) 155 | x = self.pool(x) 156 | x = self.relu(self.conv4a(x)) 157 | x = self.relu(self.conv4b(x)) 158 | 159 | # Compute the dense keypoint scores 160 | cPa = self.relu(self.convPa(x)) 161 | scores = self.convPb(cPa) 162 | scores = torch.nn.functional.softmax(scores, 1)[:, :-1] 163 | b, _, h, w = scores.shape 164 | scores = scores.permute(0, 2, 3, 1).reshape(b, h, w, 8, 8) 165 | scores = scores.permute(0, 1, 3, 2, 4).reshape(b, h*8, w*8) 166 | scores = simple_nms(scores, self.config['nms_radius']) 167 | 168 | # Extract keypoints 169 | keypoints = [ 170 | torch.nonzero(s > self.config['keypoint_threshold']) 171 | for s in scores] 172 | scores = [s[tuple(k.t())] for s, k in zip(scores, keypoints)] 173 | 174 | # Discard keypoints near the image borders 175 | keypoints, scores = list(zip(*[ 176 | remove_borders(k, s, self.config['remove_borders'], h*8, w*8) 177 | for k, s in zip(keypoints, scores)])) 178 | 179 | # Keep the k keypoints with highest score 180 | if self.config['max_keypoints'] >= 0: 181 | keypoints, scores = list(zip(*[ 182 | top_k_keypoints(k, s, self.config['max_keypoints']) 183 | for k, s in zip(keypoints, scores)])) 184 | 185 | # Convert (h, w) to (x, y) 186 | keypoints = [torch.flip(k, [1]).float() for k in keypoints] 187 | 188 | # Compute the dense descriptors 189 | cDa = self.relu(self.convDa(x)) 190 | descriptors = self.convDb(cDa) 191 | descriptors = 
torch.nn.functional.normalize(descriptors, p=2, dim=1) 192 | 193 | # Extract descriptors 194 | descriptors = [sample_descriptors(k[None], d[None], 8)[0] 195 | for k, d in zip(keypoints, descriptors)] 196 | 197 | return { 198 | 'keypoints': keypoints, 199 | 'scores': scores, 200 | 'descriptors': descriptors, 201 | } 202 | -------------------------------------------------------------------------------- /src/train.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import argparse 3 | from timeit import default_timer as timer 4 | 5 | from torch.nn import DataParallel 6 | import torch.distributed as dist 7 | from torch.utils.data import DataLoader, RandomSampler, SequentialSampler 8 | from apex.parallel import DistributedDataParallel 9 | from apex.parallel import convert_syncbn_model 10 | 11 | from config.config import * 12 | from layers.loss import * 13 | from layers.scheduler import * 14 | from utilities.augment_util import * 15 | from layers.tool import Logger 16 | from networks.imageclsnet import init_network 17 | from dataset.landmark_dataset import RetrievalDataset, image_collate 18 | from utilities.metric_util import generate_score_by_model 19 | 20 | parser = argparse.ArgumentParser(description='PyTorch Classification') 21 | parser.add_argument('--out_dir', type=str, help='destination where trained network should be saved') 22 | parser.add_argument('--gpu_id', default='0', type=str, help='gpu id used for training (default: 0)') 23 | parser.add_argument('--arch', default='class_efficientnet_b7_gem_fc_arcface2_1head', type=str, 24 | help='model architecture (default: class_efficientnet_b7_gem_fc_arcface2_1head)') 25 | parser.add_argument('--num_classes', default=81313, type=int, help='number of classes (default: 81313)') 26 | parser.add_argument('--in_channels', default=3, type=int, help='in channels (default: 3)') 27 | parser.add_argument('--distributed', default=1, type=int, help='distributed train (default: 1)') 28 | parser.add_argument('--local_rank', type=int, default=0) 29 | parser.add_argument('--loss', default='LabelSmoothingLossV1', type=str, help='loss function (default: LabelSmoothingLossV1)') 30 | parser.add_argument('--scheduler', default='SGD', type=str, help='scheduler name (default: SGD)') 31 | parser.add_argument('--epochs', default=7, type=int, help='number epochs to train (default: 7)') 32 | parser.add_argument('--resume', default=None, type=str, help='name of the latest checkpoint (default: None)') 33 | parser.add_argument('--aug_version', default='1', type=str, help='argument version (default: 1)') 34 | parser.add_argument('--split_type', default='v2c', type=str) 35 | parser.add_argument('--batch_size', default=7, type=int, help='train mini-batch size (default: 7)') 36 | parser.add_argument('--workers', default=4, type=int, help='number of data loading workers (default: 4)') 37 | parser.add_argument('--preprocessing', type=int, default=1) 38 | parser.add_argument('--save_every_epoch', type=float, default=0.1) 39 | parser.add_argument('--img_size', default=448, type=int, help='image size (default: 448)') 40 | parser.add_argument('--eval_img_size', default=512, type=int, help='image size (default: 512)') 41 | parser.add_argument('--model_file', default=None, type=str, help='fine tune with model file (default: None)') 42 | args = parser.parse_args() 43 | 44 | def main(): 45 | args.can_print = (args.distributed and args.local_rank == 0) or (not args.distributed) 46 | 47 | log_out_dir = 
f'{RESULT_DIR}/logs/{args.out_dir}' 48 | os.makedirs(log_out_dir, exist_ok=True) 49 | if args.can_print: 50 | log = Logger() 51 | log.open(f'{log_out_dir}/log.train.txt', mode='a') 52 | else: 53 | log = None 54 | 55 | model_out_dir = f'{RESULT_DIR}/models/{args.out_dir}' 56 | if args.can_print: 57 | log.write(f'>> Creating directory if it does not exist:\n>> {model_out_dir}\n') 58 | os.makedirs(model_out_dir, exist_ok=True) 59 | 60 | # set cuda visible device 61 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id 62 | if args.distributed: 63 | torch.cuda.set_device(args.local_rank) 64 | 65 | # set random seeds 66 | torch.manual_seed(0) 67 | torch.cuda.manual_seed_all(0) 68 | np.random.seed(0) 69 | 70 | model_params = {} 71 | model_params['architecture'] = args.arch 72 | model_params['num_classes'] = args.num_classes 73 | model_params['in_channels'] = args.in_channels 74 | model_params['can_print'] = args.can_print 75 | model = init_network(model_params) 76 | 77 | # move network to gpu 78 | if args.distributed: 79 | dist.init_process_group(backend='nccl', init_method='env://') 80 | model = convert_syncbn_model(model) 81 | model.cuda() 82 | if args.distributed: 83 | model = DistributedDataParallel(model, delay_allreduce=True) 84 | else: 85 | model = DataParallel(model) 86 | 87 | # define loss function (criterion) 88 | try: 89 | criterion = eval(args.loss)().cuda() 90 | except: 91 | raise RuntimeError(f'Loss {args.loss} not available!') 92 | 93 | start_epoch = 0 94 | best_score = 0 95 | best_epoch = 0 96 | 97 | # define scheduler 98 | try: 99 | scheduler = eval(args.scheduler)(model) 100 | except: 101 | raise RuntimeError(f'Scheduler {args.scheduler} not available!') 102 | 103 | # optionally resume from a checkpoint 104 | reset_epoch = True 105 | pretrained_file = None 106 | if args.model_file: 107 | reset_epoch = True 108 | pretrained_file = args.model_file 109 | if args.resume: 110 | reset_epoch = False 111 | pretrained_file = f'{model_out_dir}/{args.resume}' 112 | if pretrained_file and os.path.isfile(pretrained_file): 113 | # load checkpoint weights and update model and optimizer 114 | if args.can_print: 115 | log.write(f'>> Loading checkpoint:\n>> {pretrained_file}\n') 116 | 117 | checkpoint = torch.load(pretrained_file) 118 | if not reset_epoch: 119 | start_epoch = checkpoint['epoch'] 120 | best_epoch = checkpoint['best_epoch'] 121 | best_score = checkpoint['best_score'] 122 | model.module.load_state_dict(checkpoint['state_dict']) 123 | if args.can_print: 124 | if reset_epoch: 125 | log.write(f'>>>> loaded checkpoint:\n>>>> {pretrained_file}\n') 126 | else: 127 | log.write(f'>>>> loaded checkpoint:\n>>>> {pretrained_file} (epoch {checkpoint["epoch"]:.2f})\n') 128 | else: 129 | if args.can_print: 130 | log.write(f'>> No checkpoint found at {pretrained_file}\n') 131 | 132 | # Data loading code 133 | train_transform = eval(f'train_multi_augment{args.aug_version}') 134 | train_split_file = f'{DATA_DIR}/split/{args.split_type}/random_train_cv0.csv' 135 | valid_split_file = f'{DATA_DIR}/split/{args.split_type}/random_valid_cv0.csv' 136 | train_dataset = RetrievalDataset( 137 | args, 138 | train_split_file, 139 | transform=train_transform, 140 | data_type='train', 141 | ) 142 | valid_dataset = RetrievalDataset( 143 | args, 144 | valid_split_file, 145 | transform=None, 146 | data_type='valid', 147 | ) 148 | if args.distributed: 149 | train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) 150 | valid_sampler = torch.utils.data.distributed.DistributedSampler(valid_dataset) 
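        # DistributedSampler hands each process its own shard of the dataset, so
        # every rank sees different samples and the effective batch size becomes
        # batch_size * world_size; the non-distributed branch below falls back to
        # plain random / sequential sampling.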
151 | else: 152 | train_sampler = RandomSampler(train_dataset) 153 | valid_sampler = SequentialSampler(valid_dataset) 154 | train_loader = DataLoader( 155 | train_dataset, 156 | sampler=train_sampler, 157 | batch_size=args.batch_size, 158 | drop_last=True, 159 | num_workers=args.workers, 160 | pin_memory=True, 161 | collate_fn=image_collate, 162 | ) 163 | valid_loader = DataLoader( 164 | valid_dataset, 165 | sampler=valid_sampler, 166 | batch_size=args.batch_size, 167 | drop_last=False, 168 | num_workers=args.workers, 169 | pin_memory=True, 170 | collate_fn=image_collate, 171 | ) 172 | 173 | train(args, train_loader, valid_loader, model, criterion, scheduler, log, best_epoch, best_score, start_epoch, model_out_dir) 174 | 175 | def train_generator(dataloader): 176 | while True: 177 | for it, (images, labels) in enumerate(dataloader, 0): 178 | if it == len(dataloader)-1: 179 | dataloader.dataset.on_epoch_end() 180 | yield images, labels 181 | 182 | def reduce_tensor(tensor: torch.Tensor): 183 | rt = tensor.clone() 184 | dist.all_reduce(rt, op=dist.reduce_op.SUM) 185 | rt /= dist.get_world_size() 186 | return rt 187 | 188 | def train(args, train_loader, valid_loader, model, criterion, scheduler, log, best_epoch, best_score, start_epoch, model_out_dir): 189 | if args.can_print: 190 | log.write('** start training here! **\n') 191 | log.write('\n') 192 | log.write('epoch iter rate | train_loss/acc | valid_loss/acc/map100 | best_epoch/score | min (valid_min) \n') 193 | log.write('--------------------------------------------------------------------------------------------------------------------\n') 194 | 195 | model.train() 196 | last_cpt_epoch = start_epoch 197 | epoch = start_epoch 198 | max_iters = args.epochs * len(train_loader) 199 | global_step = int(start_epoch * len(train_loader)) 200 | smooth_loss = 0.0 201 | smooth_acc = 0.0 202 | train_num = 0 203 | start = timer() 204 | for it, iter_data in enumerate(train_generator(train_loader)): 205 | if global_step >= max_iters: 206 | break 207 | 208 | optimizer, rate = scheduler.schedule(epoch, args.epochs, best_epoch=best_epoch) 209 | images, labels = iter_data 210 | images = Variable(images.cuda()) 211 | labels = Variable(labels.cuda()) 212 | outputs = model(images, label=labels) 213 | loss = criterion(outputs, labels, epoch=epoch) 214 | if type(outputs) == tuple: 215 | logits = outputs[0] 216 | else: 217 | logits = outputs 218 | probs = F.softmax(logits).data 219 | 220 | optimizer.zero_grad() 221 | loss.backward() 222 | optimizer.step() 223 | 224 | train_acc = (probs.argmax(dim=1) == labels).float().mean() 225 | if args.distributed: 226 | train_loss = reduce_tensor(loss.data) 227 | train_acc = reduce_tensor(train_acc) 228 | else: 229 | train_loss = loss 230 | if smooth_loss == 0.0: 231 | smooth_loss = train_loss.item() 232 | smooth_acc = train_acc.item() 233 | smooth_loss = 0.99 * smooth_loss + 0.01 * train_loss.item() 234 | smooth_acc = 0.99 * smooth_acc + 0.01 * train_acc.item() 235 | global_step += 1 236 | if args.distributed: 237 | train_num += (args.batch_size * dist.get_world_size()) 238 | else: 239 | train_num += args.batch_size 240 | epoch = start_epoch + train_num / len(train_loader.dataset) 241 | 242 | if args.can_print: 243 | print('\r%5.2f %6d %0.6f | %6.4f %6.4f | ... 
' % (epoch, global_step, rate, smooth_loss, smooth_acc), end='', flush=True) 244 | 245 | if int(epoch * 1000) > 0 and int(epoch * 1000) % int(args.save_every_epoch * 1000) == 0 \ 246 | and epoch - last_cpt_epoch > args.save_every_epoch * 0.1: 247 | need_eval = True 248 | last_cpt_epoch = epoch 249 | else: 250 | need_eval = False 251 | 252 | model_file = f'{model_out_dir}/{epoch:.2f}.pth' 253 | if need_eval and args.can_print: 254 | save_model(model, model_file, best_score, best_epoch, epoch) 255 | 256 | if need_eval: 257 | model.eval() 258 | valid_start = timer() 259 | with torch.no_grad(): 260 | valid_loss, valid_acc = validate(args, model, epoch, valid_loader, criterion) 261 | if args.can_print: 262 | valid_map100 = generate_score_by_model( 263 | model, img_size=(args.eval_img_size, args.eval_img_size), 264 | batch_size=1, preprocessing=args.preprocessing 265 | ) 266 | else: 267 | valid_map100 = 0 268 | valid_end = timer() 269 | valid_run_time = (valid_end - valid_start) / 60. 270 | 271 | if valid_map100 > best_score: 272 | best_score = valid_map100 273 | best_epoch = epoch 274 | final_model_file = f'{model_out_dir}/final.pth' 275 | save_model(model, final_model_file, best_score, best_epoch, epoch) 276 | save_model(model, model_file, best_score, best_epoch, epoch) 277 | 278 | end = timer() 279 | time = (end - start) / 60 280 | start = timer() 281 | 282 | if args.can_print: 283 | print('\r', end='', flush=True) 284 | log.write( 285 | '%5.2f %6d %0.6f | %6.4f %6.4f | %6.4f %6.4f %6.4f | %5.2f %6.4f | %3.1f min (%3.1f min) \n' % \ 286 | (epoch, global_step, rate, smooth_loss, smooth_acc, valid_loss, valid_acc, valid_map100, best_epoch, best_score, time, valid_run_time) 287 | ) 288 | model.train() 289 | 290 | def validate(args, model, epoch, valid_loader, criterion): 291 | valid_num = 0 292 | valid_loss = 0 293 | valid_acc = 0 294 | for it, iter_data in enumerate(valid_loader, 0): 295 | images, labels = iter_data 296 | 297 | images = Variable(images.cuda()) 298 | labels = Variable(labels.cuda()) 299 | 300 | outputs = model(images, label=labels) 301 | loss = criterion(outputs, labels, epoch=epoch) 302 | if type(outputs) == tuple: 303 | logits = outputs[0] 304 | else: 305 | logits = outputs 306 | probs = F.softmax(logits).data 307 | batch_size = len(images) 308 | valid_acc_batch = (probs.argmax(dim=1) == labels).float().mean() 309 | if args.distributed: 310 | valid_loss_batch = reduce_tensor(loss.data) 311 | valid_acc_batch = reduce_tensor(valid_acc_batch) 312 | else: 313 | valid_loss_batch = loss 314 | 315 | valid_num += batch_size 316 | valid_loss += batch_size * valid_loss_batch.item() 317 | valid_acc += batch_size * valid_acc_batch.item() 318 | 319 | valid_loss = valid_loss / valid_num 320 | valid_acc = valid_acc / valid_num 321 | return valid_loss, valid_acc 322 | 323 | def save_model(model, model_file, best_score, best_epoch, epoch): 324 | if type(model) == DataParallel or type(model) == DistributedDataParallel: 325 | state_dict = model.module.state_dict() 326 | else: 327 | state_dict = model.state_dict() 328 | state_dict_to_save=dict() 329 | for key in state_dict.keys(): 330 | state_dict_to_save[key] = state_dict[key].cpu() 331 | torch.save({ 332 | 'best_score': best_score, 333 | 'state_dict': state_dict_to_save, 334 | 'best_epoch': best_epoch, 335 | 'epoch': epoch, 336 | }, model_file) 337 | 338 | if __name__ == '__main__': 339 | main() 340 | -------------------------------------------------------------------------------- /src/utilities/__init__.py: 
-------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Module to extract deep local features.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | # pylint: disable=unused-import 21 | # from delf.protos import aggregation_config_pb2 22 | # from delf.protos import box_pb2 23 | # from delf.protos import datum_pb2 24 | # from delf.protos import delf_config_pb2 25 | # from delf.protos import feature_pb2 26 | # from delf.python import box_io 27 | # from delf.python import datum_io 28 | # from delf.python import feature_aggregation_extractor 29 | # from delf.python import feature_aggregation_similarity 30 | # from delf.python import feature_extractor 31 | # from delf.python import feature_io 32 | # from delf.python import utils 33 | # from delf.python.examples import detector 34 | # from delf.python.examples import extractor 35 | # from delf.python import detect_to_retrieve 36 | # from delf.python import training 37 | # from delf.python.training import model 38 | # from delf.python.training import datasets 39 | # pylint: enable=unused-import 40 | -------------------------------------------------------------------------------- /src/utilities/augment_rand.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from PIL import Image, ImageOps, ImageEnhance, ImageDraw 4 | 5 | fast_randaugment_list = [ 6 | ('Identity', 0, 1), 7 | ('AutoContrast', 0, 10), 8 | ('Block_fade', 0, 0.5), 9 | ('Brightness', 0.5, 1.5), 10 | ('Color', 0.0, 2.0), 11 | ('Contrast', 0.0, 2.0), 12 | ('Cutout', 0, 0.5), 13 | ('Rotate', -20, 20), 14 | ('ShearX', 0., 0.1), 15 | ('ShearY', 0., 0.1), 16 | ('TranslateX', 0., 0.25), 17 | ('TranslateY', 0., 0.25), 18 | ] 19 | 20 | def apply_op(image, op, severity): 21 | pil_img = Image.fromarray(image) 22 | pil_img = eval(op)(pil_img, severity) 23 | return np.asarray(pil_img) 24 | 25 | def randaugment_base(augment_list, img, n, m, div=10, prob=1.): 26 | # ops = np.random.choice(augment_list, size=n) 27 | ops_idx = np.random.choice(len(augment_list), replace=False, size=n) 28 | ops = np.array(augment_list)[ops_idx] 29 | for op, minseverity, maxseverity in ops: 30 | minseverity = float(minseverity) 31 | maxseverity = float(maxseverity) 32 | severity = (float(m) / div) * float(maxseverity - minseverity) + minseverity 33 | if np.random.random() < prob: 34 | img = apply_op(img, str(op), severity) 35 | return img 36 | 37 | def CutoutAbs(img, v): 38 | if v < 0: 39 | return img 40 | w, h = img.size 41 | x0 = np.random.uniform(w) 42 | y0 = np.random.uniform(h) 43 | 44 | x0 = int(max(0, x0 - v / 2.)) 45 | y0 = int(max(0, y0 - v / 2.)) 46 | x1 = min(w, x0 + v) 47 | y1 = min(h, y0 + 
v) 48 | 49 | xy = (x0, y0, x1, y1) 50 | color = (125, 123, 114) 51 | img = img.copy() 52 | ImageDraw.Draw(img).rectangle(xy, color) 53 | return img 54 | 55 | def do_random_block_fade(image, magnitude=0.5): 56 | size = [0.1, magnitude] 57 | height, width = image.shape[:2] 58 | 59 | # get bounding box 60 | m = image.copy() 61 | cv2.rectangle(m, (0, 0), (height, width), 1, 5) 62 | m = image < 0.5 63 | if m.sum() == 0: return image 64 | 65 | m = np.where(m) 66 | y0, y1, x0, x1 = np.min(m[0]), np.max(m[0]), np.min(m[1]), np.max(m[1]) 67 | w = x1 - x0 68 | h = y1 - y0 69 | if w * h < 10: return image 70 | 71 | ew, eh = np.random.uniform(*size, 2) 72 | ew = int(ew * w) 73 | eh = int(eh * h) 74 | 75 | ex = np.random.randint(0, w - ew) + x0 76 | ey = np.random.randint(0, h - eh) + y0 77 | 78 | image[ey:ey + eh, ex:ex + ew] *= np.random.uniform(0.1, 0.5) # 1 # 79 | image = np.clip(image, 0, 1) 80 | return image 81 | 82 | def Identity(img, _): 83 | return img 84 | 85 | def AutoContrast(img, v): 86 | return ImageOps.autocontrast(img, v) 87 | 88 | def Rotate(img, v): 89 | if np.random.random() > 0.5: 90 | v = -v 91 | return img.rotate(v) 92 | 93 | def Color(img, v): 94 | return ImageEnhance.Color(img).enhance(v) 95 | 96 | def Contrast(img, v): 97 | return ImageEnhance.Contrast(img).enhance(v) 98 | 99 | def Brightness(img, v): 100 | return ImageEnhance.Brightness(img).enhance(v) 101 | 102 | def ShearX(img, v): 103 | if np.random.random() > 0.5: 104 | v = -v 105 | return img.transform(img.size, Image.AFFINE, (1, v, 0, 0, 1, 0)) 106 | 107 | def ShearY(img, v): 108 | if np.random.random() > 0.5: 109 | v = -v 110 | return img.transform(img.size, Image.AFFINE, (1, 0, 0, v, 1, 0)) 111 | 112 | def TranslateX(img, v): 113 | if np.random.random() > 0.5: 114 | v = -v 115 | v = v * img.size[0] 116 | return img.transform(img.size, Image.AFFINE, (1, 0, v, 0, 1, 0)) 117 | 118 | def TranslateY(img, v): 119 | if np.random.random() > 0.5: 120 | v = -v 121 | v = v * img.size[1] 122 | return img.transform(img.size, Image.AFFINE, (1, 0, 0, 0, 1, v)) 123 | 124 | def Cutout(img, v): 125 | if v <= 0.: 126 | return img 127 | 128 | v = v * img.size[0] 129 | return CutoutAbs(img, v) 130 | 131 | def Block_fade(img, v): 132 | img = np.asarray(img) 133 | img = img / 255. 134 | img = do_random_block_fade(img, v) 135 | img = img * 255. 
136 | return img.astype(np.uint8) 137 | -------------------------------------------------------------------------------- /src/utilities/augment_util.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '..') 3 | from imgaug import augmenters as iaa 4 | 5 | from config.config import * 6 | from utilities.augment_rand import * 7 | import random 8 | import torchvision 9 | 10 | def train_multi_augment1(image, **kwargs): 11 | seq = iaa.SomeOf(1, [ 12 | iaa.Noop(), 13 | iaa.Fliplr(p=1), 14 | ]) 15 | image = seq.augment_images([image])[0] 16 | return image 17 | 18 | def train_multi_augment3(image, img_size=(600, 600), **kwargs): 19 | if np.random.random() < 0.5: 20 | image = random_crop_long_edge(image, img_size) 21 | else: 22 | image = cv2.resize(image, img_size) 23 | return image 24 | 25 | def train_multi_augment3b(image, img_size=(600, 600), **kwargs): 26 | image = do_rand_aug(image, fast_randaugment_list) 27 | image = train_multi_augment3(image, img_size=img_size) 28 | return image 29 | 30 | def do_rand_aug(image, randaugment_list): 31 | if np.random.random() < 0.03: 32 | if np.random.random() < 0.5: 33 | image = np.rot90(image, k=1) 34 | else: 35 | image = np.rot90(image, k=3) 36 | 37 | n = 3 38 | m = random.randint(1, 9) 39 | if image.max() <= 1: 40 | image = (image * 255).astype('uint8') 41 | image = randaugment_base(randaugment_list, image, n, m, div=10, prob=1) 42 | 43 | seq = iaa.OneOf([ 44 | iaa.Noop(), 45 | iaa.Fliplr(p=1), 46 | iaa.Affine(scale=(0.75, 1.25)), 47 | ]) 48 | image = seq.augment_images([image])[0] 49 | return image 50 | 51 | def random_crop_long_edge(img, img_size=(600,600)): 52 | if img.max() <= 1: 53 | img = (img * 255).astype('uint8') 54 | size = (min(img.shape[:2]), min(img.shape[:2])) 55 | i = (0 if size[0] == img.shape[0] 56 | else np.random.randint(low=0,high=img.shape[0] - size[0])) 57 | j = (0 if size[1] == img.shape[1] 58 | else np.random.randint(low=0,high=img.shape[1] - size[1])) 59 | img = Image.fromarray(img) 60 | img = torchvision.transforms.functional.crop(img, i, j, size[1], size[0]) 61 | img = np.asarray(img) 62 | img = cv2.resize(img, img_size) 63 | return img 64 | -------------------------------------------------------------------------------- /src/utilities/common_util.py: -------------------------------------------------------------------------------- 1 | from config.config import * 2 | 3 | osp = os.path 4 | ope = os.path.exists 5 | opj = os.path.join 6 | -------------------------------------------------------------------------------- /src/utilities/metric_util.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '..') 3 | from pathlib import Path 4 | from scipy.spatial import distance 5 | import pandas as pd 6 | import cv2 7 | import torch 8 | from layers.normalization import L2N 9 | from config.config import * 10 | from torch.autograd import Variable 11 | from torch.utils.data import Dataset, DataLoader, SequentialSampler 12 | from albumentations import Normalize 13 | 14 | def MeanAveragePrecision(predictions, retrieval_solution, max_predictions=100, save_perimg_score=False): 15 | """Computes mean average precision for retrieval prediction. 16 | 17 | Args: 18 | predictions: Dict mapping test image ID to a list of strings corresponding 19 | to index image IDs. 20 | retrieval_solution: Dict mapping test image ID to list of ground-truth image 21 | IDs. 
22 | max_predictions: Maximum number of predictions per query to take into 23 | account. For the Google Landmark Retrieval challenge, this should be set 24 | to 100. 25 | 26 | Returns: 27 | mean_ap: Mean average precision score (float). 28 | 29 | Raises: 30 | ValueError: If a test image in `predictions` is not included in 31 | `retrieval_solutions`. 32 | """ 33 | # Compute number of test images. 34 | num_test_images = len(retrieval_solution.keys()) 35 | 36 | # Loop over predictions for each query and compute mAP. 37 | mean_ap = 0.0 38 | score_map = {} 39 | for key, prediction in predictions.items(): 40 | if key not in retrieval_solution: 41 | raise ValueError('Test image %s is not part of retrieval_solution' % key) 42 | 43 | # Loop over predicted images, keeping track of those which were already 44 | # used (duplicates are skipped). 45 | ap = 0.0 46 | already_predicted = set() 47 | num_expected_retrieved = min(len(retrieval_solution[key]), max_predictions) 48 | num_correct = 0 49 | for i in range(min(len(prediction), max_predictions)): 50 | if prediction[i] not in already_predicted: 51 | if prediction[i] in retrieval_solution[key]: 52 | num_correct += 1 53 | ap += num_correct / (i + 1) 54 | already_predicted.add(prediction[i]) 55 | 56 | ap /= num_expected_retrieved 57 | mean_ap += ap 58 | score_map[key] = ap 59 | 60 | mean_ap /= num_test_images 61 | if save_perimg_score: 62 | return mean_ap, score_map 63 | else: 64 | return mean_ap 65 | 66 | def generate_score_by_model(model, img_size=None, scale=None, selected_num=200, batch_size=1, preprocessing=False): 67 | # assert (img_size is None and scale is not None) or (img_size is not None and scale is None) 68 | K = 100 69 | QUERY_IMAGE_DIR = f'{DATA_DIR}/images/test' 70 | INDEX_IMAGE_DIR = f'{DATA_DIR}/images/index' 71 | solution_df = pd.read_csv(f'{DATA_DIR}/raw/retrieval_solution_v2.1.csv') 72 | private_df = solution_df[solution_df['Usage'] == 'Private'] 73 | private_solution = {} 74 | private_test_img_ids = [] 75 | private_index_img_ids = [] 76 | 77 | for i, row in private_df.iterrows(): 78 | private_solution[row['id']] = row['images'].split(' ') 79 | private_test_img_ids.append(row['id']) 80 | private_index_img_ids.extend(row['images'].split(' ')) 81 | if len(private_test_img_ids) >= selected_num: 82 | break 83 | 84 | private_test_img_ids = list(set(private_test_img_ids)) 85 | private_index_img_ids = list(set(private_index_img_ids)) 86 | 87 | class _TestDataset(Dataset): 88 | def __init__(self, image_paths, img_size=None): 89 | self.image_paths = image_paths 90 | self.img_size = img_size 91 | 92 | def __len__(self): 93 | return len(self.image_paths) 94 | 95 | def __getitem__(self, idx): 96 | image_path = self.image_paths[idx] 97 | image = cv2.imread(str(image_path)) 98 | image = image[..., ::-1] 99 | image = cv2.resize(image, self.img_size) 100 | if preprocessing == 1: 101 | norm = Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0) 102 | image = norm(image=image)['image'] 103 | else: 104 | image = image / 255.0 105 | image = np.transpose(image, (2, 0, 1)) 106 | image_tensor = torch.from_numpy(image).float() 107 | return image_tensor 108 | 109 | def _create_dataset(image_paths, img_size, batch_size): 110 | dataset = _TestDataset(image_paths, img_size) 111 | data_loader = DataLoader( 112 | dataset, 113 | sampler=SequentialSampler(dataset), 114 | batch_size=batch_size, 115 | drop_last=False, 116 | num_workers=4, 117 | pin_memory=True, 118 | ) 119 | return data_loader 120 | 121 | def 
_get_embedding(image_tensor): 122 | image_tensor = Variable(image_tensor.cuda()) 123 | embedding = model.module.extract_feature(image_tensor) 124 | embedding = L2N()(embedding) 125 | return embedding 126 | 127 | def _get_id(image_path: Path): 128 | return int(image_path.name.split('.')[0], 16) 129 | 130 | def _get_embeddings(image_root_dir: str): 131 | if image_root_dir.count('test') > 0: 132 | image_paths = [Path(f'{image_root_dir}/{img_id}.jpg') for img_id in private_test_img_ids] 133 | else: 134 | image_paths = [Path(f'{image_root_dir}/{img_id}.jpg') for img_id in private_index_img_ids] 135 | dataloader = _create_dataset(image_paths, img_size, batch_size) 136 | embeddings = [] 137 | for image_tensor in dataloader: 138 | embedding = _get_embedding(image_tensor) 139 | embedding = embedding.cpu().detach().numpy() 140 | embeddings.extend(embedding) 141 | ids = [_get_id(image_path) for image_path in image_paths] 142 | return ids, embeddings 143 | 144 | def _to_hex(image_id: int) -> str: 145 | return '{0:0{1}x}'.format(image_id, 16) 146 | 147 | def _get_metrics(predictions, solution): 148 | relevant_predictions = {} 149 | 150 | for key in solution.keys(): 151 | if key in predictions: 152 | relevant_predictions[key] = predictions[key] 153 | 154 | # Mean average precision. 155 | mean_average_precision = MeanAveragePrecision(relevant_predictions, solution, max_predictions=K) 156 | 157 | return mean_average_precision 158 | 159 | query_ids, query_embeddings = _get_embeddings(QUERY_IMAGE_DIR) 160 | index_ids, index_embeddings = _get_embeddings(INDEX_IMAGE_DIR) 161 | distances = distance.cdist(np.array(query_embeddings), np.array(index_embeddings), 'euclidean') 162 | predicted_positions = np.argpartition(distances, K, axis=1)[:, :K] 163 | 164 | predictions = {} 165 | for i, query_id in enumerate(query_ids): 166 | nearest = [(index_ids[j], distances[i, j]) for j in predicted_positions[i]] 167 | nearest.sort(key=lambda x: x[1]) 168 | prediction = [_to_hex(index_id) for index_id, d in nearest] 169 | predictions[_to_hex(query_id)] = prediction 170 | score = _get_metrics(predictions, private_solution) 171 | return score 172 | -------------------------------------------------------------------------------- /src/utilities/model_util.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '..') 3 | import pandas as pd 4 | import torch 5 | from torch.nn import DataParallel 6 | from apex.parallel import DistributedDataParallel 7 | 8 | from config.config import * 9 | 10 | def load_pretrained_state_dict(net, load_state_dict, strict=False, can_print=True, extend_W=None, num_classes=-1): 11 | if 'epoch' in load_state_dict and can_print: 12 | epoch = load_state_dict['epoch'] 13 | print(f'load epoch:{epoch:.2f}') 14 | if 'state_dict' in load_state_dict: 15 | load_state_dict = load_state_dict['state_dict'] 16 | elif 'model_state_dict' in load_state_dict: 17 | load_state_dict = load_state_dict['model_state_dict'] 18 | elif 'model' in load_state_dict: 19 | load_state_dict = load_state_dict['model'] 20 | if type(net) == DataParallel or type(net) == DistributedDataParallel: 21 | state_dict = net.module.state_dict() 22 | else: 23 | state_dict = net.state_dict() 24 | 25 | new_load_state_dict = dict() 26 | for key in load_state_dict.keys(): 27 | if key.startswith('module.'): 28 | dst_key = key.replace('module.', '') 29 | else: 30 | dst_key = key 31 | new_load_state_dict[dst_key] = load_state_dict[key] 32 | load_state_dict = new_load_state_dict 33 | 34 | if 
extend_W is not None: 35 | extend_key = 'face_margin_product.W' 36 | if extend_key not in load_state_dict: 37 | raise Exception(f'{extend_key} is not in load_state_dict') 38 | if extend_W == 'nolandmark': 39 | old_landmarks = np.arange(load_state_dict[extend_key].size(0)) 40 | new_landmarks = np.concatenate([old_landmarks, [np.max(old_landmarks) + 1]]) 41 | else: 42 | arr = np.load(f'{DATA_DIR}/input/landmarks_mapping_{extend_W}.npz', allow_pickle=True) 43 | old_landmarks = arr['old_landmarks'] 44 | new_landmarks = arr['new_landmarks'] 45 | if load_state_dict[extend_key].size(0) != num_classes: 46 | print(f'{extend_key} shape: {len(old_landmarks)} -> {len(new_landmarks)}') 47 | load_state_dict = extend_model_weight(load_state_dict, old_landmarks, new_landmarks, key=extend_key) 48 | 49 | for key in list(load_state_dict.keys()): 50 | if key not in state_dict: 51 | if key == 'maxpool.1.filt': 52 | state_dict['maxpool.0.filt'] = load_state_dict[key] 53 | state_dict['maxpool.2.filt'] = load_state_dict[key] 54 | print('weight maxpool.1.filt --> maxpool.0.filt AND maxpool.2.filt') 55 | elif strict: 56 | raise Exception(f'not in {key}') 57 | if can_print: 58 | print('not in', key) 59 | continue 60 | if load_state_dict[key].size() != state_dict[key].size(): 61 | if strict: 62 | raise Exception(f'size not the same {key}') 63 | if ('last_linear' in key or 'attention' in key) and (load_state_dict[key].size()[1:] == state_dict[key].size()[1:]): 64 | min_channel = min(state_dict[key].size(0), load_state_dict[key].size(0)) 65 | state_dict[key][:min_channel] = load_state_dict[key][:min_channel] 66 | elif can_print: 67 | print('size not the same', key) 68 | continue 69 | state_dict[key] = load_state_dict[key] 70 | if type(net) == DataParallel or type(net) == DistributedDataParallel: 71 | net.module.load_state_dict(state_dict) 72 | else: 73 | net.load_state_dict(state_dict) 74 | return net 75 | 76 | def extend_model_weight(state_dict, old_landmarks, new_landmarks, key='face_margin_product.W'): 77 | old_landmarks = np.sort(old_landmarks) 78 | new_landmarks = np.sort(new_landmarks) 79 | 80 | W = state_dict[key] 81 | assert W.size(0) == len(old_landmarks) 82 | assert len(W.size()) == 2 83 | new_W = torch.zeros((len(new_landmarks), W.size(1)), dtype=W.dtype) 84 | new_W[:, :] = W.mean(dim=0) 85 | 86 | old_ts = pd.Series(index=old_landmarks, data=np.arange(len(old_landmarks))) 87 | new_ts = pd.Series(index=new_landmarks, data=np.arange(len(new_landmarks))) 88 | intersection_landmarks = np.sort(list(set.intersection(set(old_landmarks), set(new_landmarks)))) 89 | new_W[new_ts[intersection_landmarks].tolist(), :] = W[old_ts[intersection_landmarks].tolist(), :] 90 | 91 | state_dict[key] = new_W 92 | return state_dict 93 | 94 | def load_pretrained(net, pretrained_file, strict=False,can_print=True): 95 | if can_print: 96 | print(f'load pretrained file: {pretrained_file}') 97 | load_state_dict = torch.load(pretrained_file) 98 | net = load_pretrained_state_dict(net, load_state_dict, strict=strict, can_print=can_print) 99 | return net 100 | -------------------------------------------------------------------------------- /src/utilities/superpointglue_util.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import torch 4 | 5 | def process_resize(w, h, resize): 6 | assert (len(resize) > 0 and len(resize) <= 2) 7 | if len(resize) == 1 and resize[0] > -1: 8 | scale = resize[0] / max(h, w) 9 | w_new, h_new = int(round(w * scale)), int(round(h * scale)) 
10 | elif len(resize) == 1 and resize[0] == -1: 11 | w_new, h_new = w, h 12 | else: # len(resize) == 2: 13 | w_new, h_new = resize[0], resize[1] 14 | 15 | # Issue warning if resolution is too small or too large. 16 | if max(w_new, h_new) < 160: 17 | print('Warning: input resolution is very small, results may vary') 18 | elif max(w_new, h_new) > 2000: 19 | print('Warning: input resolution is very large, results may vary') 20 | 21 | return w_new, h_new 22 | 23 | def frame2tensor(frame, device='cuda'): 24 | return torch.from_numpy(frame / 255.).float()[None, None].to(device) 25 | 26 | def read_image(path, resize, rotation, resize_float, device='cuda'): 27 | image = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE) 28 | if image is None: 29 | return None, None, None 30 | w, h = image.shape[1], image.shape[0] 31 | w_new, h_new = process_resize(w, h, resize) 32 | scales = (float(w) / float(w_new), float(h) / float(h_new)) 33 | 34 | if resize_float: 35 | image = cv2.resize(image.astype('float32'), (w_new, h_new)) 36 | else: 37 | image = cv2.resize(image, (w_new, h_new)).astype('float32') 38 | 39 | if rotation != 0: 40 | image = np.rot90(image, k=rotation) 41 | if rotation % 2: 42 | scales = scales[::-1] 43 | 44 | inp = frame2tensor(image, device=device) 45 | return image, inp, scales 46 | -------------------------------------------------------------------------------- /src/utilities/vectors_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def ivecs_read(fname): 4 | a = np.fromfile(fname, dtype='int32') 5 | d = a[0] 6 | return a.reshape(-1, d + 1)[:, 1:].copy() 7 | 8 | def fvecs_read(fname): 9 | return ivecs_read(fname).view('float32') 10 | --------------------------------------------------------------------------------
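The MeanAveragePrecision routine in src/utilities/metric_util.py above sums, for each query, the precision at every rank where a previously unseen ground-truth index image appears, normalizes by min(number of ground-truth images, max_predictions), and then averages over queries. The snippet below is a minimal, self-contained sketch of that computation on hypothetical IDs ('q1', 'a', 'b', 'x' are made up for illustration); it mirrors the logic of the repository function rather than importing or calling it:

# Toy re-computation of the per-query average precision used by
# MeanAveragePrecision; IDs are hypothetical and max_predictions is fixed at 100.
retrieval_solution = {'q1': ['a', 'b']}    # query -> ground-truth index image IDs
predictions = {'q1': ['a', 'x', 'b']}      # query -> ranked retrieved index image IDs

mean_ap = 0.0
for key, prediction in predictions.items():
    num_expected = min(len(retrieval_solution[key]), 100)
    num_correct, ap, seen = 0, 0.0, set()
    for i, p in enumerate(prediction[:100]):
        if p not in seen:
            if p in retrieval_solution[key]:
                num_correct += 1
                ap += num_correct / (i + 1)   # precision at rank i+1
            seen.add(p)
    mean_ap += ap / num_expected
mean_ap /= len(retrieval_solution)
print(round(mean_ap, 4))  # (1/1 + 2/3) / 2 = 0.8333

With one correct hit at rank 1 and the second at rank 3, the query's average precision is (1/1 + 2/3) / 2, which is the score generate_score_by_model reports (as valid_map100 in train.py) when averaged over the selected private test queries.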