├── README.md ├── core ├── __init__.py ├── base.py ├── data_loader │ ├── __init__.py │ ├── dataset.py │ └── loader.py ├── models │ ├── __init__.py │ ├── model_gcn.py │ ├── model_graph_matching │ │ ├── __init__.py │ │ ├── affinity_layer.py │ │ ├── gconv.py │ │ ├── permutation_loss.py │ │ ├── sinkhorn.py │ │ └── voting_layer.py │ ├── model_keypoints │ │ ├── __init__.py │ │ ├── config │ │ │ ├── __init__.py │ │ │ ├── default.py │ │ │ ├── experiments │ │ │ │ ├── coco │ │ │ │ │ ├── hrnet │ │ │ │ │ │ ├── w32_256x192_adam_lr1e-3.yaml │ │ │ │ │ │ ├── w32_384x288_adam_lr1e-3.yaml │ │ │ │ │ │ ├── w48_256x192_adam_lr1e-3.yaml │ │ │ │ │ │ └── w48_384x288_adam_lr1e-3.yaml │ │ │ │ │ └── resnet │ │ │ │ │ │ ├── res101_256x192_d256x3_adam_lr1e-3.yaml │ │ │ │ │ │ ├── res101_384x288_d256x3_adam_lr1e-3.yaml │ │ │ │ │ │ ├── res152_256x192_d256x3_adam_lr1e-3.yaml │ │ │ │ │ │ ├── res152_384x288_d256x3_adam_lr1e-3.yaml │ │ │ │ │ │ ├── res50_256x128_d256x3_adam_lr1e-3.yaml │ │ │ │ │ │ ├── res50_256x192_d256x3_adam_lr1e-3.yaml │ │ │ │ │ │ └── res50_384x288_d256x3_adam_lr1e-3.yaml │ │ │ │ └── mpii │ │ │ │ │ ├── hrnet │ │ │ │ │ ├── w32_256x256_adam_lr1e-3.yaml │ │ │ │ │ └── w48_256x256_adam_lr1e-3.yaml │ │ │ │ │ └── resnet │ │ │ │ │ ├── res101_256x256_d256x3_adam_lr1e-3.yaml │ │ │ │ │ ├── res152_256x256_d256x3_adam_lr1e-3.yaml │ │ │ │ │ └── res50_256x256_d256x3_adam_lr1e-3.yaml │ │ │ └── models.py │ │ ├── crop_by_skeleton.py │ │ ├── gaussian_blur.py │ │ ├── pose_config.py │ │ ├── pose_hrnet.py │ │ ├── pose_processor.py │ │ ├── pose_resnet.py │ │ └── pose_resnet2.py │ └── model_reid.py ├── test.py ├── train.py └── visualize.py ├── main.py └── tools ├── __init__.py ├── evaluation ├── __init__.py ├── classification.py └── retrieval2.py ├── logger.py ├── loss.py ├── meter.py ├── metric.py ├── transforms2.py ├── utils.py └── visualize.py /README.md: -------------------------------------------------------------------------------- 1 | # HOReID 2 | [CVPR2020] High-Order Information Matters: Learning Relation and Topology for Occluded Person Re-Identification. [paper](http://openaccess.thecvf.com/content_CVPR_2020/html/Wang_High-Order_Information_Matters_Learning_Relation_and_Topology_for_Occluded_Person_CVPR_2020_paper.html) 3 | 4 | ### Update 5 | 2020-12: We release a strong pipeline for occluded/partial reid. [link](https://github.com/wangguanan/light-reid/tree/master/examples/occluded_reid) 6 | 7 | 2020-06-16: Update Code. 8 | 9 | 2020-04-01: Happy April's Fool Day!!! Code is comming soon. 
10 | 11 | ### Bibtex 12 | If you find the code useful, please consider citing our paper: 13 | ``` 14 | @InProceedings{wang2020cvpr, 15 | author = {Wang, Guan'an and Yang, Shuo and Liu, Huanyu and Wang, Zhicheng and Yang, Yang and Wang, Shuliang and Yu, Gang and Zhou, Erjin and Sun, Jian}, 16 | title = {High-Order Information Matters: Learning Relation and Topology for Occluded Person Re-Identification}, 17 | booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, 18 | month = {June}, 19 | year = {2020} 20 | } 21 | ``` 22 | 23 | ### Set Up 24 | ```shell script 25 | conda create -n horeid python=3.7 26 | conda activate horeid 27 | conda install pytorch==1.1.0 torchvision==0.3.0 -c pytorch 28 | # GPU Memory >= 10G, Memory >= 20G 29 | ``` 30 | 31 | 32 | ### Preparation 33 | * Dataset: Occluded DukeMTMC-reID ([Project](https://github.com/lightas/Occluded-DukeMTMC-Dataset)) 34 | * Pre-trained Pose Model ([pose_hrnet_w48_256x192.pth](https://drive.google.com/drive/folders/1hOTihvbyIxsm5ygDpbUuJ7O_tzv4oXjC), 35 | please download it to path ```./core/models/model_keypoints/pose_hrnet_w48_256x192.pth```) 36 | 37 | 38 | ### Trained Model 39 | * [BaiDuDisk](https://pan.baidu.com/s/10TQ221aPz5-FMaW2YP2NJw) (pwd:fgit) 40 | * Google Drive (coming soon) 41 | 42 | ### Train 43 | ``` 44 | python main.py --mode train \ 45 | --duke_path path/to/occluded/duke \ 46 | --output_path ./results 47 | ``` 48 | 49 | ### Test with Trained Model 50 | ``` 51 | python main.py --mode test \ 52 | --resume_test_path path/to/pretrained/model --resume_test_epoch 119 \ 53 | --duke_path path/to/occluded/duke --output_path ./results 54 | ``` 55 | 56 | ## License 57 | 58 | This repo is released under the MIT License. 59 | 60 | 61 | ## Contacts 62 | If you have any questions about the project, please feel free to contact me.
63 | 64 | E-mail: guan.wang0706@gmail.com 65 | 66 | -------------------------------------------------------------------------------- /core/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_loader import Loaders 2 | from .base import Base 3 | from .train import train_an_epoch 4 | from .test import testwithVer2 5 | from .visualize import visualize_ranked_images 6 | -------------------------------------------------------------------------------- /core/base.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append('..') 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.optim as optim 8 | from bisect import bisect_right 9 | import os 10 | 11 | from .models import Encoder, BNClassifiers 12 | from .models import ScoremapComputer, compute_local_features 13 | from .models import GraphConvNet, generate_adj 14 | from .models import GMNet, PermutationLoss, Verificator, mining_hard_pairs 15 | from tools import CrossEntropyLabelSmooth, TripletLoss, accuracy 16 | 17 | 18 | def os_walk(folder_dir): 19 | for root, dirs, files in os.walk(folder_dir): 20 | files = sorted(files, reverse=True) 21 | dirs = sorted(dirs, reverse=True) 22 | return root, dirs, files 23 | 24 | 25 | class Base: 26 | 27 | def __init__(self, config, loaders): 28 | 29 | self.config = config 30 | self.loaders = loaders 31 | 32 | self.pid_num = config.pid_num 33 | self.margin = config.margin 34 | self.branch_num = config.branch_num 35 | 36 | # Logger Configuration 37 | self.max_save_model_num = config.max_save_model_num 38 | self.output_path = config.output_path 39 | self.save_model_path = os.path.join(self.output_path, 'models/') 40 | self.save_logs_path = os.path.join(self.output_path, 'logs/') 41 | self.save_visualize_market_path = os.path.join(self.output_path, 'visualization/market/') 42 | self.save_visualize_duke_path = os.path.join(self.output_path, 'visualization/duke/') 43 | 44 | # Train Configuration 45 | self.base_learning_rate = config.base_learning_rate 46 | self.weight_decay = config.weight_decay 47 | self.milestones = config.milestones 48 | 49 | # init model 50 | self._init_device() 51 | self._init_model() 52 | self._init_creiteron() 53 | self._init_optimizer() 54 | 55 | def _init_device(self): 56 | self.device = torch.device('cuda') 57 | 58 | def _init_model(self): 59 | 60 | # feature learning 61 | self.encoder = Encoder(class_num=self.pid_num) 62 | self.bnclassifiers = BNClassifiers(2048, self.pid_num, self.branch_num) 63 | self.bnclassifiers2 = BNClassifiers(2048, self.pid_num, self.branch_num) # for gcned features 64 | self.encoder = nn.DataParallel(self.encoder).to(self.device) 65 | self.bnclassifiers = nn.DataParallel(self.bnclassifiers).to(self.device) 66 | self.bnclassifiers2 = nn.DataParallel(self.bnclassifiers2).to(self.device) 67 | 68 | # keypoints model 69 | self.scoremap_computer = ScoremapComputer(self.config.norm_scale).to(self.device) 70 | # self.scoremap_computer = nn.DataParallel(self.scoremap_computer).to(self.device) 71 | self.scoremap_computer = self.scoremap_computer.eval() 72 | 73 | # GCN 74 | self.linked_edges = \ 75 | [[13, 0], [13, 1], [13, 2], [13, 3], [13, 4], [13, 5], [13, 6], [13, 7], [13, 8], [13, 9], [13, 10], 76 | [13, 11], [13, 12], # global 77 | [0, 1], [0, 2], # head 78 | [1, 2], [1, 7], [2, 8], [7, 8], [1, 8], [2, 7], # body 79 | [1, 3], [3, 5], [2, 4], [4, 6], [7, 9], [9, 11], [8, 10], [10, 12], # libs 80 | # [3,4],[5,6],[9,10],[11,12], # semmetric 
libs links 81 | ] 82 | self.adj = generate_adj(self.branch_num, self.linked_edges, self_connect=0.0).to(self.device) 83 | self.gcn = GraphConvNet(self.adj, 2048, 2048, 2048, self.config.gcn_scale).to(self.device) 84 | 85 | # graph matching 86 | self.gmnet = GMNet().to(self.device) 87 | 88 | # verification 89 | self.verificator = Verificator(self.config).to(self.device) 90 | 91 | def _init_creiteron(self): 92 | self.ide_creiteron = CrossEntropyLabelSmooth(self.pid_num, reduce=False) 93 | self.triplet_creiteron = TripletLoss(self.margin, 'euclidean') 94 | self.bce_loss = nn.BCELoss() 95 | self.permutation_loss = PermutationLoss() 96 | 97 | def compute_ide_loss(self, score_list, pids, weights): 98 | loss_all = 0 99 | for i, score_i in enumerate(score_list): 100 | loss_i = self.ide_creiteron(score_i, pids) 101 | loss_i = (weights[:, i] * loss_i).mean() 102 | loss_all += loss_i 103 | return loss_all 104 | 105 | def compute_triplet_loss(self, feature_list, pids): 106 | '''we suppose the last feature is global, and only compute its loss''' 107 | loss_all = 0 108 | for i, feature_i in enumerate(feature_list): 109 | if i == len(feature_list) - 1: 110 | loss_i = self.triplet_creiteron(feature_i, feature_i, feature_i, pids, pids, pids) 111 | loss_all += loss_i 112 | return loss_all 113 | 114 | def compute_accuracy(self, cls_score_list, pids): 115 | overall_cls_score = 0 116 | for cls_score in cls_score_list: 117 | overall_cls_score += cls_score 118 | acc = accuracy(overall_cls_score, pids, [1])[0] 119 | return acc 120 | 121 | def _init_optimizer(self): 122 | params = [] 123 | 124 | for key, value in self.encoder.named_parameters(): 125 | if not value.requires_grad: 126 | continue 127 | lr = self.base_learning_rate 128 | weight_decay = self.weight_decay 129 | params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}] 130 | 131 | for key, value in self.bnclassifiers.named_parameters(): 132 | if not value.requires_grad: 133 | continue 134 | lr = self.base_learning_rate 135 | weight_decay = self.weight_decay 136 | params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}] 137 | 138 | for key, value in self.bnclassifiers2.named_parameters(): 139 | if not value.requires_grad: 140 | continue 141 | lr = self.base_learning_rate 142 | weight_decay = self.weight_decay 143 | params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}] 144 | 145 | for key, value in self.gcn.named_parameters(): 146 | if not value.requires_grad: 147 | continue 148 | lr = self.base_learning_rate 149 | weight_decay = self.weight_decay 150 | params += [{"params": [value], "lr": self.config.gcn_lr_scale * lr, "weight_decay": weight_decay}] 151 | 152 | for key, value in self.gmnet.named_parameters(): 153 | if not value.requires_grad: 154 | continue 155 | lr = self.base_learning_rate 156 | weight_decay = self.weight_decay 157 | params += [{"params": [value], "lr": self.config.gm_lr_scale * lr, "weight_decay": weight_decay}] 158 | 159 | for key, value in self.verificator.named_parameters(): 160 | if not value.requires_grad: 161 | continue 162 | lr = self.base_learning_rate 163 | weight_decay = self.weight_decay 164 | params += [{"params": [value], "lr": self.config.ver_lr_scale * lr, "weight_decay": weight_decay}] 165 | 166 | self.optimizer = optim.Adam(params) 167 | self.lr_scheduler = WarmupMultiStepLR( 168 | self.optimizer, self.milestones, gamma=0.1, warmup_factor=0.01, warmup_iters=10) 169 | 170 | ## save model as save_epoch 171 | def save_model(self, save_epoch): 172 | 173 | 
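        # Save each sub-module (encoder, both BN-classifier heads, GCN, graph-matching net,
        # verificator) separately as '<name>_<epoch>.pkl' under save_model_path; the pruning
        # block further below keeps only the newest max_save_model_num checkpoints on disk.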
torch.save(self.encoder.state_dict(), os.path.join(self.save_model_path, 'encoder_{}.pkl'.format(save_epoch))) 174 | torch.save(self.bnclassifiers.state_dict(), 175 | os.path.join(self.save_model_path, 'bnclassifiers_{}.pkl'.format(save_epoch))) 176 | torch.save(self.bnclassifiers2.state_dict(), 177 | os.path.join(self.save_model_path, 'bnclassifiers2_{}.pkl'.format(save_epoch))) 178 | torch.save(self.gcn.state_dict(), os.path.join(self.save_model_path, 'gcn_{}.pkl'.format(save_epoch))) 179 | torch.save(self.gmnet.state_dict(), os.path.join(self.save_model_path, 'gmnet_{}.pkl'.format(save_epoch))) 180 | torch.save(self.verificator.state_dict(), 181 | os.path.join(self.save_model_path, 'verificator_{}.pkl'.format(save_epoch))) 182 | 183 | # if saved model is more than max num, delete the model with smallest iter 184 | if self.max_save_model_num > 0: 185 | root, _, files = os_walk(self.save_model_path) 186 | new_file = [] 187 | for file_ in files: 188 | if file_.endswith('.pkl'): 189 | new_file.append(file_) 190 | file_iters = sorted(list(set([int(file.replace('.', '_').split('_')[-2]) for file in new_file])), 191 | reverse=False) 192 | 193 | if len(file_iters) > self.max_save_model_num: 194 | for i in range(len(file_iters) - self.max_save_model_num): 195 | file_path = os.path.join(root, '*_{}.pkl'.format(file_iters[i])) 196 | print('remove files:', file_path) 197 | os.system('rm -f {}'.format(file_path)) 198 | 199 | ## resume model from resume_epoch 200 | def resume_model(self, resume_epoch): 201 | self.encoder.load_state_dict( 202 | torch.load(os.path.join(self.save_model_path, 'encoder_{}.pkl'.format(resume_epoch)))) 203 | self.bnclassifiers.load_state_dict( 204 | torch.load(os.path.join(self.save_model_path, 'bnclassifiers_{}.pkl'.format(resume_epoch)))) 205 | self.bnclassifiers2.load_state_dict( 206 | torch.load(os.path.join(self.save_model_path, 'bnclassifiers2_{}.pkl'.format(resume_epoch)))) 207 | self.gcn.load_state_dict(torch.load( 208 | os.path.join(self.save_model_path, 'gcn_{}.pkl'.format(resume_epoch)))) 209 | self.gmnet.load_state_dict( 210 | torch.load(os.path.join(self.save_model_path, 'gmnet_{}.pkl'.format(resume_epoch)))) 211 | self.verificator.load_state_dict( 212 | torch.load(os.path.join(self.save_model_path, 'verificator_{}.pkl'.format(resume_epoch)))) 213 | 214 | 215 | ## resume model from resume_epoch 216 | def resume_model_from_path(self, path, resume_epoch): 217 | self.encoder.load_state_dict( 218 | torch.load(os.path.join(path, 'encoder_{}.pkl'.format(resume_epoch)))) 219 | self.bnclassifiers.load_state_dict( 220 | torch.load(os.path.join(path, 'bnclassifiers_{}.pkl'.format(resume_epoch)))) 221 | self.bnclassifiers2.load_state_dict( 222 | torch.load(os.path.join(path, 'bnclassifiers2_{}.pkl'.format(resume_epoch)))) 223 | self.gcn.load_state_dict(torch.load( 224 | os.path.join(path, 'gcn_{}.pkl'.format(resume_epoch)))) 225 | self.gmnet.load_state_dict(torch.load( 226 | os.path.join(path, 'gmnet_{}.pkl'.format(resume_epoch)))) 227 | self.verificator.load_state_dict( 228 | torch.load(os.path.join(path, 'verificator_{}.pkl'.format(resume_epoch)))) 229 | 230 | 231 | ## set model as train mode 232 | def set_train(self): 233 | self.encoder = self.encoder.train() 234 | self.bnclassifiers = self.bnclassifiers.train() 235 | self.bnclassifiers2 = self.bnclassifiers2.train() 236 | self.gcn = self.gcn.train() 237 | self.gmnet = self.gmnet.train() 238 | self.verificator = self.verificator.train() 239 | 240 | ## set model as eval mode 241 | def set_eval(self): 242 | self.encoder 
= self.encoder.eval() 243 | self.bnclassifiers = self.bnclassifiers.eval() 244 | self.bnclassifiers2 = self.bnclassifiers2.eval() 245 | self.gcn = self.gcn.eval() 246 | self.gmnet = self.gmnet.eval() 247 | self.verificator = self.verificator.eval() 248 | 249 | def forward(self, images, pids, training): 250 | 251 | # feature 252 | feature_maps = self.encoder(images) 253 | with torch.no_grad(): 254 | score_maps, keypoints_confidence, _ = self.scoremap_computer(images) 255 | feature_vector_list, keypoints_confidence = compute_local_features( 256 | self.config, feature_maps, score_maps, keypoints_confidence) 257 | bned_feature_vector_list, cls_score_list = self.bnclassifiers(feature_vector_list) 258 | 259 | # gcn 260 | gcned_feature_vector_list = self.gcn(feature_vector_list) 261 | bned_gcned_feature_vector_list, gcned_cls_score_list = self.bnclassifiers2(gcned_feature_vector_list) 262 | 263 | if training: 264 | 265 | # mining hard samples 266 | new_bned_gcned_feature_vector_list, bned_gcned_feature_vector_list_p, bned_gcned_feature_vector_list_n = mining_hard_pairs( 267 | bned_gcned_feature_vector_list, pids) 268 | 269 | # graph matching 270 | s_p, emb_p, emb_pp = self.gmnet(new_bned_gcned_feature_vector_list, bned_gcned_feature_vector_list_p, None) 271 | s_n, emb_n, emb_nn = self.gmnet(new_bned_gcned_feature_vector_list, bned_gcned_feature_vector_list_n, None) 272 | 273 | # verificate 274 | # ver_prob_p = self.verificator(bned_gcned_feature_vector_list, bned_gcned_feature_vector_list_p) 275 | # ver_prob_n = self.verificator(bned_gcned_feature_vector_list, bned_gcned_feature_vector_list_n) 276 | ver_prob_p = self.verificator(emb_p, emb_pp) 277 | ver_prob_n = self.verificator(emb_n, emb_nn) 278 | 279 | return (feature_vector_list, gcned_feature_vector_list), \ 280 | (cls_score_list, gcned_cls_score_list), \ 281 | (ver_prob_p, ver_prob_n), \ 282 | (s_p, emb_p, emb_pp),\ 283 | (s_n, emb_n, emb_nn), \ 284 | keypoints_confidence 285 | else: 286 | bs, keypoints_num = keypoints_confidence.shape 287 | keypoints_confidence = torch.sqrt(keypoints_confidence).unsqueeze(2).repeat([1, 1, 2048]).view( 288 | [bs, 2048 * keypoints_num]) 289 | 290 | # features = keypoints_confidence * torch.cat(feature_vector_list, dim=1) 291 | # bned_features = keypoints_confidence * torch.cat(bned_feature_vector_list, dim=1) 292 | # gcned_features = keypoints_confidence * torch.cat(gcned_feature_vector_list, dim=1) 293 | # bned_gcned_features = keypoints_confidence * torch.cat(bned_gcned_feature_vector_list, dim=1) 294 | # return (features, bned_features), (gcned_features, bned_gcned_features) 295 | 296 | # features = torch.cat([i.unsqueeze(1) for i in feature_vector_list], dim=1) 297 | # bned_features = keypoints_confidence * torch.cat(bned_feature_vector_list, dim=1) 298 | # gcned_features = torch.cat([i.unsqueeze(1) for i in gcned_feature_vector_list], dim=1) 299 | # bned_gcned_features = keypoints_confidence * torch.cat(bned_gcned_feature_vector_list, dim=1) 300 | # return (features, bned_features), (gcned_features, bned_gcned_features) 301 | 302 | features_stage1 = keypoints_confidence * torch.cat(bned_feature_vector_list, dim=1) 303 | features_satge2 = torch.cat([i.unsqueeze(1) for i in bned_feature_vector_list], dim=1) 304 | gcned_features_stage1 = keypoints_confidence * torch.cat(bned_gcned_feature_vector_list, dim=1) 305 | gcned_features_stage2 = torch.cat([i.unsqueeze(1) for i in bned_gcned_feature_vector_list], dim=1) 306 | 307 | return (features_stage1, features_satge2), (gcned_features_stage1, 
gcned_features_stage2) 308 | 309 | 310 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 311 | 312 | def __init__(self, optimizer, milestones, gamma=0.1, warmup_factor=1.0 / 3, warmup_iters=500, 313 | warmup_method="linear", last_epoch=-1): 314 | if not list(milestones) == sorted(milestones): 315 | raise ValueError( 316 | "Milestones should be a list of" " increasing integers. Got {}", 317 | milestones, 318 | ) 319 | 320 | if warmup_method not in ("constant", "linear"): 321 | raise ValueError( 322 | "Only 'constant' or 'linear' warmup_method accepted" 323 | "got {}".format(warmup_method) 324 | ) 325 | self.milestones = milestones 326 | self.gamma = gamma 327 | self.warmup_factor = warmup_factor 328 | self.warmup_iters = warmup_iters 329 | self.warmup_method = warmup_method 330 | super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) 331 | 332 | def get_lr(self): 333 | warmup_factor = 1 334 | if self.last_epoch < self.warmup_iters: 335 | if self.warmup_method == "constant": 336 | warmup_factor = self.warmup_factor 337 | elif self.warmup_method == "linear": 338 | alpha = float(self.last_epoch) / float(self.warmup_iters) 339 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 340 | return [ 341 | base_lr 342 | * warmup_factor 343 | * self.gamma ** bisect_right(self.milestones, self.last_epoch) 344 | for base_lr in self.base_lrs 345 | ] 346 | -------------------------------------------------------------------------------- /core/data_loader/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append('../') 4 | 5 | from .dataset import * 6 | from .loader import * 7 | 8 | import torchvision.transforms as transforms 9 | from tools import * 10 | 11 | 12 | class Loaders: 13 | 14 | def __init__(self, config): 15 | 16 | self.transform_train = transforms.Compose([ 17 | transforms.Resize(config.image_size, interpolation=3), 18 | transforms.RandomHorizontalFlip(p=0.5), 19 | transforms.Pad(10), 20 | transforms.RandomCrop(config.image_size), 21 | transforms.ToTensor(), 22 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 23 | RandomErasing(probability=0.5, mean=[0.485, 0.456, 0.406]) 24 | ]) 25 | self.transform_test = transforms.Compose([ 26 | transforms.Resize(config.image_size, interpolation=3), 27 | transforms.ToTensor(), 28 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 29 | ]) 30 | 31 | self.datasets = ['duke'] 32 | 33 | # dataset 34 | self.duke_path = config.duke_path 35 | self.train_dataset = config.train_dataset 36 | assert self.train_dataset in self.datasets 37 | 38 | # batch size 39 | self.p = config.p 40 | self.k = config.k 41 | 42 | # dataset paths 43 | self.samples_path = { 44 | 'duke': os.path.join(self.duke_path, 'bounding_box_train/'), 45 | 'duke_query': os.path.join(self.duke_path, 'query/'), 46 | 'duke_gallery': os.path.join(self.duke_path, 'bounding_box_test/')} 47 | 48 | # load 49 | self._load() 50 | 51 | def _load(self): 52 | 53 | # train dataset and iter 54 | train_samples = self._get_train_samples(self.train_dataset) 55 | self.train_iter = self._get_uniform_iter(train_samples, self.transform_train, self.p, self.k) 56 | 57 | # duke test dataset and loader 58 | self.duke_query_samples, self.duke_gallery_samples = self._get_test_samples('duke') 59 | self.duke_query_loader = self._get_loader(self.duke_query_samples, self.transform_test, 128) 60 | self.duke_gallery_loader = self._get_loader(self.duke_gallery_samples, 
self.transform_test, 128) 61 | 62 | def _get_train_samples(self, train_dataset): 63 | train_samples_path = self.samples_path[train_dataset] 64 | samples = Samples4Duke(train_samples_path) 65 | return samples 66 | 67 | def _get_test_samples(self, test_dataset): 68 | query_data_path = self.samples_path[test_dataset + '_query'] 69 | gallery_data_path = self.samples_path[test_dataset + '_gallery'] 70 | query_samples = Samples4Duke(query_data_path, reorder=False) 71 | gallery_samples = Samples4Duke(gallery_data_path, reorder=False) 72 | return query_samples, gallery_samples 73 | 74 | def _get_uniform_iter(self, samples, transform, p, k): 75 | ''' 76 | load person reid data_loader from images_folder 77 | and uniformly sample according to class 78 | :param images_folder_path: 79 | :param transform: 80 | :param p: 81 | :param k: 82 | :return: 83 | ''' 84 | dataset = PersonReIDDataSet(samples.samples, transform=transform) 85 | loader = data.DataLoader(dataset, batch_size=p * k, num_workers=8, drop_last=False, 86 | sampler=ClassUniformlySampler(dataset, class_position=1, k=k)) 87 | iters = IterLoader(loader) 88 | 89 | return iters 90 | 91 | def _get_random_iter(self, samples, transform, batch_size): 92 | dataset = PersonReIDDataSet(samples.samples, transform=transform) 93 | loader = data.DataLoader(dataset, batch_size=batch_size, num_workers=8, drop_last=False, shuffle=True) 94 | iters = IterLoader(loader) 95 | return iters 96 | 97 | def _get_random_loader(self, samples, transform, batch_size): 98 | dataset = PersonReIDDataSet(samples.samples, transform=transform) 99 | loader = data.DataLoader(dataset, batch_size=batch_size, num_workers=8, drop_last=False, shuffle=True) 100 | return loader 101 | 102 | def _get_loader(self, samples, transform, batch_size): 103 | dataset = PersonReIDDataSet(samples.samples, transform=transform) 104 | loader = data.DataLoader(dataset, batch_size=batch_size, num_workers=8, drop_last=False, shuffle=False) 105 | return loader 106 | -------------------------------------------------------------------------------- /core/data_loader/dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | import copy 4 | import os 5 | 6 | 7 | def os_walk(folder_dir): 8 | for root, dirs, files in os.walk(folder_dir): 9 | files = sorted(files, reverse=True) 10 | dirs = sorted(dirs, reverse=True) 11 | return root, dirs, files 12 | 13 | 14 | class PersonReIDSamples: 15 | 16 | def __init__(self, samples_path, reorder=True): 17 | 18 | # parameters 19 | self.samples_path = samples_path 20 | self.reorder = reorder 21 | 22 | # load samples 23 | samples = self._load_images_path(self.samples_path) 24 | 25 | # reorder person identities and camera identities 26 | if self.reorder: 27 | samples = self._reorder_labels(samples, 1) 28 | samples = self._reorder_labels(samples, 2) 29 | self.samples = samples 30 | 31 | def _reorder_labels(self, samples, label_index): 32 | 33 | ids = [] 34 | for sample in samples: 35 | ids.append(sample[label_index]) 36 | 37 | # delete repetitive elments and order 38 | ids = list(set(ids)) 39 | ids.sort() 40 | # reorder 41 | for sample in samples: 42 | sample[label_index] = ids.index(sample[label_index]) 43 | 44 | return samples 45 | 46 | def _load_images_path(self, folder_dir): 47 | ''' 48 | :param folder_dir: 49 | :return: [(path, identiti_id, camera_id)] 50 | ''' 51 | samples = [] 52 | root_path, _, files_name = os_walk(folder_dir) 53 | for file_name in files_name: 54 | if '.jpg' in 
file_name: 55 | identi_id, camera_id = self._analysis_file_name(file_name) 56 | samples.append([root_path + file_name, identi_id, camera_id]) 57 | return samples 58 | 59 | def _analysis_file_name(self, file_name): 60 | ''' 61 | 62 | :param file_name: format like 0844_c3s2_107328_01.jpg 63 | :return: 64 | ''' 65 | split_list = file_name.replace('.jpg', '').replace('c', '').replace('s', '_').split('_') 66 | identi_id, camera_id = int(split_list[0]), int(split_list[1]) 67 | return identi_id, camera_id 68 | 69 | 70 | class Samples4Market(PersonReIDSamples): 71 | ''' 72 | Market Dataset 73 | ''' 74 | 75 | pass 76 | 77 | 78 | class Samples4Duke(PersonReIDSamples): 79 | ''' 80 | Duke dataset 81 | ''' 82 | 83 | def _analysis_file_name(self, file_name): 84 | ''' 85 | 86 | :param file_name: format like 0002_c1_f0044158.jpg 87 | :return: 88 | ''' 89 | split_list = file_name.replace('.jpg', '').replace('c', '').split('_') 90 | identi_id, camera_id = int(split_list[0]), int(split_list[1]) 91 | return identi_id, camera_id 92 | 93 | 94 | class PersonReIDDataSet: 95 | 96 | def __init__(self, samples, transform): 97 | self.samples = samples 98 | self.transform = transform 99 | 100 | def __getitem__(self, index): 101 | this_sample = copy.deepcopy(self.samples[index]) 102 | 103 | this_sample[0] = self._loader(this_sample[0]) 104 | if self.transform is not None: 105 | this_sample[0] = self.transform(this_sample[0]) 106 | this_sample[1] = np.array(this_sample[1]) 107 | 108 | return this_sample 109 | 110 | def __len__(self): 111 | return len(self.samples) 112 | 113 | def _loader(self, img_path): 114 | return Image.open(img_path).convert('RGB') 115 | 116 | -------------------------------------------------------------------------------- /core/data_loader/loader.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append('..') 4 | 5 | import torch.utils.data as data 6 | import random 7 | 8 | 9 | class ClassUniformlySampler(data.sampler.Sampler): 10 | ''' 11 | random sample according to class label 12 | Arguments: 13 | data_source (Dataset): data_loader to sample from 14 | class_position (int): which one is used as class 15 | k (int): sample k images of each class 16 | ''' 17 | 18 | def __init__(self, data_source, class_position, k): 19 | 20 | self.data_source = data_source 21 | self.class_position = class_position 22 | self.k = k 23 | 24 | self.samples = self.data_source.samples 25 | self.class_dict = self._tuple2dict(self.samples) 26 | 27 | def __iter__(self): 28 | self.sample_list = self._generate_list(self.class_dict) 29 | return iter(self.sample_list) 30 | 31 | def __len__(self): 32 | return len(self.sample_list) 33 | 34 | def _tuple2dict(self, inputs): 35 | ''' 36 | 37 | :param inputs: list with tuple elemnts, [(image_path1, class_index_1), (imagespath_2, class_index_2), ...] 
38 | :return: dict, {class_index_i: [samples_index1, samples_index2, ...]} 39 | ''' 40 | dict = {} 41 | for index, each_input in enumerate(inputs): 42 | class_index = each_input[self.class_position] 43 | if class_index not in list(dict.keys()): 44 | dict[class_index] = [index] 45 | else: 46 | dict[class_index].append(index) 47 | return dict 48 | 49 | def _generate_list(self, dict): 50 | ''' 51 | :param dict: dict, whose values are list 52 | :return: 53 | ''' 54 | 55 | sample_list = [] 56 | 57 | dict_copy = dict.copy() 58 | keys = list(dict_copy.keys()) 59 | random.shuffle(keys) 60 | for key in keys: 61 | value = dict_copy[key] 62 | if len(value) >= self.k: 63 | random.shuffle(value) 64 | sample_list.extend(value[0: self.k]) 65 | else: 66 | value = value * self.k 67 | random.shuffle(value) 68 | sample_list.extend(value[0: self.k]) 69 | 70 | return sample_list 71 | 72 | 73 | class IterLoader: 74 | 75 | def __init__(self, loader): 76 | self.loader = loader 77 | self.iter = iter(self.loader) 78 | 79 | def next_one(self): 80 | try: 81 | return next(self.iter) 82 | except: 83 | self.iter = iter(self.loader) 84 | return next(self.iter) 85 | -------------------------------------------------------------------------------- /core/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .model_reid import * 2 | from .model_keypoints import * 3 | from .model_gcn import * 4 | from .model_graph_matching import * 5 | -------------------------------------------------------------------------------- /core/models/model_gcn.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import itertools 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | 9 | def generate_adj(node_num, linked_edges, self_connect=1): 10 | ''' 11 | Params: 12 | node_num: node number 13 | linked_edges: [[from_where, to_where], ...] 14 | self_connect: float, 15 | ''' 16 | 17 | if self_connect > 0: 18 | adj = np.eye(node_num) * self_connect 19 | else: 20 | adj = np.zeros([node_num] * 2) 21 | 22 | for i, j in linked_edges: 23 | adj[i, j] = 1.0 24 | adj[j, i] = 1.0 25 | 26 | # we suppose the last one is global feature 27 | adj[-1, :-1] = 0 28 | adj[-1, -1] = 1 29 | print(adj) 30 | 31 | adj = torch.from_numpy(adj.astype(np.float32)) 32 | return adj 33 | 34 | 35 | class AdaptDirGraphGonvLayer(nn.Module): 36 | 37 | def __init__(self, in_dim, out_dim, adj, scale): 38 | super(AdaptDirGraphGonvLayer, self).__init__() 39 | 40 | # parameters 41 | self.in_dim = in_dim 42 | self.out_dim = out_dim 43 | self.adj = adj 44 | self.scale = scale 45 | self.weight = nn.Parameter(torch.Tensor(in_dim, out_dim)) 46 | 47 | self.reset_parameters() 48 | self.out = 0 49 | 50 | # layers for adj 51 | self.fc_direct = nn.Linear(in_dim, 1, bias=False) 52 | self.bn_direct = nn.BatchNorm1d(in_dim) 53 | self.sigmoid = nn.Sigmoid() 54 | 55 | # layers for feature 56 | self.fc_original_feature = nn.Linear(in_dim, out_dim, bias=False) 57 | self.fc_merged_feature = nn.Linear(in_dim, out_dim, bias=False) 58 | self.relu = nn.ReLU() 59 | 60 | def reset_parameters(self): 61 | stdv = 1. 
/ math.sqrt(self.weight.size(1)) 62 | self.weight.data.uniform_(-stdv, stdv) 63 | 64 | def forward(self, inputs): 65 | 66 | # learn adj 67 | adj2 = self.learn_adj(inputs, self.adj) 68 | 69 | # merge feature 70 | merged_inputs = torch.matmul(adj2, inputs) 71 | outputs1 = self.fc_merged_feature(merged_inputs) 72 | 73 | # embed original feature 74 | outputs2 = self.fc_original_feature(inputs) 75 | 76 | outputs = self.relu(outputs1) + outputs2 77 | return outputs 78 | 79 | def learn_adj(self, inputs, adj): 80 | 81 | # inputs [bs, k(node_num), c] 82 | bs, k, c = inputs.shape 83 | 84 | # 85 | global_features = inputs[:, k - 1, :].unsqueeze(1).repeat([1, k, 1]) # [bs,k,2048] 86 | distances = torch.abs(inputs - global_features) # [bs, k, 2048] 87 | 88 | # bottom triangle 89 | distances_gap = [] 90 | position_list = [] 91 | for i, j in itertools.product(list(range(k)), list(range(k))): 92 | if i < j and (i != k - 1 and j != k - 1) and adj[i, j] > 0: 93 | distances_gap.append(distances[:, i, :].unsqueeze(1) - distances[:, j, :].unsqueeze(1)) 94 | position_list.append([i, j]) 95 | distances_gap = 15 * torch.cat(distances_gap, dim=1) # [bs, edge_number, 2048] 96 | adj_tmp = self.sigmoid(self.scale * self.fc_direct( 97 | self.bn_direct(distances_gap.transpose(1, 2)).transpose(1, 2))).squeeze() # [bs, edge_number] 98 | 99 | # re-assign 100 | adj2 = torch.ones([bs, k, k]).cuda() 101 | for indx, (i, j) in enumerate(position_list): 102 | adj2[:, i, j] = adj_tmp[:, indx] * 2 103 | adj2[:, j, i] = (1 - adj_tmp[:, indx]) * 2 104 | 105 | mask = adj.unsqueeze(0).repeat([bs, 1, 1]) 106 | new_adj = adj2 * mask 107 | new_adj = F.normalize(new_adj, p=1, dim=2) 108 | 109 | return new_adj 110 | 111 | def __repr__(self): 112 | return self.__class__.__name__ + ' (' \ 113 | + str(self.in_dim) + ' -> ' \ 114 | + str(self.out_dim) + ')' 115 | 116 | 117 | class GraphConvNet(nn.Module): 118 | 119 | def __init__(self, adj, in_dim, hidden_dim, out_dim, scale): 120 | super(GraphConvNet, self).__init__() 121 | 122 | self.adgcn1 = AdaptDirGraphGonvLayer(in_dim, hidden_dim, adj, scale) 123 | self.adgcn2 = AdaptDirGraphGonvLayer(hidden_dim, out_dim, adj, scale) 124 | self.relu = nn.ReLU(inplace=True) 125 | 126 | def __call__(self, feature_list): 127 | cated_features = [feature.unsqueeze(1) for feature in feature_list] 128 | cated_features = torch.cat(cated_features, dim=1) 129 | 130 | middle_features = self.adgcn1(cated_features) 131 | out_features = self.adgcn2(middle_features) 132 | 133 | out_feats_list = [] 134 | for i in range(out_features.shape[1]): 135 | out_feat_i = out_features[:, i].squeeze(1) 136 | out_feats_list.append(out_feat_i) 137 | 138 | return out_feats_list 139 | -------------------------------------------------------------------------------- /core/models/model_graph_matching/__init__.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from .sinkhorn import Sinkhorn 8 | from .voting_layer import Voting 9 | from .gconv import Siamese_Gconv 10 | from .affinity_layer import Affinity 11 | from .permutation_loss import PermutationLoss 12 | 13 | from tools import cosine_dist, label2similarity 14 | 15 | 16 | class GMNet(nn.Module): 17 | def __init__(self): 18 | super(GMNet, self).__init__() 19 | 20 | self.BS_ITER_NUM = 20 21 | self.BS_EPSILON = 1e-10 22 | self.FEATURE_CHANNEL = 2048 23 | self.GNN_FEAT = 1024 24 | self.GNN_LAYER = 2 25 | self.VOT_ALPHA = 200.0 26 | 27 | 
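        # Sinkhorn iteratively row/column-normalises the affinity matrix towards a doubly-
        # stochastic matching (BS_ITER_NUM iterations, BS_EPSILON for numerical stability),
        # and the voting layer sharpens it with a softmax scaled by VOT_ALPHA; the GNN layers
        # below project the 2048-d node features down to GNN_FEAT dimensions.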
self.bi_stochastic = Sinkhorn(max_iter=self.BS_ITER_NUM, epsilon=self.BS_EPSILON) 28 | self.voting_layer = Voting(self.VOT_ALPHA) 29 | 30 | for i in range(self.GNN_LAYER): 31 | gnn_layer = Siamese_Gconv(self.FEATURE_CHANNEL, self.GNN_FEAT) if i == 0 else Siamese_Gconv(self.GNN_FEAT, self.GNN_FEAT) 32 | self.add_module('gnn_layer_{}'.format(i), gnn_layer) 33 | self.add_module('affinity_{}'.format(i), Affinity(self.GNN_FEAT)) 34 | if i == self.GNN_LAYER - 2: # only second last layer will have cross-graph module 35 | self.add_module('cross_graph_{}'.format(i), nn.Linear(self.GNN_FEAT * 2, self.GNN_FEAT)) 36 | 37 | 38 | def forward(self, emb1_list, emb2_list, adj): 39 | 40 | if type(emb1_list).__name__ == type(emb2_list).__name__ == 'list': 41 | emb1 = torch.cat([emb1.unsqueeze(1) for emb1 in emb1_list], dim=1) 42 | emb2 = torch.cat([emb2.unsqueeze(1) for emb2 in emb2_list], dim=1) 43 | else: 44 | emb1 = emb1_list 45 | emb2 = emb2_list 46 | 47 | org_emb1 = emb1 48 | org_emb2 = emb2 49 | 50 | ns_src = (torch.ones([emb1.shape[0]]) * 14).int() 51 | ns_tgt = (torch.ones([emb2.shape[0]]) * 14).int() 52 | 53 | for i in range(self.GNN_LAYER): 54 | gnn_layer = getattr(self, 'gnn_layer_{}'.format(i)) 55 | emb1, emb2 = gnn_layer([adj, emb1], [adj, emb2]) 56 | affinity = getattr(self, 'affinity_{}'.format(i)) 57 | s = affinity(emb1, emb2) 58 | s = self.voting_layer(s, ns_src, ns_tgt) 59 | s = self.bi_stochastic(s, ns_src, ns_tgt) 60 | 61 | if i == self.GNN_LAYER - 2: 62 | emb1_before_cross, emb2_before_cross = emb1, emb2 63 | cross_graph = getattr(self, 'cross_graph_{}'.format(i)) 64 | emb1 = cross_graph(torch.cat((emb1_before_cross, torch.bmm(s, emb2_before_cross)), dim=-1)) 65 | emb2 = cross_graph(torch.cat((emb2_before_cross, torch.bmm(s.transpose(1, 2), emb1_before_cross)), dim=-1)) 66 | 67 | fin_emb1 = org_emb1 + torch.bmm(s, org_emb2) 68 | fin_emb2 = org_emb2 + torch.bmm(s.transpose(1,2), org_emb1) 69 | 70 | return s, fin_emb1, fin_emb2 71 | 72 | 73 | 74 | class Verificator(nn.Module): 75 | 76 | def __init__(self, config): 77 | super(Verificator, self).__init__() 78 | 79 | self.config = config 80 | 81 | self.bn = nn.BatchNorm1d(2048*14) 82 | self.layer1 = nn.Linear(2048*14, 1, bias=True) 83 | self.sigmoid = nn.Sigmoid() 84 | 85 | 86 | def __call__(self, feature_vectors_list1, feature_vectors_list2): 87 | ''' 88 | :param feature_vectors_list1: list with length node_num, element size is [bs, feature_length] 89 | :param feature_vectors_list2: list with length node_num, element size is [bs, feature_length] 90 | :return: 91 | ''' 92 | 93 | 94 | if type(feature_vectors_list1).__name__ == 'list': 95 | feature_vectors_1 = torch.cat( 96 | [feature_vector1.unsqueeze(1) for feature_vector1 in feature_vectors_list1], dim=1) 97 | feature_vectors_2 = torch.cat( 98 | [feature_vector2.unsqueeze(1) for feature_vector2 in feature_vectors_list2], dim=1) 99 | elif type(feature_vectors_list1).__name__ == 'Tensor': # [bs, branch_num, channel_num] 100 | feature_vectors_1 = feature_vectors_list1 101 | feature_vectors_2 = feature_vectors_list2 102 | 103 | # feature_vectors_1 = feature_vectors_1.detach() 104 | # feature_vectors_2 = feature_vectors_2.detach() 105 | 106 | feature_vectors_1 = F.normalize(feature_vectors_1, p=2, dim=2) 107 | feature_vectors_2 = F.normalize(feature_vectors_2, p=2, dim=2) 108 | 109 | features = self.config.ver_in_scale * feature_vectors_1 * feature_vectors_2 110 | features = features.view([features.shape[0], features.shape[1] * features.shape[2]]) 111 | 112 | logit = self.layer1(features) 113 | 
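        # a single linear layer scores the flattened element-wise products of the two
        # L2-normalised feature sets; the sigmoid below maps that score to the probability
        # that both inputs describe the same identity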
prob = self.sigmoid(logit) 114 | 115 | return prob 116 | 117 | 118 | def mining_hard_pairs(feature_vector_list, pids): 119 | ''' 120 | use global feature (the last one) to mining hard positive and negative pairs 121 | cosine distance is used to measure similarity 122 | :param feature_vector_list: 123 | :param pids: 124 | :return: 125 | ''' 126 | 127 | global_feature_vectors = feature_vector_list[-1] 128 | dist_matrix = cosine_dist(global_feature_vectors, global_feature_vectors) 129 | label_matrix = label2similarity(pids, pids).float() 130 | 131 | _, sorted_mat_distance_index = torch.sort(dist_matrix + (9999999.) * (1 - label_matrix), dim=1, descending=False) 132 | hard_p_index = sorted_mat_distance_index[:, 0] 133 | _, sorted_mat_distance_index = torch.sort(dist_matrix + (-9999999.) * (label_matrix), dim=1, descending=True) 134 | hard_n_index = sorted_mat_distance_index[:, 0] 135 | 136 | new_feature_vector_list = [] 137 | p_feature_vector_list = [] 138 | n_feature_vector_list = [] 139 | for feature_vector in feature_vector_list: 140 | feature_vector = copy.copy(feature_vector) 141 | new_feature_vector_list.append(feature_vector.detach()) 142 | feature_vector = copy.copy(feature_vector.detach()) 143 | p_feature_vector_list.append(feature_vector[hard_p_index, :]) 144 | feature_vector = copy.copy(feature_vector.detach()) 145 | n_feature_vector_list.append(feature_vector[hard_n_index, :]) 146 | 147 | return new_feature_vector_list, p_feature_vector_list, n_feature_vector_list 148 | 149 | 150 | def analyze_ver_prob(prob, positive): 151 | ''' 152 | :param prob: [bs, 1] 153 | :param positive: True or False 154 | :return: 155 | ''' 156 | 157 | if positive: 158 | hit = (prob > 0.5).float() 159 | unhit = (prob < 0.5).float() 160 | else: 161 | hit = (prob < 0.5).float() 162 | unhit = (prob > 0.5).float() 163 | 164 | avg_prob = torch.mean(prob) 165 | acc = torch.mean(hit) 166 | avg_hit_prob = torch.sum(prob * hit) / torch.sum(hit) 167 | avg_unhit_prob = torch.sum(prob * unhit) / torch.sum(unhit) 168 | 169 | return avg_prob, acc, avg_hit_prob, avg_unhit_prob 170 | -------------------------------------------------------------------------------- /core/models/model_graph_matching/affinity_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.parameter import Parameter 4 | from torch import Tensor 5 | import math 6 | 7 | 8 | class Affinity(nn.Module): 9 | """ 10 | Affinity Layer to compute the affinity matrix from feature space. 11 | M = X * A * Y^T 12 | Parameter: scale of weight d 13 | Input: feature X, Y 14 | Output: affinity matrix M 15 | """ 16 | 17 | def __init__(self, d): 18 | super(Affinity, self).__init__() 19 | self.d = d 20 | self.A = Parameter(Tensor(self.d, self.d)) 21 | self.reset_parameters() 22 | 23 | def reset_parameters(self): 24 | stdv = 1. 
/ math.sqrt(self.d) 25 | self.A.data.uniform_(-stdv, stdv) 26 | self.A.data += torch.eye(self.d) 27 | 28 | def forward(self, X, Y): 29 | assert X.shape[2] == Y.shape[2] == self.d 30 | M = torch.matmul(X, (self.A + self.A.transpose(0, 1)) / 2) 31 | M = torch.matmul(M, Y.transpose(1, 2)) 32 | return M 33 | -------------------------------------------------------------------------------- /core/models/model_graph_matching/gconv.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import torch.nn.functional as F 5 | 6 | 7 | class Gconv(nn.Module): 8 | """ 9 | (Intra) graph convolution operation, with single convolutional layer 10 | """ 11 | 12 | def __init__(self, in_features, out_features): 13 | super(Gconv, self).__init__() 14 | self.num_inputs = in_features 15 | self.num_outputs = out_features 16 | self.a_fc = nn.Linear(self.num_inputs, self.num_outputs) 17 | self.u_fc = nn.Linear(self.num_inputs, self.num_outputs) 18 | self.relu = nn.LeakyReLU(0.2) 19 | 20 | def forward(self, A, x, norm=True): 21 | # if norm is True: 22 | # A = F.normalize(A, p=1, dim=-2) 23 | 24 | ax = self.a_fc(x) 25 | ux = self.u_fc(x) 26 | # x = torch.bmm(A, F.relu(ax)) + F.relu(ux) # has size (bs, N, num_outputs) 27 | 28 | x = self.relu(ax) + self.relu(ux) 29 | 30 | return x 31 | 32 | 33 | class Siamese_Gconv(nn.Module): 34 | """ 35 | Perform graph convolution on two input graphs (g1, g2) 36 | """ 37 | 38 | def __init__(self, in_features, num_features): 39 | super(Siamese_Gconv, self).__init__() 40 | self.gconv = Gconv(in_features, num_features) 41 | 42 | def forward(self, g1, g2): 43 | emb1 = self.gconv(*g1) 44 | emb2 = self.gconv(*g2) 45 | # embx are tensors of size (bs, N, num_features) 46 | return emb1, emb2 47 | -------------------------------------------------------------------------------- /core/models/model_graph_matching/permutation_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class PermutationLoss(nn.Module): 7 | """ 8 | Cross entropy loss between two permutations. 9 | """ 10 | def __init__(self): 11 | super(PermutationLoss, self).__init__() 12 | 13 | def forward(self, pred_perm, gt_perm): 14 | 15 | pred_ns = (torch.ones([pred_perm.shape[0]]) * 14).int() 16 | gt_ns = (torch.ones([gt_perm.shape[0]]) * 14).int() 17 | 18 | batch_num = pred_perm.shape[0] 19 | pred_perm = pred_perm.to(dtype=torch.float32) 20 | 21 | assert torch.all((pred_perm >= 0) * (pred_perm <= 1)) 22 | assert torch.all((gt_perm >= 0) * (gt_perm <= 1)) 23 | 24 | loss = torch.tensor(0.).to(pred_perm.device) 25 | n_sum = torch.zeros_like(loss) 26 | for b in range(batch_num): 27 | loss += F.binary_cross_entropy( 28 | pred_perm[b, :pred_ns[b], :gt_ns[b]], 29 | gt_perm[b, :pred_ns[b], :gt_ns[b]], 30 | reduction='sum') 31 | n_sum += pred_ns[b].to(n_sum.dtype).to(pred_perm.device) 32 | 33 | return loss / n_sum 34 | -------------------------------------------------------------------------------- /core/models/model_graph_matching/sinkhorn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Sinkhorn(nn.Module): 6 | """ 7 | BiStochastic Layer turns the input matrix into a bi-stochastic matrix. 
8 | Parameter: maximum iterations max_iter 9 | a small number for numerical stability epsilon 10 | Input: input matrix s 11 | Output: bi-stochastic matrix s 12 | """ 13 | 14 | def __init__(self, max_iter=10, epsilon=1e-4): 15 | super(Sinkhorn, self).__init__() 16 | self.max_iter = max_iter 17 | self.epsilon = epsilon 18 | 19 | def forward(self, s, nrows=None, ncols=None, exp=False, exp_alpha=20, dummy_row=False, dtype=torch.float32): 20 | batch_size = s.shape[0] 21 | 22 | if dummy_row: 23 | dummy_shape = list(s.shape) 24 | dummy_shape[1] = s.shape[2] - s.shape[1] 25 | s = torch.cat((s, torch.full(dummy_shape, 0.).to(s.device)), dim=1) 26 | new_nrows = ncols 27 | for b in range(batch_size): 28 | s[b, nrows[b]:new_nrows[b], :ncols[b]] = self.epsilon 29 | nrows = new_nrows 30 | 31 | row_norm_ones = torch.zeros(batch_size, s.shape[1], s.shape[1], device=s.device) # size: row x row 32 | col_norm_ones = torch.zeros(batch_size, s.shape[2], s.shape[2], device=s.device) # size: col x col 33 | for b in range(batch_size): 34 | row_slice = slice(0, nrows[b] if nrows is not None else s.shape[2]) 35 | col_slice = slice(0, ncols[b] if ncols is not None else s.shape[1]) 36 | row_norm_ones[b, row_slice, row_slice] = 1 37 | col_norm_ones[b, col_slice, col_slice] = 1 38 | 39 | # for Sinkhorn stacked on last dimension 40 | if len(s.shape) == 4: 41 | row_norm_ones = row_norm_ones.unsqueeze(-1) 42 | col_norm_ones = col_norm_ones.unsqueeze(-1) 43 | 44 | s += self.epsilon 45 | 46 | for i in range(self.max_iter): 47 | if exp: 48 | s = torch.exp(exp_alpha * s) 49 | if i % 2 == 1: 50 | # column norm 51 | sum = torch.sum(torch.mul(s.unsqueeze(3), col_norm_ones.unsqueeze(1)), dim=2) 52 | else: 53 | # row norm 54 | sum = torch.sum(torch.mul(row_norm_ones.unsqueeze(3), s.unsqueeze(1)), dim=2) 55 | 56 | tmp = torch.zeros_like(s) 57 | for b in range(batch_size): 58 | row_slice = slice(0, nrows[b] if nrows is not None else s.shape[2]) 59 | col_slice = slice(0, ncols[b] if ncols is not None else s.shape[1]) 60 | tmp[b, row_slice, col_slice] = 1 / sum[b, row_slice, col_slice] 61 | s = s * tmp 62 | 63 | if dummy_row and dummy_shape[1] > 0: 64 | s = s[:, :-dummy_shape[1]] 65 | 66 | return s 67 | -------------------------------------------------------------------------------- /core/models/model_graph_matching/voting_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Voting(nn.Module): 6 | """ 7 | Voting Layer computes a new row-stotatic matrix with softmax. A large number (alpha) is multiplied to the input 8 | stochastic matrix to scale up the difference. 
9 | Parameter: value multiplied before softmax alpha 10 | threshold that will ignore such points while calculating displacement in pixels pixel_thresh 11 | Input: permutation or doubly stochastic matrix s 12 | ///point set on source image P_src 13 | ///point set on target image P_tgt 14 | ground truth number of effective points in source image ns_gt 15 | Output: softmax matrix s 16 | """ 17 | 18 | def __init__(self, alpha=200, pixel_thresh=None): 19 | super(Voting, self).__init__() 20 | self.alpha = alpha 21 | self.softmax = nn.Softmax(dim=-1) # Voting among columns 22 | self.pixel_thresh = pixel_thresh 23 | 24 | def forward(self, s, nrow_gt, ncol_gt=None): 25 | ret_s = torch.zeros_like(s) 26 | # filter dummy nodes 27 | for b, n in enumerate(nrow_gt): 28 | if ncol_gt is None: 29 | ret_s[b, 0:n, :] = \ 30 | self.softmax(self.alpha * s[b, 0:n, :]) 31 | else: 32 | ret_s[b, 0:n, 0:ncol_gt[b]] = \ 33 | self.softmax(self.alpha * s[b, 0:n, 0:ncol_gt[b]]) 34 | return ret_s 35 | 36 | # def forward(self, s): 37 | # ret_s = self.softmax(self.alpha * s) 38 | # return ret_s 39 | -------------------------------------------------------------------------------- /core/models/model_keypoints/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | import math 6 | 7 | from .config import cfg as pose_config 8 | from .pose_hrnet import get_pose_net 9 | from .pose_processor import HeatmapProcessor2 10 | 11 | 12 | class ScoremapComputer(nn.Module): 13 | 14 | def __init__(self, norm_scale): 15 | super(ScoremapComputer, self).__init__() 16 | 17 | # init skeleton model 18 | self.keypoints_predictor = get_pose_net(pose_config, False) 19 | self.keypoints_predictor.load_state_dict(torch.load(pose_config.TEST.MODEL_FILE)) 20 | # self.heatmap_processor = HeatmapProcessor(normalize_heatmap=True, group_mode='sum', gaussion_smooth=None) 21 | self.heatmap_processor = HeatmapProcessor2(normalize_heatmap=True, group_mode='sum', norm_scale=norm_scale) 22 | 23 | def forward(self, x): 24 | heatmap = self.keypoints_predictor(x) # before normalization 25 | scoremap, keypoints_confidence, keypoints_location = self.heatmap_processor(heatmap) # after normalization 26 | return scoremap.detach(), keypoints_confidence.detach(), keypoints_location.detach() 27 | 28 | 29 | def compute_local_features(config, feature_maps, score_maps, keypoints_confidence): 30 | ''' 31 | the last one is global feature 32 | :param config: 33 | :param feature_maps: 34 | :param score_maps: 35 | :param keypoints_confidence: 36 | :return: 37 | ''' 38 | fbs, fc, fh, fw = feature_maps.shape 39 | sbs, sc, sh, sw = score_maps.shape 40 | assert fbs == sbs and fh == sh and fw == sw 41 | 42 | # get feature_vector_list 43 | feature_vector_list = [] 44 | for i in range(sc + 1): 45 | if i < sc: # skeleton-based local feature vectors 46 | score_map_i = score_maps[:, i, :, :].unsqueeze(1).repeat([1, fc, 1, 1]) 47 | feature_vector_i = torch.sum(score_map_i * feature_maps, [2, 3]) 48 | feature_vector_list.append(feature_vector_i) 49 | else: # global feature vectors 50 | feature_vector_i = ( 51 | F.adaptive_avg_pool2d(feature_maps, 1) + F.adaptive_max_pool2d(feature_maps, 1)).squeeze() 52 | feature_vector_list.append(feature_vector_i) 53 | keypoints_confidence = torch.cat([keypoints_confidence, torch.ones([fbs, 1]).cuda()], dim=1) 54 | 55 | # compute keypoints confidence 56 | keypoints_confidence[:, sc:] = F.normalize( 57 | 
keypoints_confidence[:, sc:], 1, 1) * config.weight_global_feature # global feature score_confidence 58 | keypoints_confidence[:, :sc] = F.normalize(keypoints_confidence[:, :sc], 1, 59 | 1) * config.weight_global_feature # partial feature score_confidence 60 | 61 | return feature_vector_list, keypoints_confidence 62 | -------------------------------------------------------------------------------- /core/models/model_keypoints/config/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from .default import _C as cfg 8 | from .default import update_config 9 | from .models import MODEL_EXTRAS 10 | -------------------------------------------------------------------------------- /core/models/model_keypoints/config/default.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | 13 | from yacs.config import CfgNode as CN 14 | from .models import MODEL_EXTRAS 15 | 16 | _C = CN() 17 | 18 | _C.OUTPUT_DIR = '' 19 | _C.LOG_DIR = '' 20 | _C.DATA_DIR = '' 21 | _C.GPUS = (0,) 22 | _C.WORKERS = 4 23 | _C.PRINT_FREQ = 20 24 | _C.AUTO_RESUME = False 25 | _C.PIN_MEMORY = True 26 | _C.RANK = 0 27 | 28 | # Cudnn related params 29 | _C.CUDNN = CN() 30 | _C.CUDNN.BENCHMARK = True 31 | _C.CUDNN.DETERMINISTIC = False 32 | _C.CUDNN.ENABLED = True 33 | 34 | # common params for NETWORK 35 | _C.MODEL = CN() 36 | _C.MODEL.NAME = 'pose_hrnet' 37 | _C.MODEL.INIT_WEIGHTS = True 38 | _C.MODEL.PRETRAINED = '' 39 | _C.MODEL.NUM_JOINTS = 17 40 | _C.MODEL.TAG_PER_JOINT = True 41 | _C.MODEL.TARGET_TYPE = 'gaussian' 42 | # _C.MODEL.IMAGE_SIZE = [256, 256] # width * height, ex: 192 * 256 43 | _C.MODEL.IMAGE_SIZE = [192, 256] # width * height, ex: 192 * 256 44 | 45 | _C.MODEL.HEATMAP_SIZE = [64, 64] # width * height, ex: 24 * 32 46 | _C.MODEL.SIGMA = 2 47 | # _C.MODEL.EXTRA = CN(new_allowed=True) 48 | # _C.MODEL.EXTRA = MODEL_EXTRAS['pose_resnet'] 49 | _C.MODEL.EXTRA = MODEL_EXTRAS['pose_high_resolution_net'] 50 | 51 | _C.MODEL.JOINTS_GROUPS = [[0, 1, 2, 3, 4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], 52 | [16]] # coco group 2 53 | # _C.MODEL.JOINTS_GROUPS = [[0,1,2,3,4,5,6],[5,7,9],[11,13,15],[6,8,10],[12,14,16],[5,6,11,12]] # coco group 3 54 | # _C.MODEL.JOINTS_GROUPS = [[0,1,2,3,4,5,6],[5,6,7,8,9,10,11,12],[11,12,13,14,15,16]] # coco group 2 55 | # _C.MODEL.JOINTS_GROUPS = [[0,1,2,3,4,5,6],[5,7,9],[11,13,15],[6,8,10],[12,14,16],[5,6,11,12]] # coco group 3 56 | # _C.MODEL.JOINTS_GROUPS = [[0,1,2,3,4,5,6],[5,7,9],[11,13,15],[6,8,10],[12,14,16],[5,6,11,12],[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]] # coco group 4 57 | # _C.MODEL.JOINTS_GROUPS = [[0,1,2,3,4,5,6],[5,7,9],[11,13,15],[6,8,10],[12,14,16],[5,6,11,12],[5,6,7,8,9,10,11,12],[11,12,13,14,15,16],[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]] # coco group 5 58 | # 
_C.MODEL.JOINTS_GROUPS = [[0,1,2,3,4,5,6],[5,6,7,8,9,10,11,12],[11,12,13,14,15,16],[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]] # coco group 6 59 | # _C.MODEL.JOINTS_GROUPS = [[0,1,2,3,4,5],[6,7,8,9],[10,11,12,13,14,15]] # mpii group 60 | 61 | _C.LOSS = CN() 62 | _C.LOSS.USE_OHKM = False 63 | _C.LOSS.TOPK = 8 64 | _C.LOSS.USE_TARGET_WEIGHT = True 65 | _C.LOSS.USE_DIFFERENT_JOINTS_WEIGHT = False 66 | 67 | # DATASET related params 68 | _C.DATASET = CN() 69 | _C.DATASET.ROOT = '' 70 | _C.DATASET.DATASET = 'mpii' 71 | _C.DATASET.TRAIN_SET = 'train' 72 | _C.DATASET.TEST_SET = 'valid' 73 | _C.DATASET.DATA_FORMAT = 'jpg' 74 | _C.DATASET.HYBRID_JOINTS_TYPE = '' 75 | _C.DATASET.SELECT_DATA = False 76 | 77 | # training data augmentation 78 | _C.DATASET.FLIP = True 79 | _C.DATASET.SCALE_FACTOR = 0.25 80 | _C.DATASET.ROT_FACTOR = 30 81 | _C.DATASET.PROB_HALF_BODY = 0.0 82 | _C.DATASET.NUM_JOINTS_HALF_BODY = 8 83 | _C.DATASET.COLOR_RGB = False 84 | 85 | # train 86 | _C.TRAIN = CN() 87 | 88 | _C.TRAIN.LR_FACTOR = 0.1 89 | _C.TRAIN.LR_STEP = [90, 110] 90 | _C.TRAIN.LR = 0.001 91 | 92 | _C.TRAIN.OPTIMIZER = 'adam' 93 | _C.TRAIN.MOMENTUM = 0.9 94 | _C.TRAIN.WD = 0.0001 95 | _C.TRAIN.NESTEROV = False 96 | _C.TRAIN.GAMMA1 = 0.99 97 | _C.TRAIN.GAMMA2 = 0.0 98 | 99 | _C.TRAIN.BEGIN_EPOCH = 0 100 | _C.TRAIN.END_EPOCH = 140 101 | 102 | _C.TRAIN.RESUME = False 103 | _C.TRAIN.CHECKPOINT = '' 104 | 105 | _C.TRAIN.BATCH_SIZE_PER_GPU = 32 106 | _C.TRAIN.SHUFFLE = True 107 | 108 | # testing 109 | _C.TEST = CN() 110 | 111 | # size of images for each device 112 | _C.TEST.BATCH_SIZE_PER_GPU = 32 113 | # Test Model Epoch 114 | _C.TEST.FLIP_TEST = False 115 | _C.TEST.POST_PROCESS = False 116 | _C.TEST.SHIFT_HEATMAP = False 117 | 118 | _C.TEST.USE_GT_BBOX = False 119 | 120 | # nms 121 | _C.TEST.IMAGE_THRE = 0.1 122 | _C.TEST.NMS_THRE = 0.6 123 | _C.TEST.SOFT_NMS = False 124 | _C.TEST.OKS_THRE = 0.5 125 | _C.TEST.IN_VIS_THRE = 0.0 126 | _C.TEST.COCO_BBOX_FILE = '' 127 | _C.TEST.BBOX_THRE = 1.0 128 | _C.TEST.MODEL_FILE = '' 129 | # _C.TEST.MODEL_FILE = '/data/model_zoo/skeleton/pose_coco/pose_resnet_50_256x192.pth.tar' 130 | # _C.TEST.MODEL_FILE = '/data/model_zoo/skeleton/pose_coco/pose_hrnet_w48_256x192.pth' 131 | _C.TEST.MODEL_FILE = './core/models/model_keypoints/pose_hrnet_w48_256x192.pth' 132 | 133 | # debug 134 | _C.DEBUG = CN() 135 | _C.DEBUG.DEBUG = False 136 | _C.DEBUG.SAVE_BATCH_IMAGES_GT = False 137 | _C.DEBUG.SAVE_BATCH_IMAGES_PRED = False 138 | _C.DEBUG.SAVE_HEATMAPS_GT = False 139 | _C.DEBUG.SAVE_HEATMAPS_PRED = False 140 | 141 | 142 | def update_config(cfg, args): 143 | cfg.defrost() 144 | cfg.merge_from_file(args.cfg) 145 | cfg.merge_from_list(args.opts) 146 | 147 | if args.modelDir: 148 | cfg.OUTPUT_DIR = args.modelDir 149 | 150 | if args.logDir: 151 | cfg.LOG_DIR = args.logDir 152 | 153 | if args.dataDir: 154 | cfg.DATA_DIR = args.dataDir 155 | 156 | cfg.DATASET.ROOT = os.path.join( 157 | cfg.DATA_DIR, cfg.DATASET.ROOT 158 | ) 159 | 160 | cfg.MODEL.PRETRAINED = os.path.join( 161 | cfg.DATA_DIR, cfg.MODEL.PRETRAINED 162 | ) 163 | 164 | if cfg.TEST.MODEL_FILE: 165 | cfg.TEST.MODEL_FILE = os.path.join( 166 | cfg.DATA_DIR, cfg.TEST.MODEL_FILE 167 | ) 168 | 169 | cfg.freeze() 170 | 171 | 172 | if __name__ == '__main__': 173 | import sys 174 | 175 | with open(sys.argv[1], 'w') as f: 176 | print(_C, file=f) 177 | -------------------------------------------------------------------------------- /core/models/model_keypoints/config/experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml: 
-------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 192 33 | - 256 34 | HEATMAP_SIZE: 35 | - 48 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | -------------------------------------------------------------------------------- /core/models/model_keypoints/config/experiments/coco/hrnet/w32_384x288_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 288 33 | - 384 34 | HEATMAP_SIZE: 35 | - 72 36 | - 96 37 | SIGMA: 3 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 
'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | -------------------------------------------------------------------------------- /core/models/model_keypoints/config/experiments/coco/hrnet/w48_256x192_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 192 33 | - 256 34 | HEATMAP_SIZE: 35 | - 48 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 48 61 | - 96 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 48 73 | - 96 74 | - 192 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 48 87 | - 96 88 | - 192 89 | - 384 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | 
BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | -------------------------------------------------------------------------------- /core/models/model_keypoints/config/experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 288 33 | - 384 34 | HEATMAP_SIZE: 35 | - 72 36 | - 96 37 | SIGMA: 3 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 48 61 | - 96 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 48 73 | - 96 74 | - 192 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 48 87 | - 96 88 | - 192 89 | - 384 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 24 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 24 111 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true 128 | -------------------------------------------------------------------------------- 
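The experiment YAMLs above are meant to be merged into the `_C` defaults from `config/default.py` via `update_config`, which also prefixes `DATA_DIR` onto `DATASET.ROOT`, `MODEL.PRETRAINED`, and `TEST.MODEL_FILE` before freezing the config. A minimal usage sketch follows (not part of the repo; it assumes yacs is installed, the relative YAML path resolves from the repo root, and the `Namespace` fields simply mirror the attributes `update_config()` reads):

```python
# Hypothetical usage sketch: merge one of the experiment YAMLs above into the
# default pose config. The opts list overrides TEST.MODEL_FILE in the flat
# KEY, VALUE form that cfg.merge_from_list() expects.
from argparse import Namespace

from core.models.model_keypoints.config import cfg, update_config

args = Namespace(
    cfg='core/models/model_keypoints/config/experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml',
    opts=['TEST.MODEL_FILE', './core/models/model_keypoints/pose_hrnet_w48_256x192.pth'],
    modelDir='',  # left empty so OUTPUT_DIR / LOG_DIR / DATA_DIR keep the YAML values
    logDir='',
    dataDir='',
)
update_config(cfg, args)
print(cfg.MODEL.NAME, cfg.MODEL.IMAGE_SIZE, cfg.MODEL.HEATMAP_SIZE)
```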
/core/models/model_keypoints/config/experiments/coco/resnet/res101_256x192_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' 25 | IMAGE_SIZE: 26 | - 192 27 | - 256 28 | HEATMAP_SIZE: 29 | - 48 30 | - 64 31 | SIGMA: 2 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 101 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /core/models/model_keypoints/config/experiments/coco/resnet/res101_384x288_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' 25 | IMAGE_SIZE: 26 | - 288 27 | - 384 28 | HEATMAP_SIZE: 29 | - 72 30 | - 96 31 | SIGMA: 3 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 101 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | 
IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /core/models/model_keypoints/config/experiments/coco/resnet/res152_256x192_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' 25 | IMAGE_SIZE: 26 | - 192 27 | - 256 28 | HEATMAP_SIZE: 29 | - 48 30 | - 64 31 | SIGMA: 2 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 152 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /core/models/model_keypoints/config/experiments/coco/resnet/res152_384x288_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' 25 | IMAGE_SIZE: 26 | - 288 27 | - 384 28 | HEATMAP_SIZE: 29 | - 72 30 | - 96 31 | SIGMA: 3 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 152 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | 
BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /core/models/model_keypoints/config/experiments/coco/resnet/res50_256x128_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' 25 | IMAGE_SIZE: 26 | - 128 27 | - 256 28 | HEATMAP_SIZE: 29 | - 32 30 | - 64 31 | SIGMA: 2 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 50 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /core/models/model_keypoints/config/experiments/coco/resnet/res50_256x192_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' 25 | IMAGE_SIZE: 26 | - 192 27 | - 256 28 | HEATMAP_SIZE: 29 | - 48 30 
| - 64 31 | SIGMA: 2 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 50 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /core/models/model_keypoints/config/experiments/coco/resnet/res50_384x288_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: 'coco' 16 | ROOT: 'data/coco/' 17 | TEST_SET: 'val2017' 18 | TRAIN_SET: 'train2017' 19 | FLIP: true 20 | ROT_FACTOR: 40 21 | SCALE_FACTOR: 0.3 22 | MODEL: 23 | NAME: 'pose_resnet' 24 | PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' 25 | IMAGE_SIZE: 26 | - 288 27 | - 384 28 | HEATMAP_SIZE: 29 | - 72 30 | - 96 31 | SIGMA: 3 32 | NUM_JOINTS: 17 33 | TARGET_TYPE: 'gaussian' 34 | EXTRA: 35 | FINAL_CONV_KERNEL: 1 36 | DECONV_WITH_BIAS: false 37 | NUM_DECONV_LAYERS: 3 38 | NUM_DECONV_FILTERS: 39 | - 256 40 | - 256 41 | - 256 42 | NUM_DECONV_KERNELS: 43 | - 4 44 | - 4 45 | - 4 46 | NUM_LAYERS: 50 47 | LOSS: 48 | USE_TARGET_WEIGHT: true 49 | TRAIN: 50 | BATCH_SIZE_PER_GPU: 32 51 | SHUFFLE: true 52 | BEGIN_EPOCH: 0 53 | END_EPOCH: 140 54 | OPTIMIZER: 'adam' 55 | LR: 0.001 56 | LR_FACTOR: 0.1 57 | LR_STEP: 58 | - 90 59 | - 120 60 | WD: 0.0001 61 | GAMMA1: 0.99 62 | GAMMA2: 0.0 63 | MOMENTUM: 0.9 64 | NESTEROV: false 65 | TEST: 66 | BATCH_SIZE_PER_GPU: 32 67 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 68 | BBOX_THRE: 1.0 69 | IMAGE_THRE: 0.0 70 | IN_VIS_THRE: 0.2 71 | MODEL_FILE: '' 72 | NMS_THRE: 1.0 73 | OKS_THRE: 0.9 74 | FLIP_TEST: true 75 | POST_PROCESS: true 76 | SHIFT_HEATMAP: true 77 | USE_GT_BBOX: true 78 | DEBUG: 79 | DEBUG: true 80 | SAVE_BATCH_IMAGES_GT: true 81 | SAVE_BATCH_IMAGES_PRED: true 82 | SAVE_HEATMAPS_GT: true 83 | SAVE_HEATMAPS_PRED: true 84 | -------------------------------------------------------------------------------- /core/models/model_keypoints/config/experiments/mpii/hrnet/w32_256x256_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | 
PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: 'data/mpii/' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 16 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 256 33 | - 256 34 | HEATMAP_SIZE: 35 | - 64 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | MODEL_FILE: '' 112 | FLIP_TEST: true 113 | POST_PROCESS: true 114 | SHIFT_HEATMAP: true 115 | DEBUG: 116 | DEBUG: true 117 | SAVE_BATCH_IMAGES_GT: true 118 | SAVE_BATCH_IMAGES_PRED: true 119 | SAVE_HEATMAPS_GT: true 120 | SAVE_HEATMAPS_PRED: true 121 | -------------------------------------------------------------------------------- /core/models/model_keypoints/config/experiments/mpii/hrnet/w48_256x256_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: 'data/mpii/' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 16 29 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 256 33 | - 256 34 | HEATMAP_SIZE: 35 | - 64 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 48 61 | - 96 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 48 73 | - 96 74 | - 192 
75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 48 87 | - 96 88 | - 192 89 | - 384 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | MODEL_FILE: '' 112 | FLIP_TEST: true 113 | POST_PROCESS: true 114 | SHIFT_HEATMAP: true 115 | DEBUG: 116 | DEBUG: true 117 | SAVE_BATCH_IMAGES_GT: true 118 | SAVE_BATCH_IMAGES_PRED: true 119 | SAVE_HEATMAPS_GT: true 120 | SAVE_HEATMAPS_PRED: true 121 | -------------------------------------------------------------------------------- /core/models/model_keypoints/config/experiments/mpii/resnet/res101_256x256_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: 'data/mpii/' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | NAME: 'pose_resnet' 27 | PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' 28 | IMAGE_SIZE: 29 | - 256 30 | - 256 31 | HEATMAP_SIZE: 32 | - 64 33 | - 64 34 | SIGMA: 2 35 | NUM_JOINTS: 16 36 | TARGET_TYPE: 'gaussian' 37 | EXTRA: 38 | FINAL_CONV_KERNEL: 1 39 | DECONV_WITH_BIAS: false 40 | NUM_DECONV_LAYERS: 3 41 | NUM_DECONV_FILTERS: 42 | - 256 43 | - 256 44 | - 256 45 | NUM_DECONV_KERNELS: 46 | - 4 47 | - 4 48 | - 4 49 | NUM_LAYERS: 101 50 | LOSS: 51 | USE_TARGET_WEIGHT: true 52 | TRAIN: 53 | BATCH_SIZE_PER_GPU: 32 54 | SHUFFLE: true 55 | BEGIN_EPOCH: 0 56 | END_EPOCH: 140 57 | OPTIMIZER: 'adam' 58 | LR: 0.001 59 | LR_FACTOR: 0.1 60 | LR_STEP: 61 | - 90 62 | - 120 63 | WD: 0.0001 64 | GAMMA1: 0.99 65 | GAMMA2: 0.0 66 | MOMENTUM: 0.9 67 | NESTEROV: false 68 | TEST: 69 | BATCH_SIZE_PER_GPU: 32 70 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 71 | BBOX_THRE: 1.0 72 | IMAGE_THRE: 0.0 73 | IN_VIS_THRE: 0.2 74 | MODEL_FILE: '' 75 | NMS_THRE: 1.0 76 | OKS_THRE: 0.9 77 | FLIP_TEST: true 78 | POST_PROCESS: true 79 | SHIFT_HEATMAP: true 80 | USE_GT_BBOX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- /core/models/model_keypoints/config/experiments/mpii/resnet/res152_256x256_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: 
'data/mpii/' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | NAME: 'pose_resnet' 27 | PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' 28 | IMAGE_SIZE: 29 | - 256 30 | - 256 31 | HEATMAP_SIZE: 32 | - 64 33 | - 64 34 | SIGMA: 2 35 | NUM_JOINTS: 16 36 | TARGET_TYPE: 'gaussian' 37 | EXTRA: 38 | FINAL_CONV_KERNEL: 1 39 | DECONV_WITH_BIAS: false 40 | NUM_DECONV_LAYERS: 3 41 | NUM_DECONV_FILTERS: 42 | - 256 43 | - 256 44 | - 256 45 | NUM_DECONV_KERNELS: 46 | - 4 47 | - 4 48 | - 4 49 | NUM_LAYERS: 152 50 | LOSS: 51 | USE_TARGET_WEIGHT: true 52 | TRAIN: 53 | BATCH_SIZE_PER_GPU: 32 54 | SHUFFLE: true 55 | BEGIN_EPOCH: 0 56 | END_EPOCH: 140 57 | OPTIMIZER: 'adam' 58 | LR: 0.001 59 | LR_FACTOR: 0.1 60 | LR_STEP: 61 | - 90 62 | - 120 63 | WD: 0.0001 64 | GAMMA1: 0.99 65 | GAMMA2: 0.0 66 | MOMENTUM: 0.9 67 | NESTEROV: false 68 | TEST: 69 | BATCH_SIZE_PER_GPU: 32 70 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 71 | BBOX_THRE: 1.0 72 | IMAGE_THRE: 0.0 73 | IN_VIS_THRE: 0.2 74 | MODEL_FILE: '' 75 | NMS_THRE: 1.0 76 | OKS_THRE: 0.9 77 | FLIP_TEST: true 78 | POST_PROCESS: true 79 | SHIFT_HEATMAP: true 80 | USE_GT_BBOX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- /core/models/model_keypoints/config/experiments/mpii/resnet/res50_256x256_d256x3_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: false 15 | DATASET: mpii 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: -1.0 20 | ROOT: 'data/mpii/' 21 | ROT_FACTOR: 30 22 | SCALE_FACTOR: 0.25 23 | TEST_SET: valid 24 | TRAIN_SET: train 25 | MODEL: 26 | NAME: 'pose_resnet' 27 | PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' 28 | IMAGE_SIZE: 29 | - 256 30 | - 256 31 | HEATMAP_SIZE: 32 | - 64 33 | - 64 34 | SIGMA: 2 35 | NUM_JOINTS: 16 36 | TARGET_TYPE: 'gaussian' 37 | EXTRA: 38 | FINAL_CONV_KERNEL: 1 39 | DECONV_WITH_BIAS: false 40 | NUM_DECONV_LAYERS: 3 41 | NUM_DECONV_FILTERS: 42 | - 256 43 | - 256 44 | - 256 45 | NUM_DECONV_KERNELS: 46 | - 4 47 | - 4 48 | - 4 49 | NUM_LAYERS: 50 50 | LOSS: 51 | USE_TARGET_WEIGHT: true 52 | TRAIN: 53 | BATCH_SIZE_PER_GPU: 32 54 | SHUFFLE: true 55 | BEGIN_EPOCH: 0 56 | END_EPOCH: 140 57 | OPTIMIZER: 'adam' 58 | LR: 0.001 59 | LR_FACTOR: 0.1 60 | LR_STEP: 61 | - 90 62 | - 120 63 | WD: 0.0001 64 | GAMMA1: 0.99 65 | GAMMA2: 0.0 66 | MOMENTUM: 0.9 67 | NESTEROV: false 68 | TEST: 69 | BATCH_SIZE_PER_GPU: 32 70 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 71 | BBOX_THRE: 1.0 72 | IMAGE_THRE: 0.0 73 | IN_VIS_THRE: 0.2 74 | MODEL_FILE: '' 75 | NMS_THRE: 1.0 76 | OKS_THRE: 0.9 77 | FLIP_TEST: true 78 | POST_PROCESS: true 79 | SHIFT_HEATMAP: true 80 | USE_GT_BBOX: true 81 | DEBUG: 82 | DEBUG: true 83 | SAVE_BATCH_IMAGES_GT: true 84 | SAVE_BATCH_IMAGES_PRED: true 85 | SAVE_HEATMAPS_GT: true 86 | SAVE_HEATMAPS_PRED: true 87 | -------------------------------------------------------------------------------- 
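One invariant worth noting across the experiment YAMLs above (COCO and MPII, HRNet and ResNet alike): `HEATMAP_SIZE` is always 1/4 of `IMAGE_SIZE` in both width and height. A small sanity-check sketch, not part of the repo, with the pairs copied from the configs:

```python
# Hypothetical check: each (IMAGE_SIZE, HEATMAP_SIZE) pair taken from the
# experiment YAMLs above uses a 4x downsampling factor. get_skeleton() in
# crop_by_skeleton.py recomputes this scale dynamically (h / h1, w / w1)
# when mapping heatmap peaks back to input-image coordinates.
pairs = {
    (192, 256): (48, 64),  # coco *_256x192 configs
    (288, 384): (72, 96),  # coco *_384x288 configs
    (128, 256): (32, 64),  # coco res50_256x128 config
    (256, 256): (64, 64),  # mpii 256x256 configs
}
for image_size, heatmap_size in pairs.items():
    assert tuple(v // 4 for v in image_size) == heatmap_size
print('all listed configs use a 4x image-to-heatmap downsampling factor')
```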
/core/models/model_keypoints/config/models.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from yacs.config import CfgNode as CN 12 | 13 | # pose_resnet related params 14 | POSE_RESNET = CN() 15 | POSE_RESNET.NUM_LAYERS = 50 16 | POSE_RESNET.DECONV_WITH_BIAS = False 17 | POSE_RESNET.NUM_DECONV_LAYERS = 3 18 | POSE_RESNET.NUM_DECONV_FILTERS = [256, 256, 256] 19 | POSE_RESNET.NUM_DECONV_KERNELS = [4, 4, 4] 20 | POSE_RESNET.FINAL_CONV_KERNEL = 1 21 | POSE_RESNET.PRETRAINED_LAYERS = ['*'] 22 | 23 | # pose_multi_resoluton_net related params 24 | POSE_HIGH_RESOLUTION_NET = CN() 25 | POSE_HIGH_RESOLUTION_NET.PRETRAINED_LAYERS = ['*'] 26 | POSE_HIGH_RESOLUTION_NET.STEM_INPLANES = 64 27 | POSE_HIGH_RESOLUTION_NET.FINAL_CONV_KERNEL = 1 28 | 29 | POSE_HIGH_RESOLUTION_NET.STAGE2 = CN() 30 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_MODULES = 1 31 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_BRANCHES = 2 32 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_BLOCKS = [4, 4] 33 | # POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_CHANNELS = [32, 64] 34 | POSE_HIGH_RESOLUTION_NET.STAGE2.NUM_CHANNELS = [48, 96] 35 | POSE_HIGH_RESOLUTION_NET.STAGE2.BLOCK = 'BASIC' 36 | POSE_HIGH_RESOLUTION_NET.STAGE2.FUSE_METHOD = 'SUM' 37 | 38 | POSE_HIGH_RESOLUTION_NET.STAGE3 = CN() 39 | # POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_MODULES = 1 40 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_MODULES = 4 41 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_BRANCHES = 3 42 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_BLOCKS = [4, 4, 4] 43 | # POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_CHANNELS = [32, 64, 128] 44 | POSE_HIGH_RESOLUTION_NET.STAGE3.NUM_CHANNELS = [48, 96, 192] 45 | POSE_HIGH_RESOLUTION_NET.STAGE3.BLOCK = 'BASIC' 46 | POSE_HIGH_RESOLUTION_NET.STAGE3.FUSE_METHOD = 'SUM' 47 | 48 | POSE_HIGH_RESOLUTION_NET.STAGE4 = CN() 49 | # POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_MODULES = 1 50 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_MODULES = 3 51 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_BRANCHES = 4 52 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 53 | # POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_CHANNELS = [32, 64, 128, 256] 54 | POSE_HIGH_RESOLUTION_NET.STAGE4.NUM_CHANNELS = [48, 96, 192, 384] 55 | POSE_HIGH_RESOLUTION_NET.STAGE4.BLOCK = 'BASIC' 56 | POSE_HIGH_RESOLUTION_NET.STAGE4.FUSE_METHOD = 'SUM' 57 | 58 | MODEL_EXTRAS = { 59 | 'pose_resnet': POSE_RESNET, 60 | 'pose_high_resolution_net': POSE_HIGH_RESOLUTION_NET, 61 | } 62 | -------------------------------------------------------------------------------- /core/models/model_keypoints/crop_by_skeleton.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | import torchvision.transforms as transforms 4 | from torch.utils.data import DataLoader 5 | 6 | from .pose_config import config as pose_config 7 | from .pose_resnet import get_pose_net 8 | 9 | import os 10 | import cv2 11 | import math 12 | import numpy as np 13 | 14 | 15 | def get_max_preds(batch_heatmaps): 16 | ''' 17 | get predictions from score maps 18 | heatmaps: numpy.ndarray([batch_size, num_joints, height, width]) 19 | ''' 20 | assert isinstance(batch_heatmaps, 
np.ndarray), \ 21 | 'batch_heatmaps should be numpy.ndarray' 22 | assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim' 23 | 24 | batch_size = batch_heatmaps.shape[0] 25 | num_joints = batch_heatmaps.shape[1] 26 | width = batch_heatmaps.shape[3] 27 | heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1)) 28 | idx = np.argmax(heatmaps_reshaped, 2) 29 | maxvals = np.amax(heatmaps_reshaped, 2) 30 | 31 | maxvals = maxvals.reshape((batch_size, num_joints, 1)) 32 | idx = idx.reshape((batch_size, num_joints, 1)) 33 | 34 | preds = np.tile(idx, (1, 1, 2)).astype(np.float32) 35 | 36 | preds[:, :, 0] = (preds[:, :, 0]) % width 37 | preds[:, :, 1] = np.floor((preds[:, :, 1]) / width) 38 | 39 | pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2)) 40 | pred_mask = pred_mask.astype(np.float32) 41 | 42 | preds *= pred_mask 43 | return preds, maxvals 44 | 45 | 46 | def get_final_preds(batch_heatmaps, pose_processing=True): 47 | coords, maxvals = get_max_preds(batch_heatmaps) 48 | 49 | heatmap_height = batch_heatmaps.shape[2] 50 | heatmap_width = batch_heatmaps.shape[3] 51 | 52 | # post-processing 53 | if pose_processing: 54 | for n in range(coords.shape[0]): 55 | for p in range(coords.shape[1]): 56 | hm = batch_heatmaps[n][p] 57 | px = int(math.floor(coords[n][p][0] + 0.5)) 58 | py = int(math.floor(coords[n][p][1] + 0.5)) 59 | if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1: 60 | diff = np.array([hm[py][px + 1] - hm[py][px - 1], 61 | hm[py + 1][px] - hm[py - 1][px]]) 62 | coords[n][p] += np.sign(diff) * .25 63 | 64 | return coords, maxvals 65 | 66 | 67 | def im_preprocessing_for_skeleton(im): 68 | ''' 69 | preprocessing the input image 70 | @param 71 | im: opencv image (bgr) 72 | @return 73 | im 74 | (h_scale,w_scale) the scale of height and width 75 | ''' 76 | o_h, o_w, _ = im.shape 77 | width, height = pose_config.MODEL.IMAGE_SIZE 78 | im = cv2.resize(im, (width, height)) 79 | 80 | h_scale = o_h / height 81 | w_scale = o_w / width 82 | 83 | im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) 84 | im = im / 255.0 85 | im -= np.array([0.485, 0.456, 0.406]) # mean: rgb 86 | im /= np.array([0.229, 0.224, 0.225]) # std: rgb 87 | 88 | im = np.transpose(im, (2, 0, 1)) # hwc --> chw 89 | 90 | return im, (h_scale, w_scale) 91 | 92 | 93 | def get_skeleton(model, inputs): 94 | ''' 95 | get skeleton of inputs 96 | @param 97 | model: the torch model of skeleton 98 | input: ndarray of input images, the dim should be nchw 99 | @return 100 | predict joints, joints shape (batchsize, num_joints, 2(u,v)) 101 | maxvalue of joints on response heatmap, shape(batchsize, num_joints) 102 | ''' 103 | # assert inputs.ndim==4,'input dim should be nchw' 104 | n, c, h, w = inputs.shape 105 | # inputs = torch.from_numpy(inputs) 106 | # inputs = inputs.type(torch.FloatTensor) 107 | # print("----",type(inputs)) 108 | # if len(pose_config.GPUS.split(','))>0: 109 | # # print("use gpu") 110 | # model = model.cuda() 111 | # inputs = inputs.cuda() 112 | model.eval() 113 | with torch.no_grad(): 114 | heatmaps = model(inputs) 115 | 116 | # get scale of width and height 117 | _, _, h1, w1 = heatmaps.size() 118 | h_scale = h / h1 119 | w_scale = w / w1 120 | 121 | # do something of the heatmap 122 | heatmaps = heatmaps.cpu().numpy() 123 | 124 | # get the u,v of each joints and its max response value 125 | joints, maxvalue = get_final_preds(heatmaps) # joints shape (batchsize, num_joints, 2(u,v)) 126 | 127 | # re-calculate joints location on input images 128 | joints[:, :, 0] *= w_scale 129 | joints[:, :, 1] *= 
h_scale 130 | 131 | return joints, maxvalue 132 | 133 | 134 | def filter_joints_by_threshold(maxvalue, threshold=0.3): 135 | ''' 136 | filter joints by maxvalue and threshold 137 | @param 138 | maxvalue of joints on response heatmap, shape(num_joints,) 139 | threshold, minimum value of confidence of joints 140 | @return 141 | joints index, which joints is beyound uppon threshold 142 | joints confidence 143 | ''' 144 | joint_indexs = np.where(maxvalue > threshold) 145 | confidence = maxvalue[joint_indexs] 146 | 147 | return joint_indexs, confidence 148 | 149 | 150 | def crop_by_joints(image, joints, expand_pixels=(2, 4, 2, 2)): 151 | ''' 152 | get bounding box by selected joints, expand the box by expand_pixels 153 | @param 154 | image, opencv image 155 | joints, selected joints, shape(num_joints,2(u,v)) 156 | expand_pixel: (bottom,top,left,right) pixels to expand the joints based box 157 | @return 158 | croped image 159 | ''' 160 | if len(joints) <= 0: 161 | # when no joint given, we just return the input image 162 | return image 163 | bottom, top, left, right = expand_pixels 164 | height, width, _ = image.shape 165 | min_u = int(np.min(joints[:, 0]) - left) 166 | min_v = int(np.min(joints[:, 1]) - top) 167 | max_u = int(np.max(joints[:, 0]) + right) 168 | max_v = int(np.max(joints[:, 1]) + bottom) 169 | 170 | if min_u < 0: 171 | min_u = 0 172 | if min_v < 0: 173 | min_v = 0 174 | if max_u > width: 175 | max_u = width 176 | if max_v > height: 177 | max_v = height 178 | 179 | croped = image[min_v:max_v, min_u:max_u] 180 | 181 | return croped 182 | 183 | 184 | def crop_query(model, query, threshold=0.3, expand_pixels=(2, 4, 2, 2)): 185 | ''' 186 | crop images by the specific predicted skeleton 187 | @param 188 | model: the torch model of skeleton 189 | query: query image 190 | threshold: minimum value of confidence of joints 191 | expand_pixel: (bottom,top,left,right) pixels to expand the joints based box 192 | @return 193 | croped_query: croped query image 194 | joint_index: joints index of query image, the occluded joints has been filter out 195 | ''' 196 | query_h, query_w, _ = query.shape 197 | query_input, query_scale = im_preprocessing_for_skeleton(query) 198 | query_scale_h, query_scale_w = query_scale 199 | 200 | query_input = np.expand_dims(query_input, axis=0) 201 | # get 2d joint on resized input image 202 | joint, maxvalue = get_skeleton(model, query_input) 203 | joint_query = joint[0] 204 | maxvalue_query = maxvalue[0, :, 0] 205 | # get final joint on query image 206 | joint_query[:, 0] *= query_scale_w 207 | joint_query[:, 1] *= query_scale_h 208 | 209 | # only for test purpose 210 | # add the points on the original images 211 | # query = add_joints_to_image(query,joint_query,maxvalue_query) 212 | 213 | # filter out the occuluded joints 214 | joint_index, confidence = filter_joints_by_threshold(maxvalue_query, threshold) 215 | 216 | # crop query image 217 | selected_query_joints = joint_query[joint_index] 218 | croped_query = crop_by_joints(query, selected_query_joints, expand_pixels) 219 | 220 | return croped_query, joint_index 221 | 222 | 223 | def crop_gallery(model, gallery, joint_index, expand_pixels=(2, 4, 2, 2)): 224 | ''' 225 | crop the gallery image by joint_index of query image 226 | @param 227 | model: the torch model of skeleton 228 | gallery: gallery image 229 | joint_index: joints index of query image 230 | expand_pixel: (bottom,top,left,right) pixels to expand the joints based box 231 | @return 232 | croped gallery image 233 | ''' 234 | gallery_h, gallery_w, _ = 
gallery.shape 235 | gallery_input, gallery_scale = im_preprocessing_for_skeleton(gallery) 236 | gallery_scale_h, gallery_scale_w = gallery_scale 237 | 238 | gallery_input = np.expand_dims(gallery_input, axis=0) 239 | # get 2d joint on resized input image 240 | joint, maxvalue = get_skeleton(model, gallery_input) 241 | joint_gallery = joint[0] 242 | maxvalue_gallery = maxvalue[0] 243 | 244 | # get final joint on gallery image 245 | joint_gallery[:, 0] *= gallery_scale_w 246 | joint_gallery[:, 1] *= gallery_scale_h 247 | 248 | # only for test purpose 249 | # add the points on the original images 250 | # gallery = add_joints_to_image(gallery,joint_gallery,maxvalue_gallery) 251 | 252 | selected_gallery_joints = joint_gallery[joint_index] 253 | croped_gallery = crop_by_joints(gallery, selected_gallery_joints, expand_pixels) 254 | 255 | return croped_gallery 256 | 257 | 258 | def crop_query_galley(model, query_image, gallery_images, threshold=0.3, expand_pixels=(2, 4, 2, 2)): 259 | ''' 260 | crop the query and gallery 261 | @param 262 | model: the torch model of skeleton 263 | query_image: as is, ndarray of image, (hwc,bgr) 264 | gallery_images: list of images(ndarray, hwc,bgr) 265 | @return 266 | croped_query: croped image, ndarray 267 | croped_gallery: croped gallery images, list of ndarray 268 | ''' 269 | croped_query, joint_index = crop_query(model, query_image, threshold=threshold, expand_pixels=expand_pixels) 270 | croped_gallery = {} 271 | for i in range(len(gallery_images)): 272 | gallery_image = gallery_images[i] 273 | croped_gallery_image = crop_gallery(model, gallery_image, joint_index, expand_pixels) 274 | croped_gallery[i] = croped_gallery_image 275 | 276 | return croped_query, croped_gallery 277 | 278 | 279 | def add_joints_to_image(image, joint_im, maxvalue_im): 280 | ''' 281 | add joint and its maxvalue to the image and save to the disk 282 | ''' 283 | colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (100, 100, 100), (25, 25, 25), (200, 200, 200), (233, 233, 234), 284 | (111, 111, 111), (66, 66, 66)] 285 | joints_all = [] 286 | new_image = image.copy() 287 | if len(joint_im) == 16: 288 | group1 = [0, 1, 2, 3, 4, 5] 289 | group2 = [10, 11, 12, 13, 14, 15] 290 | group3 = [6, 7, 8, 9] 291 | group4 = [] 292 | group5 = [] 293 | group6 = [] 294 | group7 = [] 295 | joints_all = [group1, group2, group3, group4, group5, group6, group7] 296 | elif len(joint_im) == 17: 297 | group1 = [0, 1, 2, 3, 4] 298 | group2 = [5, 6, 11, 12] 299 | group3 = [7, 8, 9, 10] 300 | group4 = [13, 14, 15, 16] 301 | group5 = [] 302 | group6 = [] 303 | group7 = [] 304 | joints_all = [group1, group2, group3, group4, group5, group6, group7] 305 | for i in range(len(joints_all)): 306 | group = joints_all[i] 307 | for j in group: 308 | u = int(joint_im[j, 0]) 309 | v = int(joint_im[j, 1]) 310 | new_image = cv2.circle(new_image, (u, v), 2, colors[i], -1) 311 | new_image = cv2.putText(new_image, '{%.2f}' % maxvalue_im[j], (u + 1, v - 1), cv2.FONT_HERSHEY_SIMPLEX, 0.4, 312 | (255, 0, 255), 1) 313 | 314 | return new_image 315 | 316 | 317 | def test_joints(model, image_path, output_dir): 318 | print("output_dir:", output_dir) 319 | if not os.path.exists(output_dir): 320 | os.makedirs(output_dir) 321 | print("===========make directory:", output_dir, "================") 322 | image_name = image_path.split("/")[-1] 323 | out_name = os.path.join(output_dir, image_name) 324 | print(out_name) 325 | image = cv2.imread(image_path) 326 | im_h, im_w, _ = image.shape 327 | im_input, im_scale = im_preprocessing_for_skeleton(image) 
328 | 329 | im_scale_h, im_scale_w = im_scale 330 | # print(im_input.shape,im_h,im_w,im_scale) 331 | 332 | im_input = np.expand_dims(im_input, axis=0) 333 | # get 2d joint on resized input image 334 | joint, maxvalue = get_skeleton(model, im_input) 335 | joint_im = joint[0] 336 | maxvalue_im = maxvalue[0, :, 0] 337 | # print(joint_im.shape,maxvalue_im.shape) 338 | # get final joint on im image 339 | joint_im[:, 0] *= im_scale_w 340 | joint_im[:, 1] *= im_scale_h 341 | 342 | image = cv2.resize(image, (im_w * 4, im_h * 4)) 343 | joint_im *= 4 344 | new_image = add_joints_to_image(image, joint_im, maxvalue_im) 345 | cv2.imwrite(out_name, new_image) 346 | 347 | 348 | def test_crop_image(model, query_path, gallery_paths, output_dir): 349 | print("output_dir:", output_dir) 350 | if not os.path.exists(output_dir): 351 | os.makedirs(output_dir) 352 | print("===========make directory:", output_dir, "================") 353 | query_image = cv2.imread(query_path) 354 | gallery_images = [] 355 | for i in range(len(gallery_paths)): 356 | gallery_image = cv2.imread(gallery_paths[i]) 357 | gallery_images.append(gallery_image) 358 | croped_query, croped_gallerys = crop_query_galley(model, query_image, gallery_images, 359 | expand_pixels=(2, 4, 1000, 1000)) 360 | 361 | h, w, _ = query_image.shape 362 | h1, w1, _ = croped_query.shape 363 | im = np.zeros((h, w * 2 + 10, 3)) 364 | im[0:h, 0:w, :] = query_image 365 | im[0:h1, w + 10:w1 + w + 10, :] = croped_query 366 | cv2.imwrite(os.path.join(output_dir, '000_query.jpg'), im) 367 | # cv2.imwrite(os.path.join(output_dir,'query_image.jpg'),query_image) 368 | # cv2.imwrite(os.path.join(output_dir,'croped_query.jpg'),croped_query) 369 | 370 | for i in range(len(gallery_paths)): 371 | h, w, _ = gallery_images[i].shape 372 | h1, w1, _ = croped_gallerys[i].shape 373 | im = np.zeros((h, w * 2 + 10, 3)) 374 | im[0:h, 0:w, :] = gallery_images[i] 375 | im[0:h1, w + 10:w1 + w + 10, :] = croped_gallerys[i] 376 | cv2.imwrite(os.path.join(output_dir, 'gallery_image_{}.jpg'.format(i)), im) 377 | # cv2.imwrite(os.path.join(output_dir,'gallery_image_{}.jpg'.format(i)),gallery_images[i]) 378 | # cv2.imwrite(os.path.join(output_dir,'croped_gallery{}.jpg'.format(i)),croped_gallerys[i]) 379 | 380 | 381 | if __name__ == '__main__': 382 | 383 | pose_config.GPUS = '0' 384 | pose_config.MODEL.IMAGE_SIZE = [128, 256] # width*height 385 | # pose_config.MODEL.IMAGE_SIZE = [256,256] 386 | pose_config.TEST.MODEL_FILE = '/data/model_zoo/skeleton/pose_coco/pose_resnet_50_256x192.pth.tar' 387 | # pose_config.TEST.MODEL_FILE = 'pose_mpii/pose_resnet_50_256x256.pth.tar' 388 | model = get_pose_net(pose_config, False) 389 | model.load_state_dict(torch.load(pose_config.TEST.MODEL_FILE)) 390 | 391 | out_dir = '../output' 392 | partial_root = '/data/dataset/ReID/public/Partial_iLIDS' 393 | 394 | partial_file = open(os.path.join(partial_root, 'Probe.txt')) 395 | partial_paths = [] 396 | parts = 0 397 | for line in partial_file: 398 | # print("partial,",parts) 399 | im_path = os.path.join(partial_root, line.strip()) 400 | partial_paths.append(im_path) 401 | test_joints(model, im_path, os.path.join(out_dir, 'partial')) 402 | parts += 1 403 | # if a>=30: 404 | # break 405 | 406 | # whole_root = '/data/dataset/ReID/public/market1501' 407 | # whole_file = open(os.path.join(whole_root,'train.txt')) 408 | whole_root = '/data/dataset/ReID/public/Partial_iLIDS' 409 | whole_file = open(os.path.join(whole_root, 'Gallery.txt')) 410 | whole_paths = [] 411 | a = 0 412 | for line in whole_file: 413 | # 
print("whole,",a) 414 | im_path = os.path.join(whole_root, line.strip()) 415 | whole_paths.append(im_path) 416 | test_joints(model, im_path, os.path.join(out_dir, 'whole')) 417 | a += 1 418 | # if a>=30: 419 | # break 420 | 421 | # partial image path 422 | for i in range(parts): 423 | print("crop,", i) 424 | query_im_path = partial_paths[i] 425 | gallery_im_paths = whole_paths 426 | test_crop_image(model, query_im_path, gallery_im_paths, os.path.join(out_dir, str(i))) 427 | 428 | # get -r /home/yangshuo/skeleton/output 429 | -------------------------------------------------------------------------------- /core/models/model_keypoints/gaussian_blur.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @Author: shuo yang 3 | @Email: yangshuo129@gmail.com 4 | @Date: 2019-06-04 11:56:16 5 | @LastEditors: shuo yang 6 | @LastEditTime: 2019-06-04 15:44:14 7 | @Description: code from https://github.com/arraiyopensource/kornia/blob/master/kornia/filters/gaussian.py 8 | ''' 9 | from typing import Tuple 10 | 11 | import torch 12 | import torch.nn as nn 13 | from torch.nn.functional import conv2d 14 | 15 | 16 | def gaussian(window_size, sigma): 17 | def gauss_fcn(x): 18 | return -(x - window_size // 2) ** 2 / float(2 * sigma ** 2) 19 | 20 | gauss = torch.stack( 21 | [torch.exp(torch.tensor(gauss_fcn(x))) for x in range(window_size)]) 22 | return gauss / gauss.sum() 23 | 24 | 25 | def get_gaussian_kernel(kernel_size: int, sigma: float) -> torch.Tensor: 26 | r"""Function that returns Gaussian filter coefficients. 27 | Args: 28 | kernel_size (int): filter size. It should be odd and positive. 29 | sigma (float): gaussian standard deviation. 30 | Returns: 31 | Tensor: 1D tensor with gaussian filter coefficients. 32 | Shape: 33 | - Output: :math:`(\text{kernel_size})` 34 | Examples:: 35 | >>> kornia.image.get_gaussian_kernel(3, 2.5) 36 | tensor([0.3243, 0.3513, 0.3243]) 37 | >>> kornia.image.get_gaussian_kernel(5, 1.5) 38 | tensor([0.1201, 0.2339, 0.2921, 0.2339, 0.1201]) 39 | """ 40 | if not isinstance(kernel_size, int) or kernel_size % 2 == 0 or \ 41 | kernel_size <= 0: 42 | raise TypeError("kernel_size must be an odd positive integer. " 43 | "Got {}".format(kernel_size)) 44 | window_1d = gaussian(kernel_size, sigma) 45 | return window_1d 46 | 47 | 48 | def get_gaussian_kernel2d(kernel_size: Tuple[int, int], 49 | sigma: Tuple[float, float]) -> torch.Tensor: 50 | r"""Function that returns Gaussian filter matrix coefficients. 51 | Args: 52 | kernel_size (Tuple[int, int]): filter sizes in the x and y direction. 53 | Sizes should be odd and positive. 54 | sigma (Tuple[int, int]): gaussian standard deviation in the x and y 55 | direction. 56 | Returns: 57 | Tensor: 2D tensor with gaussian filter matrix coefficients. 58 | Shape: 59 | - Output: :math:`(\text{kernel_size}_x, \text{kernel_size}_y)` 60 | Examples:: 61 | >>> kornia.image.get_gaussian_kernel2d((3, 3), (1.5, 1.5)) 62 | tensor([[0.0947, 0.1183, 0.0947], 63 | [0.1183, 0.1478, 0.1183], 64 | [0.0947, 0.1183, 0.0947]]) 65 | >>> kornia.image.get_gaussian_kernel2d((3, 5), (1.5, 1.5)) 66 | tensor([[0.0370, 0.0720, 0.0899, 0.0720, 0.0370], 67 | [0.0462, 0.0899, 0.1123, 0.0899, 0.0462], 68 | [0.0370, 0.0720, 0.0899, 0.0720, 0.0370]]) 69 | """ 70 | if not isinstance(kernel_size, tuple) or len(kernel_size) != 2: 71 | raise TypeError("kernel_size must be a tuple of length two. 
Got {}" 72 | .format(kernel_size)) 73 | if not isinstance(sigma, tuple) or len(sigma) != 2: 74 | raise TypeError("sigma must be a tuple of length two. Got {}" 75 | .format(sigma)) 76 | ksize_x, ksize_y = kernel_size 77 | sigma_x, sigma_y = sigma 78 | kernel_x = get_gaussian_kernel(ksize_x, sigma_x) 79 | kernel_y = get_gaussian_kernel(ksize_y, sigma_y) 80 | kernel_2d = torch.matmul( 81 | kernel_x.unsqueeze(-1), kernel_y.unsqueeze(-1).t()) 82 | print("---------kernel--------------") 83 | print(kernel_2d) 84 | print("---------kernel--------------") 85 | return kernel_2d 86 | 87 | 88 | class GaussianBlur(nn.Module): 89 | r"""Creates an operator that blurs a tensor using a Gaussian filter. 90 | The operator smooths the given tensor with a gaussian kernel by convolving 91 | it to each channel. It suports batched operation. 92 | Arguments: 93 | kernel_size (Tuple[int, int]): the size of the kernel. 94 | sigma (Tuple[float, float]): the standard deviation of the kernel. 95 | Returns: 96 | Tensor: the blurred tensor. 97 | Shape: 98 | - Input: :math:`(B, C, H, W)` 99 | - Output: :math:`(B, C, H, W)` 100 | Examples:: 101 | >>> input = torch.rand(2, 4, 5, 5) 102 | >>> gauss = kornia.filters.GaussianBlur((3, 3), (1.5, 1.5)) 103 | >>> output = gauss(input) # 2x4x5x5 104 | """ 105 | 106 | def __init__(self, kernel_size: Tuple[int, int], 107 | sigma: Tuple[float, float]) -> None: 108 | super(GaussianBlur, self).__init__() 109 | self.kernel_size = kernel_size 110 | self.sigma = sigma 111 | self._padding = self.compute_zero_padding(kernel_size) 112 | self.kernel = get_gaussian_kernel2d(kernel_size, sigma) 113 | 114 | @staticmethod 115 | def compute_zero_padding(kernel_size: Tuple[int, int]) -> Tuple[int, int]: 116 | """Computes zero padding tuple.""" 117 | computed = [(k - 1) // 2 for k in kernel_size] 118 | return computed[0], computed[1] 119 | 120 | def forward(self, x: torch.Tensor): # type: ignore 121 | if not torch.is_tensor(x): 122 | raise TypeError("Input x type is not a torch.Tensor. Got {}" 123 | .format(type(x))) 124 | if not len(x.shape) == 4: 125 | raise ValueError("Invalid input shape, we expect BxCxHxW. Got: {}" 126 | .format(x.shape)) 127 | # prepare kernel 128 | b, c, h, w = x.shape 129 | tmp_kernel = self.kernel.to(x.device).to(x.dtype) 130 | kernel = tmp_kernel.repeat(c, 1, 1, 1) 131 | 132 | # TODO: explore solution when using jit.trace since it raises a warning 133 | # because the shape is converted to a tensor instead to a int. 134 | # convolve tensor with gaussian kernel 135 | return conv2d(x, kernel, padding=self._padding, stride=1, groups=c) 136 | 137 | 138 | ###################### 139 | # functional interface 140 | ###################### 141 | 142 | 143 | def gaussian_blur(input: torch.Tensor, 144 | kernel_size: Tuple[int, 145 | int], 146 | sigma: Tuple[float, 147 | float]) -> torch.Tensor: 148 | r"""Function that blurs a tensor using a Gaussian filter. 149 | See :class:`~kornia.filters.GaussianBlur` for details. 
150 | """ 151 | return GaussianBlur(kernel_size, sigma)(input) 152 | -------------------------------------------------------------------------------- /core/models/model_keypoints/pose_config.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @Author: shuo yang 3 | @Email: yangshuo129@gmail.com 4 | @Date: 2019-05-31 10:12:33 5 | @LastEditors: shuo yang 6 | @LastEditTime: 2019-06-15 16:24:43 7 | @Description: 8 | ''' 9 | # ------------------------------------------------------------------------------ 10 | # Copyright (c) Microsoft 11 | # Licensed under the MIT License. 12 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 13 | # ------------------------------------------------------------------------------ 14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import os 20 | import yaml 21 | 22 | import numpy as np 23 | from easydict import EasyDict as edict 24 | 25 | config = edict() 26 | 27 | config.OUTPUT_DIR = '' 28 | config.LOG_DIR = '' 29 | config.DATA_DIR = '' 30 | config.GPUS = '0' 31 | config.WORKERS = 4 32 | config.PRINT_FREQ = 20 33 | 34 | # Cudnn related params 35 | config.CUDNN = edict() 36 | config.CUDNN.BENCHMARK = True 37 | config.CUDNN.DETERMINISTIC = False 38 | config.CUDNN.ENABLED = True 39 | 40 | # pose_resnet related params 41 | POSE_RESNET = edict() 42 | POSE_RESNET.NUM_LAYERS = 50 43 | POSE_RESNET.DECONV_WITH_BIAS = False 44 | POSE_RESNET.NUM_DECONV_LAYERS = 3 45 | POSE_RESNET.NUM_DECONV_FILTERS = [256, 256, 256] 46 | POSE_RESNET.NUM_DECONV_KERNELS = [4, 4, 4] 47 | POSE_RESNET.FINAL_CONV_KERNEL = 1 48 | POSE_RESNET.TARGET_TYPE = 'gaussian' 49 | POSE_RESNET.HEATMAP_SIZE = [64, 64] # width * height, ex: 24 * 32 50 | POSE_RESNET.SIGMA = 2 51 | 52 | MODEL_EXTRAS = { 53 | 'pose_resnet': POSE_RESNET, 54 | } 55 | 56 | # common params for NETWORK 57 | config.MODEL = edict() 58 | config.MODEL.NAME = 'pose_resnet' 59 | config.MODEL.INIT_WEIGHTS = True 60 | config.MODEL.PRETRAINED = '' 61 | config.MODEL.NUM_JOINTS = 17 62 | config.MODEL.JOINTS_GROUPS = [[0, 1, 2, 3, 4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], 63 | [16]] # coco group 2 64 | config.MODEL.IMAGE_SIZE = [128, 256] # width * height, ex: 192 * 256 65 | config.MODEL.EXTRA = MODEL_EXTRAS[config.MODEL.NAME] 66 | 67 | config.MODEL.STYLE = 'pytorch' 68 | 69 | config.LOSS = edict() 70 | config.LOSS.USE_TARGET_WEIGHT = True 71 | 72 | # DATASET related params 73 | config.DATASET = edict() 74 | config.DATASET.ROOT = '' 75 | config.DATASET.DATASET = 'mpii' 76 | config.DATASET.TRAIN_SET = 'train' 77 | config.DATASET.TEST_SET = 'valid' 78 | config.DATASET.DATA_FORMAT = 'jpg' 79 | config.DATASET.HYBRID_JOINTS_TYPE = '' 80 | config.DATASET.SELECT_DATA = False 81 | 82 | # training data augmentation 83 | config.DATASET.FLIP = True 84 | config.DATASET.SCALE_FACTOR = 0.25 85 | config.DATASET.ROT_FACTOR = 30 86 | 87 | # train 88 | config.TRAIN = edict() 89 | 90 | config.TRAIN.LR_FACTOR = 0.1 91 | config.TRAIN.LR_STEP = [90, 110] 92 | config.TRAIN.LR = 0.001 93 | 94 | config.TRAIN.OPTIMIZER = 'adam' 95 | config.TRAIN.MOMENTUM = 0.9 96 | config.TRAIN.WD = 0.0001 97 | config.TRAIN.NESTEROV = False 98 | config.TRAIN.GAMMA1 = 0.99 99 | config.TRAIN.GAMMA2 = 0.0 100 | 101 | config.TRAIN.BEGIN_EPOCH = 0 102 | config.TRAIN.END_EPOCH = 140 103 | 104 | config.TRAIN.RESUME = False 105 | config.TRAIN.CHECKPOINT = '' 106 | 107 | config.TRAIN.BATCH_SIZE = 32 108 | config.TRAIN.SHUFFLE = True 109 | 110 | # 
testing 111 | config.TEST = edict() 112 | 113 | # size of images for each device 114 | config.TEST.BATCH_SIZE = 32 115 | # Test Model Epoch 116 | config.TEST.FLIP_TEST = False 117 | config.TEST.POST_PROCESS = True 118 | config.TEST.SHIFT_HEATMAP = True 119 | 120 | config.TEST.USE_GT_BBOX = False 121 | # nms 122 | config.TEST.OKS_THRE = 0.5 123 | config.TEST.IN_VIS_THRE = 0.0 124 | config.TEST.COCO_BBOX_FILE = '' 125 | config.TEST.BBOX_THRE = 1.0 126 | # config.TEST.MODEL_FILE = '/data/model_zoo/skeleton/pose_coco/pose_resnet_50_256x192.pth.tar' 127 | config.TEST.MODEL_FILE = '/data/projects/20190531_Partial-ReID-Skeleton-Feature-Alignment-ReID/datasets/coco/pose_resnet_50_256x192.pth.tar' 128 | # pose_config.TEST.MODEL_FILE = 'pose_mpii/pose_resnet_50_256x256.pth.tar' # for NPII trained model 129 | config.TEST.IMAGE_THRE = 0.0 130 | config.TEST.NMS_THRE = 1.0 131 | 132 | # debug 133 | config.DEBUG = edict() 134 | config.DEBUG.DEBUG = False 135 | config.DEBUG.SAVE_BATCH_IMAGES_GT = False 136 | config.DEBUG.SAVE_BATCH_IMAGES_PRED = False 137 | config.DEBUG.SAVE_HEATMAPS_GT = False 138 | config.DEBUG.SAVE_HEATMAPS_PRED = False 139 | 140 | 141 | def _update_dict(k, v): 142 | if k == 'DATASET': 143 | if 'MEAN' in v and v['MEAN']: 144 | v['MEAN'] = np.array([eval(x) if isinstance(x, str) else x 145 | for x in v['MEAN']]) 146 | if 'STD' in v and v['STD']: 147 | v['STD'] = np.array([eval(x) if isinstance(x, str) else x 148 | for x in v['STD']]) 149 | if k == 'MODEL': 150 | if 'EXTRA' in v and 'HEATMAP_SIZE' in v['EXTRA']: 151 | if isinstance(v['EXTRA']['HEATMAP_SIZE'], int): 152 | v['EXTRA']['HEATMAP_SIZE'] = np.array( 153 | [v['EXTRA']['HEATMAP_SIZE'], v['EXTRA']['HEATMAP_SIZE']]) 154 | else: 155 | v['EXTRA']['HEATMAP_SIZE'] = np.array( 156 | v['EXTRA']['HEATMAP_SIZE']) 157 | if 'IMAGE_SIZE' in v: 158 | if isinstance(v['IMAGE_SIZE'], int): 159 | v['IMAGE_SIZE'] = np.array([v['IMAGE_SIZE'], v['IMAGE_SIZE']]) 160 | else: 161 | v['IMAGE_SIZE'] = np.array(v['IMAGE_SIZE']) 162 | for vk, vv in v.items(): 163 | if vk in config[k]: 164 | config[k][vk] = vv 165 | else: 166 | raise ValueError("{}.{} not exist in pose_config.py".format(k, vk)) 167 | 168 | 169 | def update_config(config_file): 170 | exp_config = None 171 | with open(config_file) as f: 172 | exp_config = edict(yaml.load(f)) 173 | for k, v in exp_config.items(): 174 | if k in config: 175 | if isinstance(v, dict): 176 | _update_dict(k, v) 177 | else: 178 | if k == 'SCALES': 179 | config[k][0] = (tuple(v)) 180 | else: 181 | config[k] = v 182 | else: 183 | raise ValueError("{} not exist in pose_config.py".format(k)) 184 | 185 | 186 | def gen_config(config_file): 187 | cfg = dict(config) 188 | for k, v in cfg.items(): 189 | if isinstance(v, edict): 190 | cfg[k] = dict(v) 191 | 192 | with open(config_file, 'w') as f: 193 | yaml.dump(dict(cfg), f, default_flow_style=False) 194 | 195 | 196 | def update_dir(model_dir, log_dir, data_dir): 197 | if model_dir: 198 | config.OUTPUT_DIR = model_dir 199 | 200 | if log_dir: 201 | config.LOG_DIR = log_dir 202 | 203 | if data_dir: 204 | config.DATA_DIR = data_dir 205 | 206 | config.DATASET.ROOT = os.path.join( 207 | config.DATA_DIR, config.DATASET.ROOT) 208 | 209 | config.TEST.COCO_BBOX_FILE = os.path.join( 210 | config.DATA_DIR, config.TEST.COCO_BBOX_FILE) 211 | 212 | config.MODEL.PRETRAINED = os.path.join( 213 | config.DATA_DIR, config.MODEL.PRETRAINED) 214 | 215 | 216 | def get_model_name(cfg): 217 | name = cfg.MODEL.NAME 218 | full_name = cfg.MODEL.NAME 219 | extra = cfg.MODEL.EXTRA 220 | if name in 
['pose_resnet']: 221 | name = '{model}_{num_layers}'.format( 222 | model=name, 223 | num_layers=extra.NUM_LAYERS) 224 | deconv_suffix = ''.join( 225 | 'd{}'.format(num_filters) 226 | for num_filters in extra.NUM_DECONV_FILTERS) 227 | full_name = '{height}x{width}_{name}_{deconv_suffix}'.format( 228 | height=cfg.MODEL.IMAGE_SIZE[1], 229 | width=cfg.MODEL.IMAGE_SIZE[0], 230 | name=name, 231 | deconv_suffix=deconv_suffix) 232 | else: 233 | raise ValueError('Unkown model: {}'.format(cfg.MODEL)) 234 | 235 | return name, full_name 236 | 237 | 238 | if __name__ == '__main__': 239 | import sys 240 | 241 | gen_config(sys.argv[1]) 242 | -------------------------------------------------------------------------------- /core/models/model_keypoints/pose_processor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | import math 6 | 7 | from .config import cfg as pose_config 8 | from .gaussian_blur import GaussianBlur 9 | 10 | 11 | class HeatmapProcessor(nn.Module): 12 | """post process of the heatmap, group and normalize""" 13 | 14 | def __init__(self, normalize_heatmap=False, group_mode="sum", gaussion_smooth=None): 15 | super(HeatmapProcessor, self).__init__() 16 | self.num_joints = pose_config.MODEL.NUM_JOINTS 17 | self.groups = pose_config.MODEL.JOINTS_GROUPS 18 | self.gaussion_smooth = gaussion_smooth 19 | assert group_mode in ['sum', 'max'], "only support sum or max" 20 | self.group_mode = group_mode 21 | print("groupmod", self.group_mode) 22 | self.normalize_heatmap = normalize_heatmap 23 | if self.normalize_heatmap: 24 | print("normalize scoremap") 25 | else: 26 | print("no normalize scoremap") 27 | if self.gaussion_smooth: 28 | kernel, sigma = self.gaussion_smooth 29 | self.gaussion_blur = GaussianBlur(kernel, sigma) 30 | print("gaussian blur:", kernel, sigma) 31 | else: 32 | self.gaussion_blur = None 33 | print("no gaussian blur") 34 | 35 | def forward(self, x): 36 | n, c, h, w = x.shape 37 | x = F.interpolate(x, [16, 8], mode='bilinear', align_corners=False) 38 | n, c, h, w = x.shape 39 | 40 | if not self.training: 41 | # if in eval phase, we calculate the max value and its position of each channel of heatmap 42 | n, c, h, w = x.shape 43 | 44 | x_reshaped = x.reshape((n, c, -1)) 45 | idx = torch.argmax(x_reshaped, 2) 46 | max_response, _ = torch.max(x_reshaped, 2) 47 | 48 | idx = idx.reshape((n, c, 1)) 49 | max_response = max_response.reshape((n, c)) 50 | max_index = torch.empty((n, c, 2)) 51 | max_index[:, :, 0] = idx[:, :, 0] % w # column 52 | max_index[:, :, 1] = idx[:, :, 0] // w # row 53 | 54 | if self.gaussion_blur: 55 | x = self.gaussion_blur(x) 56 | 57 | if self.group_mode == 'sum': 58 | heatmap = torch.sum(x[:, self.groups[0]], dim=1, keepdim=True) 59 | max_response_2 = torch.mean(max_response[:, self.groups[0]], dim=1, keepdim=True) 60 | 61 | for i in range(1, len(self.groups)): 62 | heatmapi = torch.sum(x[:, self.groups[i]], dim=1, keepdim=True) 63 | heatmap = torch.cat((heatmap, heatmapi), dim=1) 64 | 65 | max_response_i = torch.mean(max_response[:, self.groups[i]], dim=1, keepdim=True) 66 | max_response_2 = torch.cat((max_response_2, max_response_i), dim=1) 67 | 68 | 69 | elif self.group_mode == 'max': 70 | heatmap, _ = torch.max(x[:, self.groups[0]], dim=1, keepdim=True) 71 | max_response_2, _ = torch.max(max_response[:, self.groups[0]], dim=1, keepdim=True) 72 | 73 | for i in range(1, len(self.groups)): 74 | heatmapi, _ = torch.max(x[:, 
self.groups[i]], dim=1, keepdim=True) 75 | heatmap = torch.cat((heatmap, heatmapi), dim=1) 76 | 77 | max_response_i, _ = torch.max(max_response[:, self.groups[i]], dim=1, keepdim=True) 78 | max_response_2 = torch.cat((max_response_2, max_response_i), dim=1) 79 | 80 | if self.normalize_heatmap: 81 | heatmap = self.normalize(heatmap) 82 | 83 | if self.training: 84 | return heatmap 85 | else: 86 | return heatmap, max_response_2, max_index 87 | 88 | def normalize(self, in_tensor): 89 | n, c, h, w = in_tensor.shape 90 | in_tensor_reshape = in_tensor.reshape((n, c, -1)) 91 | 92 | normalized_tensor = F.softmax(in_tensor_reshape, dim=2) 93 | normalized_tensor = normalized_tensor.reshape((n, c, h, w)) 94 | 95 | return normalized_tensor 96 | 97 | 98 | class HeatmapProcessor2: 99 | 100 | def __init__(self, normalize_heatmap=True, group_mode="sum", norm_scale=1.0): 101 | 102 | self.num_joints = pose_config.MODEL.NUM_JOINTS 103 | self.groups = pose_config.MODEL.JOINTS_GROUPS 104 | 105 | self.group_mode = group_mode 106 | self.normalize_heatmap = normalize_heatmap 107 | self.norm_scale = norm_scale 108 | assert group_mode in ['sum', 'max'], "only support sum or max" 109 | 110 | def __call__(self, x): 111 | x = F.interpolate(x, [16, 8], mode='bilinear', align_corners=False) 112 | n, c, h, w = x.shape 113 | 114 | x_reshaped = x.reshape((n, c, -1)) 115 | idx = torch.argmax(x_reshaped, 2) 116 | max_response, _ = torch.max(x_reshaped, 2) 117 | 118 | idx = idx.reshape((n, c, 1)) 119 | max_response = max_response.reshape((n, c)) 120 | max_index = torch.empty((n, c, 2)) 121 | max_index[:, :, 0] = idx[:, :, 0] % w # column 122 | max_index[:, :, 1] = idx[:, :, 0] // w # row 123 | 124 | if self.group_mode == 'sum': 125 | heatmap = torch.sum(x[:, self.groups[0]], dim=1, keepdim=True) 126 | max_response_2 = torch.mean(max_response[:, self.groups[0]], dim=1, keepdim=True) 127 | 128 | for i in range(1, len(self.groups)): 129 | heatmapi = torch.sum(x[:, self.groups[i]], dim=1, keepdim=True) 130 | heatmap = torch.cat((heatmap, heatmapi), dim=1) 131 | 132 | max_response_i = torch.mean(max_response[:, self.groups[i]], dim=1, keepdim=True) 133 | max_response_2 = torch.cat((max_response_2, max_response_i), dim=1) 134 | 135 | elif self.group_mode == 'max': 136 | heatmap, _ = torch.max(x[:, self.groups[0]], dim=1, keepdim=True) 137 | max_response_2, _ = torch.max(max_response[:, self.groups[0]], dim=1, keepdim=True) 138 | 139 | for i in range(1, len(self.groups)): 140 | heatmapi, _ = torch.max(x[:, self.groups[i]], dim=1, keepdim=True) 141 | heatmap = torch.cat((heatmap, heatmapi), dim=1) 142 | 143 | max_response_i, _ = torch.max(max_response[:, self.groups[i]], dim=1, keepdim=True) 144 | max_response_2 = torch.cat((max_response_2, max_response_i), dim=1) 145 | 146 | if self.normalize_heatmap: 147 | heatmap = self.normalize(heatmap, self.norm_scale) 148 | 149 | return heatmap, max_response_2, max_index 150 | 151 | def normalize(self, in_tensor, norm_scale): 152 | n, c, h, w = in_tensor.shape 153 | in_tensor_reshape = in_tensor.reshape((n, c, -1)) 154 | 155 | normalized_tensor = F.softmax(norm_scale * in_tensor_reshape, dim=2) 156 | normalized_tensor = normalized_tensor.reshape((n, c, h, w)) 157 | 158 | return normalized_tensor 159 | -------------------------------------------------------------------------------- /core/models/model_keypoints/pose_resnet.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 
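# ---------------------------------------------------------------------------
# Usage sketch for pose_processor.py above (added for clarity; the tensor
# shapes are illustrative -- any input spatial size works because the processor
# first resizes the heatmaps to 16x8):
#
#     proc = HeatmapProcessor2(normalize_heatmap=True, group_mode='sum', norm_scale=10.0)
#     raw = torch.randn(8, 17, 64, 32)      # a batch of 17-channel keypoint heatmaps
#     heatmap, score, max_index = proc(raw)
#     # heatmap:   (8, 13, 16, 8)  grouped, softmax-normalised attention maps
#     # score:     (8, 13)         per-group confidence (mean/max of the channel peaks)
#     # max_index: (8, 17, 2)      (column, row) of the peak in each original channel
# ---------------------------------------------------------------------------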
Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import logging 13 | 14 | import torch 15 | import torch.nn as nn 16 | from collections import OrderedDict 17 | 18 | BN_MOMENTUM = 0.1 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | def conv3x3(in_planes, out_planes, stride=1): 23 | """3x3 convolution with padding""" 24 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 25 | padding=1, bias=False) 26 | 27 | 28 | class BasicBlock(nn.Module): 29 | expansion = 1 30 | 31 | def __init__(self, inplanes, planes, stride=1, downsample=None): 32 | super(BasicBlock, self).__init__() 33 | self.conv1 = conv3x3(inplanes, planes, stride) 34 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 35 | self.relu = nn.ReLU(inplace=True) 36 | self.conv2 = conv3x3(planes, planes) 37 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 38 | self.downsample = downsample 39 | self.stride = stride 40 | 41 | def forward(self, x): 42 | residual = x 43 | 44 | out = self.conv1(x) 45 | out = self.bn1(out) 46 | out = self.relu(out) 47 | 48 | out = self.conv2(out) 49 | out = self.bn2(out) 50 | 51 | if self.downsample is not None: 52 | residual = self.downsample(x) 53 | 54 | out += residual 55 | out = self.relu(out) 56 | 57 | return out 58 | 59 | 60 | class Bottleneck(nn.Module): 61 | expansion = 4 62 | 63 | def __init__(self, inplanes, planes, stride=1, downsample=None): 64 | super(Bottleneck, self).__init__() 65 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 66 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 67 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 68 | padding=1, bias=False) 69 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 70 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 71 | bias=False) 72 | self.bn3 = nn.BatchNorm2d(planes * self.expansion, 73 | momentum=BN_MOMENTUM) 74 | self.relu = nn.ReLU(inplace=True) 75 | self.downsample = downsample 76 | self.stride = stride 77 | 78 | def forward(self, x): 79 | residual = x 80 | 81 | out = self.conv1(x) 82 | out = self.bn1(out) 83 | out = self.relu(out) 84 | 85 | out = self.conv2(out) 86 | out = self.bn2(out) 87 | out = self.relu(out) 88 | 89 | out = self.conv3(out) 90 | out = self.bn3(out) 91 | 92 | if self.downsample is not None: 93 | residual = self.downsample(x) 94 | 95 | out += residual 96 | out = self.relu(out) 97 | 98 | return out 99 | 100 | 101 | class Bottleneck_CAFFE(nn.Module): 102 | expansion = 4 103 | 104 | def __init__(self, inplanes, planes, stride=1, downsample=None): 105 | super(Bottleneck_CAFFE, self).__init__() 106 | # add stride to conv1x1 107 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) 108 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 109 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, 110 | padding=1, bias=False) 111 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 112 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 113 | bias=False) 114 | self.bn3 = nn.BatchNorm2d(planes * self.expansion, 115 | momentum=BN_MOMENTUM) 116 | self.relu = nn.ReLU(inplace=True) 117 | self.downsample = downsample 118 | self.stride = stride 119 
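# ---------------------------------------------------------------------------
# Clarifying note (added): Bottleneck_CAFFE differs from Bottleneck above only
# in where the stride is applied -- the Caffe-style block strides in the 1x1
# conv1, while the PyTorch-style block strides in the 3x3 conv2. get_pose_net
# switches to this block when cfg.MODEL.STYLE == 'caffe' ('pytorch' is the
# default in pose_config.py).
# ---------------------------------------------------------------------------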
| 120 | def forward(self, x): 121 | residual = x 122 | 123 | out = self.conv1(x) 124 | out = self.bn1(out) 125 | out = self.relu(out) 126 | 127 | out = self.conv2(out) 128 | out = self.bn2(out) 129 | out = self.relu(out) 130 | 131 | out = self.conv3(out) 132 | out = self.bn3(out) 133 | 134 | if self.downsample is not None: 135 | residual = self.downsample(x) 136 | 137 | out += residual 138 | out = self.relu(out) 139 | 140 | return out 141 | 142 | 143 | class PoseResNet(nn.Module): 144 | 145 | def __init__(self, block, layers, cfg, **kwargs): 146 | self.inplanes = 64 147 | extra = cfg.MODEL.EXTRA 148 | self.deconv_with_bias = extra.DECONV_WITH_BIAS 149 | 150 | super(PoseResNet, self).__init__() 151 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 152 | bias=False) 153 | self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 154 | self.relu = nn.ReLU(inplace=True) 155 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 156 | self.layer1 = self._make_layer(block, 64, layers[0]) 157 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 158 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 159 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 160 | 161 | # used for deconv layers 162 | self.deconv_layers = self._make_deconv_layer( 163 | extra.NUM_DECONV_LAYERS, 164 | extra.NUM_DECONV_FILTERS, 165 | extra.NUM_DECONV_KERNELS, 166 | ) 167 | 168 | self.final_layer = nn.Conv2d( 169 | in_channels=extra.NUM_DECONV_FILTERS[-1], 170 | out_channels=cfg.MODEL.NUM_JOINTS, 171 | kernel_size=extra.FINAL_CONV_KERNEL, 172 | stride=1, 173 | padding=1 if extra.FINAL_CONV_KERNEL == 3 else 0 174 | ) 175 | 176 | def _make_layer(self, block, planes, blocks, stride=1): 177 | downsample = None 178 | if stride != 1 or self.inplanes != planes * block.expansion: 179 | downsample = nn.Sequential( 180 | nn.Conv2d(self.inplanes, planes * block.expansion, 181 | kernel_size=1, stride=stride, bias=False), 182 | nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), 183 | ) 184 | 185 | layers = [] 186 | layers.append(block(self.inplanes, planes, stride, downsample)) 187 | self.inplanes = planes * block.expansion 188 | for i in range(1, blocks): 189 | layers.append(block(self.inplanes, planes)) 190 | 191 | return nn.Sequential(*layers) 192 | 193 | def _get_deconv_cfg(self, deconv_kernel, index): 194 | if deconv_kernel == 4: 195 | padding = 1 196 | output_padding = 0 197 | elif deconv_kernel == 3: 198 | padding = 1 199 | output_padding = 1 200 | elif deconv_kernel == 2: 201 | padding = 0 202 | output_padding = 0 203 | 204 | return deconv_kernel, padding, output_padding 205 | 206 | def _make_deconv_layer(self, num_layers, num_filters, num_kernels): 207 | assert num_layers == len(num_filters), \ 208 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 209 | assert num_layers == len(num_kernels), \ 210 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 211 | 212 | layers = [] 213 | for i in range(num_layers): 214 | kernel, padding, output_padding = \ 215 | self._get_deconv_cfg(num_kernels[i], i) 216 | 217 | planes = num_filters[i] 218 | layers.append( 219 | nn.ConvTranspose2d( 220 | in_channels=self.inplanes, 221 | out_channels=planes, 222 | kernel_size=kernel, 223 | stride=2, 224 | padding=padding, 225 | output_padding=output_padding, 226 | bias=self.deconv_with_bias)) 227 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 228 | layers.append(nn.ReLU(inplace=True)) 229 | self.inplanes = planes 230 | 231 | return 
nn.Sequential(*layers) 232 | 233 | def forward(self, x): 234 | x = self.conv1(x) 235 | x = self.bn1(x) 236 | x = self.relu(x) 237 | x = self.maxpool(x) 238 | 239 | x = self.layer1(x) 240 | x = self.layer2(x) 241 | x = self.layer3(x) 242 | x = self.layer4(x) 243 | 244 | x = self.deconv_layers(x) 245 | x = self.final_layer(x) 246 | 247 | return x 248 | 249 | def init_weights(self, pretrained=''): 250 | if os.path.isfile(pretrained): 251 | logger.info('=> init deconv weights from normal distribution') 252 | for name, m in self.deconv_layers.named_modules(): 253 | if isinstance(m, nn.ConvTranspose2d): 254 | logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 255 | logger.info('=> init {}.bias as 0'.format(name)) 256 | nn.init.normal_(m.weight, std=0.001) 257 | if self.deconv_with_bias: 258 | nn.init.constant_(m.bias, 0) 259 | elif isinstance(m, nn.BatchNorm2d): 260 | logger.info('=> init {}.weight as 1'.format(name)) 261 | logger.info('=> init {}.bias as 0'.format(name)) 262 | nn.init.constant_(m.weight, 1) 263 | nn.init.constant_(m.bias, 0) 264 | logger.info('=> init final conv weights from normal distribution') 265 | for m in self.final_layer.modules(): 266 | if isinstance(m, nn.Conv2d): 267 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 268 | logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 269 | logger.info('=> init {}.bias as 0'.format(name)) 270 | nn.init.normal_(m.weight, std=0.001) 271 | nn.init.constant_(m.bias, 0) 272 | 273 | # pretrained_state_dict = torch.load(pretrained) 274 | logger.info('=> loading pretrained model {}'.format(pretrained)) 275 | # self.load_state_dict(pretrained_state_dict, strict=False) 276 | checkpoint = torch.load(pretrained) 277 | if isinstance(checkpoint, OrderedDict): 278 | state_dict = checkpoint 279 | elif isinstance(checkpoint, dict) and 'state_dict' in checkpoint: 280 | state_dict_old = checkpoint['state_dict'] 281 | state_dict = OrderedDict() 282 | # delete 'module.' 
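# ---------------------------------------------------------------------------
# Clarifying note (added): checkpoints saved from nn.DataParallel prefix every
# parameter name with 'module.', so the loop below copies each entry into a
# fresh OrderedDict with that prefix stripped before load_state_dict. A compact
# equivalent sketch:
#
#     state_dict = OrderedDict(
#         (k[len('module.'):] if k.startswith('module.') else k, v)
#         for k, v in state_dict_old.items())
# ---------------------------------------------------------------------------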
because it is saved from DataParallel module 283 | for key in state_dict_old.keys(): 284 | if key.startswith('module.'): 285 | # state_dict[key[7:]] = state_dict[key] 286 | # state_dict.pop(key) 287 | state_dict[key[7:]] = state_dict_old[key] 288 | else: 289 | state_dict[key] = state_dict_old[key] 290 | else: 291 | raise RuntimeError( 292 | 'No state_dict found in checkpoint file {}'.format(pretrained)) 293 | self.load_state_dict(state_dict, strict=False) 294 | else: 295 | logger.error('=> imagenet pretrained model dose not exist') 296 | logger.error('=> please download it first') 297 | raise ValueError('imagenet pretrained model does not exist') 298 | 299 | 300 | resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]), 301 | 34: (BasicBlock, [3, 4, 6, 3]), 302 | 50: (Bottleneck, [3, 4, 6, 3]), 303 | 101: (Bottleneck, [3, 4, 23, 3]), 304 | 152: (Bottleneck, [3, 8, 36, 3])} 305 | 306 | 307 | def get_pose_net(cfg, is_train, **kwargs): 308 | num_layers = cfg.MODEL.EXTRA.NUM_LAYERS 309 | style = cfg.MODEL.STYLE 310 | 311 | block_class, layers = resnet_spec[num_layers] 312 | 313 | if style == 'caffe': 314 | block_class = Bottleneck_CAFFE 315 | 316 | model = PoseResNet(block_class, layers, cfg, **kwargs) 317 | 318 | if is_train and cfg.MODEL.INIT_WEIGHTS: 319 | model.init_weights(cfg.MODEL.PRETRAINED) 320 | 321 | return model 322 | -------------------------------------------------------------------------------- /core/models/model_keypoints/pose_resnet2.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import logging 13 | 14 | import torch 15 | import torch.nn as nn 16 | 17 | BN_MOMENTUM = 0.1 18 | logger = logging.getLogger(__name__) 19 | 20 | 21 | def conv3x3(in_planes, out_planes, stride=1): 22 | """3x3 convolution with padding""" 23 | return nn.Conv2d( 24 | in_planes, out_planes, kernel_size=3, stride=stride, 25 | padding=1, bias=False 26 | ) 27 | 28 | 29 | class BasicBlock(nn.Module): 30 | expansion = 1 31 | 32 | def __init__(self, inplanes, planes, stride=1, downsample=None): 33 | super(BasicBlock, self).__init__() 34 | self.conv1 = conv3x3(inplanes, planes, stride) 35 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 36 | self.relu = nn.ReLU(inplace=True) 37 | self.conv2 = conv3x3(planes, planes) 38 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 39 | self.downsample = downsample 40 | self.stride = stride 41 | 42 | def forward(self, x): 43 | residual = x 44 | 45 | out = self.conv1(x) 46 | out = self.bn1(out) 47 | out = self.relu(out) 48 | 49 | out = self.conv2(out) 50 | out = self.bn2(out) 51 | 52 | if self.downsample is not None: 53 | residual = self.downsample(x) 54 | 55 | out += residual 56 | out = self.relu(out) 57 | 58 | return out 59 | 60 | 61 | class Bottleneck(nn.Module): 62 | expansion = 4 63 | 64 | def __init__(self, inplanes, planes, stride=1, downsample=None): 65 | super(Bottleneck, self).__init__() 66 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 67 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 68 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 
69 | padding=1, bias=False) 70 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 71 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 72 | bias=False) 73 | self.bn3 = nn.BatchNorm2d(planes * self.expansion, 74 | momentum=BN_MOMENTUM) 75 | self.relu = nn.ReLU(inplace=True) 76 | self.downsample = downsample 77 | self.stride = stride 78 | 79 | def forward(self, x): 80 | residual = x 81 | 82 | out = self.conv1(x) 83 | out = self.bn1(out) 84 | out = self.relu(out) 85 | 86 | out = self.conv2(out) 87 | out = self.bn2(out) 88 | out = self.relu(out) 89 | 90 | out = self.conv3(out) 91 | out = self.bn3(out) 92 | 93 | if self.downsample is not None: 94 | residual = self.downsample(x) 95 | 96 | out += residual 97 | out = self.relu(out) 98 | 99 | return out 100 | 101 | 102 | class PoseResNet(nn.Module): 103 | 104 | def __init__(self, block, layers, cfg, **kwargs): 105 | self.inplanes = 64 106 | extra = cfg.MODEL.EXTRA 107 | self.deconv_with_bias = extra.DECONV_WITH_BIAS 108 | 109 | super(PoseResNet, self).__init__() 110 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 111 | bias=False) 112 | self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 113 | self.relu = nn.ReLU(inplace=True) 114 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 115 | self.layer1 = self._make_layer(block, 64, layers[0]) 116 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 117 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 118 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 119 | 120 | # used for deconv layers 121 | self.deconv_layers = self._make_deconv_layer( 122 | extra.NUM_DECONV_LAYERS, 123 | extra.NUM_DECONV_FILTERS, 124 | extra.NUM_DECONV_KERNELS, 125 | ) 126 | 127 | self.final_layer = nn.Conv2d( 128 | in_channels=extra.NUM_DECONV_FILTERS[-1], 129 | out_channels=cfg.MODEL.NUM_JOINTS, 130 | kernel_size=extra.FINAL_CONV_KERNEL, 131 | stride=1, 132 | padding=1 if extra.FINAL_CONV_KERNEL == 3 else 0 133 | ) 134 | 135 | def _make_layer(self, block, planes, blocks, stride=1): 136 | downsample = None 137 | if stride != 1 or self.inplanes != planes * block.expansion: 138 | downsample = nn.Sequential( 139 | nn.Conv2d(self.inplanes, planes * block.expansion, 140 | kernel_size=1, stride=stride, bias=False), 141 | nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), 142 | ) 143 | 144 | layers = [] 145 | layers.append(block(self.inplanes, planes, stride, downsample)) 146 | self.inplanes = planes * block.expansion 147 | for i in range(1, blocks): 148 | layers.append(block(self.inplanes, planes)) 149 | 150 | return nn.Sequential(*layers) 151 | 152 | def _get_deconv_cfg(self, deconv_kernel, index): 153 | if deconv_kernel == 4: 154 | padding = 1 155 | output_padding = 0 156 | elif deconv_kernel == 3: 157 | padding = 1 158 | output_padding = 1 159 | elif deconv_kernel == 2: 160 | padding = 0 161 | output_padding = 0 162 | 163 | return deconv_kernel, padding, output_padding 164 | 165 | def _make_deconv_layer(self, num_layers, num_filters, num_kernels): 166 | assert num_layers == len(num_filters), \ 167 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 168 | assert num_layers == len(num_kernels), \ 169 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 170 | 171 | layers = [] 172 | for i in range(num_layers): 173 | kernel, padding, output_padding = \ 174 | self._get_deconv_cfg(num_kernels[i], i) 175 | 176 | planes = num_filters[i] 177 | layers.append( 178 | nn.ConvTranspose2d( 179 | 
in_channels=self.inplanes, 180 | out_channels=planes, 181 | kernel_size=kernel, 182 | stride=2, 183 | padding=padding, 184 | output_padding=output_padding, 185 | bias=self.deconv_with_bias)) 186 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 187 | layers.append(nn.ReLU(inplace=True)) 188 | self.inplanes = planes 189 | 190 | return nn.Sequential(*layers) 191 | 192 | def forward(self, x): 193 | x = self.conv1(x) 194 | x = self.bn1(x) 195 | x = self.relu(x) 196 | x = self.maxpool(x) 197 | 198 | x = self.layer1(x) 199 | x = self.layer2(x) 200 | x = self.layer3(x) 201 | x = self.layer4(x) 202 | 203 | x = self.deconv_layers(x) 204 | x = self.final_layer(x) 205 | 206 | return x 207 | 208 | def init_weights(self, pretrained=''): 209 | if os.path.isfile(pretrained): 210 | logger.info('=> init deconv weights from normal distribution') 211 | for name, m in self.deconv_layers.named_modules(): 212 | if isinstance(m, nn.ConvTranspose2d): 213 | logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 214 | logger.info('=> init {}.bias as 0'.format(name)) 215 | nn.init.normal_(m.weight, std=0.001) 216 | if self.deconv_with_bias: 217 | nn.init.constant_(m.bias, 0) 218 | elif isinstance(m, nn.BatchNorm2d): 219 | logger.info('=> init {}.weight as 1'.format(name)) 220 | logger.info('=> init {}.bias as 0'.format(name)) 221 | nn.init.constant_(m.weight, 1) 222 | nn.init.constant_(m.bias, 0) 223 | logger.info('=> init final conv weights from normal distribution') 224 | for m in self.final_layer.modules(): 225 | if isinstance(m, nn.Conv2d): 226 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 227 | logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) 228 | logger.info('=> init {}.bias as 0'.format(name)) 229 | nn.init.normal_(m.weight, std=0.001) 230 | nn.init.constant_(m.bias, 0) 231 | 232 | pretrained_state_dict = torch.load(pretrained) 233 | logger.info('=> loading pretrained model {}'.format(pretrained)) 234 | self.load_state_dict(pretrained_state_dict, strict=False) 235 | else: 236 | logger.info('=> init weights from normal distribution') 237 | for m in self.modules(): 238 | if isinstance(m, nn.Conv2d): 239 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 240 | nn.init.normal_(m.weight, std=0.001) 241 | # nn.init.constant_(m.bias, 0) 242 | elif isinstance(m, nn.BatchNorm2d): 243 | nn.init.constant_(m.weight, 1) 244 | nn.init.constant_(m.bias, 0) 245 | elif isinstance(m, nn.ConvTranspose2d): 246 | nn.init.normal_(m.weight, std=0.001) 247 | if self.deconv_with_bias: 248 | nn.init.constant_(m.bias, 0) 249 | 250 | 251 | resnet_spec = { 252 | 18: (BasicBlock, [2, 2, 2, 2]), 253 | 34: (BasicBlock, [3, 4, 6, 3]), 254 | 50: (Bottleneck, [3, 4, 6, 3]), 255 | 101: (Bottleneck, [3, 4, 23, 3]), 256 | 152: (Bottleneck, [3, 8, 36, 3]) 257 | } 258 | 259 | 260 | def get_pose_net(cfg, is_train, **kwargs): 261 | num_layers = cfg.MODEL.EXTRA.NUM_LAYERS 262 | 263 | block_class, layers = resnet_spec[num_layers] 264 | 265 | model = PoseResNet(block_class, layers, cfg, **kwargs) 266 | 267 | if is_train and cfg.MODEL.INIT_WEIGHTS: 268 | model.init_weights(cfg.MODEL.PRETRAINED) 269 | 270 | return model 271 | -------------------------------------------------------------------------------- /core/models/model_reid.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | 6 | def weights_init_kaiming(m): 7 | classname = m.__class__.__name__ 8 | if 
classname.find('Linear') != -1: 9 | nn.init.kaiming_normal_(m.weight, a=0, mode='fan_out') 10 | nn.init.constant_(m.bias, 0.0) 11 | elif classname.find('Conv') != -1: 12 | nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in') 13 | if m.bias is not None: 14 | nn.init.constant_(m.bias, 0.0) 15 | elif classname.find('BatchNorm') != -1: 16 | if m.affine: 17 | nn.init.constant_(m.weight, 1.0) 18 | nn.init.constant_(m.bias, 0.0) 19 | 20 | 21 | def weights_init_classifier(m): 22 | classname = m.__class__.__name__ 23 | if classname.find('Linear') != -1: 24 | nn.init.normal_(m.weight, std=0.001) 25 | if m.bias: 26 | nn.init.constant_(m.bias, 0.0) 27 | 28 | 29 | class Encoder(nn.Module): 30 | 31 | def __init__(self, class_num): 32 | super(Encoder, self).__init__() 33 | 34 | self.class_num = class_num 35 | 36 | # backbone and optimize its architecture 37 | resnet = torchvision.models.resnet50(pretrained=True) 38 | resnet.layer4[0].conv2.stride = (1, 1) 39 | resnet.layer4[0].downsample[0].stride = (1, 1) 40 | 41 | # cnn backbone 42 | self.resnet_conv = nn.Sequential( 43 | resnet.conv1, resnet.bn1, resnet.maxpool, # no relu 44 | resnet.layer1, resnet.layer2, resnet.layer3, resnet.layer4) 45 | # self.gap = nn.AdaptiveAvgPool2d(1) 46 | 47 | def forward(self, x): 48 | feature_map = self.resnet_conv(x) 49 | return feature_map 50 | 51 | 52 | class BNClassifier(nn.Module): 53 | 54 | def __init__(self, in_dim, class_num): 55 | super(BNClassifier, self).__init__() 56 | 57 | self.in_dim = in_dim 58 | self.class_num = class_num 59 | 60 | self.bn = nn.BatchNorm1d(self.in_dim) 61 | self.bn.bias.requires_grad_(False) 62 | self.classifier = nn.Linear(self.in_dim, self.class_num, bias=False) 63 | 64 | self.bn.apply(weights_init_kaiming) 65 | self.classifier.apply(weights_init_classifier) 66 | 67 | def forward(self, x): 68 | feature = self.bn(x) 69 | cls_score = self.classifier(feature) 70 | return feature, cls_score 71 | 72 | 73 | class BNClassifiers(nn.Module): 74 | 75 | def __init__(self, in_dim, class_num, branch_num): 76 | super(BNClassifiers, self).__init__() 77 | 78 | self.in_dim = in_dim 79 | self.class_num = class_num 80 | self.branch_num = branch_num 81 | 82 | for i in range(self.branch_num): 83 | setattr(self, 'classifier_{}'.format(i), BNClassifier(self.in_dim, self.class_num)) 84 | 85 | def __call__(self, feature_vector_list): 86 | 87 | assert len(feature_vector_list) == self.branch_num 88 | 89 | # bnneck for each sub_branch_feature 90 | bned_feature_vector_list, cls_score_list = [], [] 91 | for i in range(self.branch_num): 92 | feature_vector_i = feature_vector_list[i] 93 | classifier_i = getattr(self, 'classifier_{}'.format(i)) 94 | bned_feature_vector_i, cls_score_i = classifier_i(feature_vector_i) 95 | 96 | bned_feature_vector_list.append(bned_feature_vector_i) 97 | cls_score_list.append(cls_score_i) 98 | 99 | return bned_feature_vector_list, cls_score_list 100 | -------------------------------------------------------------------------------- /core/test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from tools import time_now, NumpyCatMeter, TorchCatMeter, CMC, CMCWithVer 4 | 5 | 6 | def testwithVer2(config, logger, base, loaders, test_dataset, use_gcn, use_gm): 7 | base.set_eval() 8 | 9 | # meters 10 | query_features_meter, query_features2_meter, query_pids_meter, query_cids_meter = NumpyCatMeter(), NumpyCatMeter(), NumpyCatMeter(), NumpyCatMeter() 11 | gallery_features_meter, gallery_features2_meter, 
gallery_pids_meter, gallery_cids_meter = NumpyCatMeter(), NumpyCatMeter(), NumpyCatMeter(), NumpyCatMeter() 12 | 13 | # init dataset 14 | if test_dataset == 'market': 15 | loaders = [loaders.market_query_loader, loaders.market_gallery_loader] 16 | elif test_dataset == 'duke': 17 | loaders = [loaders.duke_query_loader, loaders.duke_gallery_loader] 18 | 19 | # compute query and gallery features 20 | with torch.no_grad(): 21 | for loader_id, loader in enumerate(loaders): 22 | for data in loader: 23 | # compute feautres 24 | images, pids, cids = data 25 | images, pids, cids = images.to(base.device), pids.to(base.device), cids.to(base.device) 26 | info, gcned_info = base.forward(images, pids, training=False) 27 | features_stage1, features_stage2 = info 28 | gcned_features_stage1, gcned_features_stage2 = gcned_info 29 | if use_gcn: 30 | features_stage1 = gcned_features_stage1 31 | features_stage2 = gcned_features_stage2 32 | else: 33 | features_stage1 = features_stage1 34 | features_stage2 = features_stage2 35 | 36 | # save as query features 37 | if loader_id == 0: 38 | query_features_meter.update(features_stage1.data.cpu().numpy()) 39 | query_features2_meter.update(features_stage2.data.cpu().numpy()) 40 | query_pids_meter.update(pids.cpu().numpy()) 41 | query_cids_meter.update(cids.cpu().numpy()) 42 | # save as gallery features 43 | elif loader_id == 1: 44 | gallery_features_meter.update(features_stage1.data.cpu().numpy()) 45 | gallery_features2_meter.update(features_stage2.data.cpu().numpy()) 46 | gallery_pids_meter.update(pids.cpu().numpy()) 47 | gallery_cids_meter.update(cids.cpu().numpy()) 48 | 49 | # 50 | query_features = query_features_meter.get_val() 51 | query_features2 = query_features2_meter.get_val() 52 | gallery_features = gallery_features_meter.get_val() 53 | gallery_features2 = gallery_features2_meter.get_val() 54 | 55 | # compute mAP and rank@k 56 | query_info = (query_features, query_features2, query_cids_meter.get_val(), query_pids_meter.get_val()) 57 | gallery_info = (gallery_features, gallery_features2, gallery_cids_meter.get_val(), gallery_pids_meter.get_val()) 58 | 59 | alpha = 0.1 if use_gm else 1.0 60 | topk = 8 61 | mAP, cmc = CMCWithVer()(query_info, gallery_info, base.verificator, base.gmnet, topk, alpha) 62 | 63 | return mAP, cmc 64 | -------------------------------------------------------------------------------- /core/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .models import mining_hard_pairs, analyze_ver_prob 3 | from tools import * 4 | 5 | 6 | def train_an_epoch(config, base, loaders, epoch): 7 | base.set_train() 8 | meter = MultiItemAverageMeter() 9 | 10 | ### we assume 200 iterations as an epoch 11 | for _ in range(200): 12 | ### load a batch data 13 | imgs, pids, _ = loaders.train_iter.next_one() 14 | imgs, pids = imgs.to(base.device), pids.to(base.device) 15 | 16 | ### forward 17 | feature_info, cls_score_info, ver_probs, gmp_info, gmn_info, keypoints_confidence = base.forward(imgs, pids, training=True) 18 | 19 | feature_vector_list, gcned_feature_vector_list = feature_info 20 | cls_score_list, gcned_cls_score_list = cls_score_info 21 | ver_prob_p, ver_prob_n = ver_probs 22 | s_p, emb_p, emb_pp = gmp_info 23 | s_n, emb_n, emb_nn = gmn_info 24 | 25 | ### loss 26 | ide_loss = base.compute_ide_loss(cls_score_list, pids, keypoints_confidence) 27 | triplet_loss = base.compute_triplet_loss(feature_vector_list, pids) 28 | ### gcn loss 29 | gcned_ide_loss = 
base.compute_ide_loss(gcned_cls_score_list, pids, keypoints_confidence) 30 | gcned_triplet_loss = base.compute_triplet_loss(gcned_feature_vector_list, pids) 31 | ### graph matching loss 32 | s_gt = torch.eye(14).unsqueeze(0).repeat([s_p.shape[0], 1, 1]).detach().to(base.device) 33 | pp_loss = base.permutation_loss(s_p, s_gt) 34 | pn_loss = base.permutation_loss(s_n, s_gt) 35 | p_loss = pp_loss # + pn_loss 36 | ### verification loss 37 | ver_loss = base.bce_loss(ver_prob_p, torch.ones_like(ver_prob_p)) + base.bce_loss(ver_prob_n, torch.zeros_like(ver_prob_n)) 38 | 39 | # overall loss 40 | loss = ide_loss + gcned_ide_loss + triplet_loss + gcned_triplet_loss 41 | if epoch >= config.use_gm_after: 42 | loss += \ 43 | config.weight_p_loss * p_loss + \ 44 | config.weight_ver_loss * ver_loss 45 | acc = base.compute_accuracy(cls_score_list, pids) 46 | gcned_acc = base.compute_accuracy(gcned_cls_score_list, pids) 47 | ver_p_ana = analyze_ver_prob(ver_prob_p, True) 48 | ver_n_ana = analyze_ver_prob(ver_prob_n, False) 49 | 50 | ### optimize 51 | base.optimizer.zero_grad() 52 | loss.backward() 53 | base.optimizer.step() 54 | 55 | ### recored 56 | meter.update({'ide_loss': ide_loss.data.cpu().numpy(), 'gcned_ide_loss': gcned_ide_loss.data.cpu().numpy(), 57 | 'triplet_loss': triplet_loss.data.cpu().numpy(), 'gcned_triplet_loss': gcned_triplet_loss.data.cpu().numpy(), 58 | 'acc': acc, 'gcned_acc': gcned_acc, 59 | 'ver_loss': ver_loss.data.cpu().numpy(), 'ver_p_ana': torch.tensor(ver_p_ana).data.cpu().numpy(), 'ver_n_ana': torch.tensor(ver_n_ana).data.cpu().numpy(), 60 | 'pp_loss': pp_loss.data.cpu().numpy(), 'pn_loss': pn_loss.data.cpu().numpy()}) 61 | 62 | return meter.get_val(), meter.get_str() 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /core/visualize.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from tools import cosine_dist, visualize_ranked_results, time_now, NumpyCatMeter, TorchCatMeter 4 | 5 | 6 | def visualize_ranked_images(config, base, loaders, dataset): 7 | 8 | base.set_eval() 9 | 10 | # init dataset 11 | if dataset == 'market': 12 | _datasets = [loaders.market_query_samples.samples, loaders.market_gallery_samples.samples] 13 | _loaders = [loaders.market_query_loader, loaders.market_gallery_loader] 14 | save_visualize_path = base.save_visualize_market_path 15 | elif dataset == 'duke': 16 | _datasets = [loaders.duke_query_samples.samples, loaders.duke_gallery_samples.samples] 17 | _loaders = [loaders.duke_query_loader, loaders.duke_gallery_loader] 18 | save_visualize_path = base.save_visualize_duke_path 19 | 20 | # compute featuress 21 | query_features, query_features2, gallery_features, gallery_features2 = compute_features(base, _loaders, True) 22 | 23 | # compute cosine similarity 24 | cosine_similarity = cosine_dist( 25 | torch.tensor(query_features).cuda(), 26 | torch.tensor(gallery_features).cuda()).data.cpu().numpy() 27 | 28 | # compute verification score 29 | ver_scores = compute_ver_scores(cosine_similarity, query_features2, gallery_features2, base.verificator, topk=25, sort='descend') 30 | 31 | # visualize 32 | visualize_ranked_results(cosine_similarity, ver_scores, _datasets, save_dir=save_visualize_path, topk=20, sort='descend') 33 | 34 | 35 | 36 | def compute_features(base, loaders, use_gcn): 37 | 38 | # meters 39 | query_features_meter, query_features2_meter, query_pids_meter, query_cids_meter = NumpyCatMeter(), NumpyCatMeter(), 
NumpyCatMeter(), NumpyCatMeter() 40 | gallery_features_meter, gallery_features2_meter, gallery_pids_meter, gallery_cids_meter = NumpyCatMeter(), NumpyCatMeter(), NumpyCatMeter(), NumpyCatMeter() 41 | 42 | # compute query and gallery features 43 | with torch.no_grad(): 44 | for loader_id, loader in enumerate(loaders): 45 | for data in loader: 46 | # compute feautres 47 | images, pids, cids = data 48 | images, pids, cids = images.to(base.device), pids.to(base.device), cids.to(base.device) 49 | info, gcned_info = base.forward(images, pids, training=False) 50 | features_stage1, features_stage2 = info 51 | gcned_features_stage1, gcned_features_stage2 = gcned_info 52 | if use_gcn: 53 | features_stage1 = gcned_features_stage1 54 | features_stage2 = gcned_features_stage2 55 | else: 56 | features_stage1 = features_stage1 57 | features_stage2 = features_stage2 58 | 59 | # save as query features 60 | if loader_id == 0: 61 | query_features_meter.update(features_stage1.data.cpu().numpy()) 62 | query_features2_meter.update(features_stage2.data.cpu().numpy()) 63 | query_pids_meter.update(pids.cpu().numpy()) 64 | query_cids_meter.update(cids.cpu().numpy()) 65 | # save as gallery features 66 | elif loader_id == 1: 67 | gallery_features_meter.update(features_stage1.data.cpu().numpy()) 68 | gallery_features2_meter.update(features_stage2.data.cpu().numpy()) 69 | gallery_pids_meter.update(pids.cpu().numpy()) 70 | gallery_cids_meter.update(cids.cpu().numpy()) 71 | 72 | # 73 | query_features = query_features_meter.get_val() 74 | query_features2 = query_features2_meter.get_val() 75 | gallery_features = gallery_features_meter.get_val() 76 | gallery_features2 = gallery_features2_meter.get_val() 77 | 78 | return query_features, query_features2, gallery_features, gallery_features2 79 | 80 | 81 | def compute_ver_scores(cosine_similarity, query_features_stage2, gallery_features_stage2, verificator, topk, sort='descend'): 82 | assert sort in ['ascend', 'descend'] 83 | ver_scores_list = [] 84 | distance_stage1 = cosine_similarity 85 | # 86 | for sample_idnex in range(distance_stage1.shape[0]): 87 | # stage 1, compute distance, return index and topk 88 | a_sample_distance_stage1 = distance_stage1[sample_idnex] 89 | if sort == 'descend': 90 | a_sample_index_stage1 = np.argsort(a_sample_distance_stage1)[::-1] 91 | elif sort == 'ascend': 92 | a_sample_index_stage1 = np.argsort(a_sample_distance_stage1) 93 | a_sample_topk_index_stage1 = a_sample_index_stage1[:topk] 94 | # stage2: feature extract topk features 95 | a_sample_query_feature_stage2 = query_features_stage2[sample_idnex] 96 | topk_gallery_features_stage2 = gallery_features_stage2[a_sample_topk_index_stage1] 97 | a_sample_query_feature_stage2 = \ 98 | torch.Tensor(a_sample_query_feature_stage2).cuda().unsqueeze(0).repeat([topk, 1, 1]) 99 | topk_gallery_features_stage2 = torch.Tensor(topk_gallery_features_stage2).cuda() 100 | 101 | # stage2: compute verification score 102 | with torch.no_grad(): 103 | probs = verificator(a_sample_query_feature_stage2, topk_gallery_features_stage2) 104 | probs = probs.detach().view([-1]).cpu().data.numpy() 105 | 106 | ver_scores_list.append(np.expand_dims(probs, axis=0)) 107 | 108 | ver_scores = np.concatenate(ver_scores_list, axis=0) 109 | return ver_scores 110 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import torchvision.transforms as transforms 2 | 3 | import argparse 4 | import os 5 | import ast 6 | 
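# ---------------------------------------------------------------------------
# Clarifying note (added, not part of the original script): main() below
# dispatches on --mode ('train', 'test' or 'visualize'). Besides the shell CLI,
# the entry point can also be driven programmatically by letting the argument
# parser defined under __main__ fill in its defaults -- an illustrative sketch
# with hypothetical paths and epoch:
#
#     config = parser.parse_args(['--mode', 'test',
#                                 '--duke_path', '/data/occluded_duke',      # hypothetical
#                                 '--resume_test_path', './results/models',  # hypothetical
#                                 '--resume_test_epoch', '100'])             # hypothetical
#     main(config)
# ---------------------------------------------------------------------------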
7 | from core import Loaders, Base, train_an_epoch, testwithVer2, visualize_ranked_images 8 | from tools import make_dirs, Logger, os_walk, time_now 9 | 10 | 11 | def main(config): 12 | 13 | # init loaders and base 14 | loaders = Loaders(config) 15 | base = Base(config, loaders) 16 | 17 | # make directions 18 | make_dirs(base.output_path) 19 | make_dirs(base.save_model_path) 20 | make_dirs(base.save_logs_path) 21 | make_dirs(base.save_visualize_market_path) 22 | make_dirs(base.save_visualize_duke_path) 23 | 24 | # init logger 25 | logger = Logger(os.path.join(os.path.join(config.output_path, 'logs/'), 'log.txt')) 26 | logger('\n'*3) 27 | logger(config) 28 | 29 | 30 | if config.mode == 'train': # train mode 31 | 32 | # resume model from the resume_train_epoch 33 | start_train_epoch = 0 34 | 35 | # automatically resume model from the latest one 36 | if config.auto_resume_training_from_lastest_steps: 37 | root, _, files = os_walk(base.save_model_path) 38 | if len(files) > 0: 39 | # get indexes of saved models 40 | indexes = [] 41 | for file in files: 42 | indexes.append(int(file.replace('.pkl', '').split('_')[-1])) 43 | indexes = sorted(list(set(indexes)), reverse=False) 44 | # resume model from the latest model 45 | base.resume_model(indexes[-1]) 46 | # 47 | start_train_epoch = indexes[-1] 48 | logger('Time: {}, automatically resume training from the latest step (model {})'.format(time_now(), indexes[-1])) 49 | 50 | # main loop 51 | for current_epoch in range(start_train_epoch, config.total_train_epochs): 52 | # save model 53 | base.save_model(current_epoch) 54 | # train 55 | base.lr_scheduler.step(current_epoch) 56 | _, results = train_an_epoch(config, base, loaders, current_epoch) 57 | logger('Time: {}; Epoch: {}; {}'.format(time_now(), current_epoch, results)) 58 | # test 59 | testwithVer2(config, logger, base, loaders, 'duke', use_gcn=True, use_gm=True) 60 | 61 | 62 | elif config.mode == 'test': # test mode 63 | # resume from the resume_test_epoch 64 | if config.resume_test_path != '' and config.resume_test_epoch != 0: 65 | base.resume_model_from_path(config.resume_test_path, config.resume_test_epoch) 66 | else: 67 | assert 0, 'please set resume_test_path and resume_test_epoch ' 68 | # test 69 | duke_map, duke_rank = testwithVer2(config, logger, base, loaders, 'duke', use_gcn=False, use_gm=False) 70 | logger('Time: {}, base, Dataset: Duke \nmAP: {} \nRank: {}'.format(time_now(), duke_map, duke_rank)) 71 | duke_map, duke_rank = testwithVer2(config, logger, base, loaders, 'duke', use_gcn=True, use_gm=False) 72 | logger('Time: {}, base+gcn, Dataset: Duke \nmAP: {} \nRank: {}'.format(time_now(), duke_map, duke_rank)) 73 | duke_map, duke_rank = testwithVer2(config, logger, base, loaders, 'duke', use_gcn=True, use_gm=True) 74 | logger('Time: {}, base+gcn+gm, Dataset: Duke \nmAP: {} \nRank: {}'.format(time_now(), duke_map, duke_rank)) 75 | logger('') 76 | 77 | 78 | elif config.mode == 'visualize': # visualization mode 79 | # resume from the resume_visualize_epoch 80 | if config.resume_visualize_path != '' and config.resume_visualize_epoch != 0: 81 | base.resume_model_from_path(config.resume_visualize_path, config.resume_visualize_epoch) 82 | print('Time: {}, resume model from {} {}'.format(time_now(), config.resume_visualize_path, config.resume_visualize_epoch)) 83 | # visualization 84 | if 'market' in config.train_dataset: 85 | visualize_ranked_images(config, base, loaders, 'market') 86 | elif 'duke' in config.train_dataset: 87 | visualize_ranked_images(config, base, loaders, 'duke') 88 | 
else: 89 | assert 0 90 | 91 | if __name__ == '__main__': 92 | 93 | parser = argparse.ArgumentParser() 94 | 95 | # 96 | parser.add_argument('--cuda', type=str, default='cuda') 97 | parser.add_argument('--mode', type=str, default='train', help='train, test or visualize') 98 | parser.add_argument('--output_path', type=str, default='out/base/', help='path to save related informations') 99 | 100 | # dataset configuration 101 | parser.add_argument('--duke_path', type=str, default='path/to/occluded/duke') 102 | parser.add_argument('--train_dataset', type=str, default='duke', help='occluded_duke') 103 | parser.add_argument('--image_size', type=int, nargs='+', default=[256, 128]) 104 | parser.add_argument('--p', type=int, default=16, help='person count in a batch') 105 | parser.add_argument('--k', type=int, default=4, help='images count of a person in a batch') 106 | 107 | # model configuration 108 | parser.add_argument('--pid_num', type=int, default=702, help='702 DukeMTMC-reID') 109 | parser.add_argument('--margin', type=float, default=0.3, help='margin for the triplet loss with batch hard') 110 | parser.add_argument('--branch_num', type=int, default=14, help='') 111 | 112 | # keypoints model 113 | parser.add_argument('--weight_global_feature', type=float, default=1.0, help='') 114 | parser.add_argument('--norm_scale', type=float, default=10.0, help='') 115 | 116 | # gcn model 117 | parser.add_argument('--gcn_scale', type=float, default=20.0, help='') 118 | parser.add_argument('--gcn_lr_scale', type=float, default=0.1, help='') 119 | 120 | # graph matching model 121 | parser.add_argument('--use_gm_after', type=int, default=20, help='') 122 | parser.add_argument('--gm_lr_scale', type=float, default=1.0, help='') 123 | parser.add_argument('--weight_p_loss', type=float, default=1.0, help='') 124 | 125 | # verification model 126 | parser.add_argument('--weight_ver_loss', type=float, default=0.1, help='') 127 | parser.add_argument('--ver_lr_scale', type=float, default=1.0, help='') 128 | parser.add_argument('--ver_topk', type=int, default=1, help='') 129 | parser.add_argument('--ver_alpha', type=float, default=0.5, help='') 130 | parser.add_argument('--ver_in_scale', type=float, default=10.0, help='') 131 | 132 | 133 | # train configuration 134 | parser.add_argument('--milestones', nargs='+', type=int, default=[40, 70], help='milestones for the learning rate decay') 135 | parser.add_argument('--base_learning_rate', type=float, default=0.00035) 136 | parser.add_argument('--weight_decay', type=float, default=0.0005) 137 | parser.add_argument('--total_train_epochs', type=int, default=120) 138 | parser.add_argument('--auto_resume_training_from_lastest_steps', type=ast.literal_eval, default=True) 139 | parser.add_argument('--max_save_model_num', type=int, default=1, help='0 for max num is infinit') 140 | 141 | # test configuration 142 | parser.add_argument('--resume_test_path', type=str, default='', help=' for no resuming') 143 | parser.add_argument('--resume_test_epoch', type=int, default=0, help='0 for no resuming') 144 | 145 | # visualization configuration 146 | parser.add_argument('--resume_visualize_path', type=str, default='', help=' for no resuming') 147 | parser.add_argument('--resume_visualize_epoch', type=int, default=0, help='0 for no resuming') 148 | 149 | # main 150 | config = parser.parse_args() 151 | main(config) 152 | -------------------------------------------------------------------------------- /tools/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .logger import * 2 | from .utils import * 3 | from .evaluation import * 4 | from .transforms2 import * 5 | from .meter import * 6 | from .loss import * 7 | from .metric import * 8 | from .visualize import visualize_ranked_results -------------------------------------------------------------------------------- /tools/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .classification import * 2 | from .retrieval2 import * -------------------------------------------------------------------------------- /tools/evaluation/classification.py: -------------------------------------------------------------------------------- 1 | def accuracy(output, target, topk=[1]): 2 | """Computes the precision@k for the specified values of k""" 3 | maxk = max(topk) 4 | batch_size = target.size(0) 5 | 6 | _, pred = output.topk(maxk, 1, True, True) 7 | pred = pred.t() 8 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 9 | 10 | res = [] 11 | for k in topk: 12 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 13 | res.append(correct_k.mul_(100.0 / batch_size)) 14 | return res -------------------------------------------------------------------------------- /tools/evaluation/retrieval2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn import metrics as sk_metrics 3 | 4 | import torch 5 | import torch.nn.functional as F 6 | 7 | 8 | class CMC: 9 | ''' 10 | Compute Rank@k and mean Average Precision (mAP) scores 11 | Used for Person ReID 12 | Test on MarKet and Duke 13 | ''' 14 | 15 | def __init__(self): 16 | pass 17 | 18 | def __call__(self, query_info, gallery_info, dist): 19 | 20 | query_feature, query_cam, query_label = query_info 21 | gallery_feature, gallery_cam, gallery_label = gallery_info 22 | assert dist in ['cosine', 'euclidean'] 23 | print(query_feature.shape, gallery_feature.shape) 24 | 25 | if dist == 'cosine': 26 | # distance = self.cosine_dist_torch( 27 | # torch.Tensor(query_feature).cuda(), 28 | # torch.Tensor(gallery_feature).cuda()).data.cpu().numpy() 29 | distance = self.cosine_dist(query_feature, gallery_feature) 30 | elif dist == 'euclidean': 31 | # distance = self.euclidean_dist_torch( 32 | # torch.Tensor(query_feature).cuda(), 33 | # torch.Tensor(gallery_feature).cuda()).data.cpu().numpy() 34 | distance = self.euclidean_dist(query_feature, gallery_feature) 35 | 36 | APs = [] 37 | CMC = [] 38 | query_num = query_feature.shape[0] 39 | for i in range(query_num): 40 | AP, cmc = self.evaluate( 41 | distance[i], 42 | query_cam[i], query_label[i], 43 | gallery_cam, gallery_label, dist) 44 | APs.append(AP) 45 | CMC.append(cmc) 46 | 47 | mAP = np.mean(np.array(APs)) 48 | 49 | min_len = 99999999 50 | for cmc in CMC: 51 | if len(cmc) < min_len: 52 | min_len = len(cmc) 53 | for i, cmc in enumerate(CMC): 54 | CMC[i] = cmc[0: min_len] 55 | CMC = np.mean(np.array(CMC), axis=0) 56 | 57 | return mAP, CMC 58 | 59 | 60 | def evaluate(self, distance, query_cam, query_label, gallery_cam, gallery_label, dist): 61 | 62 | if dist is 'cosine': 63 | index = np.argsort(distance)[::-1] 64 | elif dist is 'euclidean': 65 | index = np.argsort(distance) 66 | 67 | junk_index_1 = self.in1d(np.argwhere(query_label == gallery_label), np.argwhere(query_cam == gallery_cam)) 68 | junk_index_2 = np.argwhere(gallery_label == -1) 69 | junk_index = np.append(junk_index_1, junk_index_2) 70 | 71 | 
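        # Clarifying note (added): following the standard Market/Duke evaluation
        # protocol, gallery images that share both the identity and the camera of
        # the query, plus distractors labelled -1, are "junk" and are removed from
        # the ranking; only cross-camera images of the query identity are "good".
        # Worked example for compute_AP below (illustrative): if the good matches
        # land at ranks 1 and 3 of the cleaned ranking (index_hit = [0, 2]), then
        #     precision = [1/1, 2/3]  ->  AP = mean(precision) ~ 0.833
        # and the CMC curve is 1 from rank 1 onwards (first hit at rank 1).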
good_index = self.in1d(np.argwhere(query_label == gallery_label), np.argwhere(query_cam != gallery_cam)) 72 | index_wo_junk = self.notin1d(index, junk_index) 73 | 74 | return self.compute_AP(index_wo_junk, good_index) 75 | 76 | 77 | def compute_AP(self, index, good_index): 78 | ''' 79 | :param index: np.array, 1d 80 | :param good_index: np.array, 1d 81 | :return: 82 | ''' 83 | 84 | num_good = len(good_index) 85 | hit = np.in1d(index, good_index) 86 | index_hit = np.argwhere(hit == True).flatten() 87 | 88 | if len(index_hit) == 0: 89 | AP = 0 90 | cmc = np.zeros([len(index)]) 91 | else: 92 | precision = [] 93 | for i in range(num_good): 94 | precision.append(float(i+1) / float((index_hit[i]+1))) 95 | AP = np.mean(np.array(precision)) 96 | cmc = np.zeros([len(index)]) 97 | cmc[index_hit[0]: ] = 1 98 | 99 | return AP, cmc 100 | 101 | 102 | def in1d(self, array1, array2, invert=False): 103 | ''' 104 | :param set1: np.array, 1d 105 | :param set2: np.array, 1d 106 | :return: 107 | ''' 108 | mask = np.in1d(array1, array2, invert=invert) 109 | return array1[mask] 110 | 111 | 112 | def notin1d(self, array1, array2): 113 | return self.in1d(array1, array2, invert=True) 114 | 115 | 116 | def cosine_dist_torch(self, x, y): 117 | ''' 118 | :param x: torch.tensor, 2d 119 | :param y: torch.tensor, 2d 120 | :return: 121 | ''' 122 | x = F.normalize(x, dim=1) 123 | y = F.normalize(y, dim=1) 124 | return torch.mm(x, y.transpose(0, 1)) 125 | 126 | 127 | def euclidean_dist_torch(self, mtx1, mtx2): 128 | """ 129 | mtx1 is an autograd.Variable with shape of (n,d) 130 | mtx1 is an autograd.Variable with shape of (n,d) 131 | return a nxn distance matrix dist 132 | dist[i,j] represent the L2 distance between mtx1[i] and mtx2[j] 133 | """ 134 | m = mtx1.size(0) 135 | p = mtx1.size(1) 136 | mmtx1 = torch.stack([mtx1] * m) 137 | mmtx2 = torch.stack([mtx2] * m).transpose(0, 1) 138 | dist = torch.sum((mmtx1 - mmtx2) ** 2, 2).squeeze() 139 | return dist 140 | 141 | 142 | def cosine_dist(self, x, y): 143 | return 1 - sk_metrics.pairwise.cosine_distances(x, y) 144 | 145 | 146 | def euclidean_dist(self, x, y): 147 | return sk_metrics.pairwise.euclidean_distances(x, y) 148 | 149 | 150 | 151 | 152 | class CMCWithVer(CMC): 153 | ''' 154 | Compute Rank@k and mean Average Precision (mAP) scores 155 | Used for Person ReID 156 | Test on MarKet and Duke 157 | ''' 158 | 159 | 160 | def __call__(self, query_info, gallery_info, verificator, gmnet, topk, alpha): 161 | ''' 162 | use cosine + verfication loss as distance 163 | ''' 164 | 165 | query_features_stage1, query_features_stage2, query_cam, query_label = query_info 166 | gallery_features_stage1, gallery_features_stage2, gallery_cam, gallery_label = gallery_info 167 | 168 | APs = [] 169 | CMC = [] 170 | 171 | # compute distance 172 | # distance_stage1 = self.cosine_dist_torch( 173 | # torch.Tensor(query_features_stage1).cuda(), 174 | # torch.Tensor(gallery_features_stage1).cuda()).data.cpu().numpy() 175 | distance_stage1 = self.cosine_dist(query_features_stage1, gallery_features_stage1) 176 | 177 | # 178 | for sample_idnex in range(distance_stage1.shape[0]): 179 | a_sample_query_cam = query_cam[sample_idnex] 180 | a_sample_query_label = query_label[sample_idnex] 181 | 182 | # stage 1, compute distance, return index and topk 183 | a_sample_distance_stage1 = distance_stage1[sample_idnex] 184 | a_sample_index_stage1 = np.argsort(a_sample_distance_stage1)[::-1] 185 | a_sample_topk_index_stage1 = a_sample_index_stage1[:topk] 186 | 187 | # stage2: feature extract topk features 188 | 
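# stage 2 re-ranks only the top-k stage-1 candidates: their stage-2 features are aligned by the graph-matching network (gmnet), scored by the verificator, and the resulting score is blended with the stage-1 cosine similarity via alpha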
a_sample_query_feature_stage2 = query_features_stage2[sample_idnex] 189 | topk_gallery_features_stage2 = gallery_features_stage2[a_sample_topk_index_stage1] 190 | a_sample_query_feature_stage2 = \ 191 | torch.Tensor(a_sample_query_feature_stage2).cuda().unsqueeze(0).repeat([topk, 1, 1]) 192 | topk_gallery_features_stage2 = torch.Tensor(topk_gallery_features_stage2).cuda() 193 | 194 | # stage2: compute verification score 195 | with torch.no_grad(): 196 | _, a_sample_query_feature_stage2, topk_gallery_features_stage2 = \ 197 | gmnet(a_sample_query_feature_stage2, topk_gallery_features_stage2, None) 198 | probs = verificator(a_sample_query_feature_stage2, topk_gallery_features_stage2) 199 | probs = probs.detach().view([-1]).cpu().data.numpy() 200 | 201 | # stage2 index 202 | # print(a_sample_distance_stage1[a_sample_topk_index_stage1]) 203 | # print(probs) 204 | # print(1-probs) 205 | # print('*******') 206 | topk_distance_stage2 = alpha * a_sample_distance_stage1[a_sample_topk_index_stage1] + (1 - alpha) * (1-probs) 207 | topk_index_stage2 = np.argsort(topk_distance_stage2)[::-1] 208 | topk_index_stage2 = a_sample_topk_index_stage1[topk_index_stage2.tolist()] 209 | a_sample_index_stage2 = np.concatenate([topk_index_stage2, a_sample_index_stage1[topk:]]) 210 | 211 | # 212 | ap, cmc = self.evaluate( 213 | a_sample_index_stage2, a_sample_query_cam, a_sample_query_label, gallery_cam, gallery_label, 'cosine') 214 | APs.append(ap) 215 | CMC.append(cmc) 216 | 217 | mAP = np.mean(np.array(APs)) 218 | 219 | min_len = 99999999 220 | for cmc in CMC: 221 | if len(cmc) < min_len: 222 | min_len = len(cmc) 223 | for i, cmc in enumerate(CMC): 224 | CMC[i] = cmc[0: min_len] 225 | CMC = np.mean(np.array(CMC), axis=0) 226 | 227 | return mAP, CMC 228 | 229 | 230 | 231 | def evaluate(self, index, query_cam, query_label, gallery_cam, gallery_label, dist): 232 | 233 | junk_index_1 = self.in1d(np.argwhere(query_label == gallery_label), np.argwhere(query_cam == gallery_cam)) 234 | junk_index_2 = np.argwhere(gallery_label == -1) 235 | junk_index = np.append(junk_index_1, junk_index_2) 236 | 237 | good_index = self.in1d(np.argwhere(query_label == gallery_label), np.argwhere(query_cam != gallery_cam)) 238 | index_wo_junk = self.notin1d(index, junk_index) 239 | 240 | return self.compute_AP(index_wo_junk, good_index) -------------------------------------------------------------------------------- /tools/logger.py: -------------------------------------------------------------------------------- 1 | 2 | class Logger: 3 | 4 | def __init__(self, logger_path): 5 | self.logger_path = logger_path 6 | 7 | def __call__(self, input): 8 | input = str(input) 9 | with open(self.logger_path, 'a') as f: 10 | f.writelines(input+'\n') 11 | print(input) -------------------------------------------------------------------------------- /tools/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .metric import * 4 | 5 | 6 | class CrossEntropyLabelSmooth(nn.Module): 7 | """Cross entropy loss with label smoothing regularizer. 8 | 9 | Reference: 10 | Szegedy et al. Rethinking the Inception Architecture for Computer Vision. CVPR 2016. 11 | Equation: y = (1 - epsilon) * y + epsilon / K. 12 | 13 | Args: 14 | num_classes (int): number of classes. 15 | epsilon (float): weight. 
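Example: with epsilon=0.1 and K classes, the ground-truth class receives weight 0.9 + 0.1/K and every other class 0.1/K.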
16 | """ 17 | 18 | def __init__(self, num_classes, epsilon=0.1, reduce=True, use_gpu=True): 19 | super(CrossEntropyLabelSmooth, self).__init__() 20 | self.num_classes = num_classes 21 | self.epsilon = epsilon 22 | self.reduce = reduce 23 | self.use_gpu = use_gpu 24 | self.logsoftmax = nn.LogSoftmax(dim=1) 25 | 26 | def forward(self, inputs, targets): 27 | """ 28 | Args: 29 | inputs: prediction matrix (before softmax) with shape (batch_size, num_classes) 30 | targets: ground truth labels with shape (batch_size) 31 | """ 32 | log_probs = self.logsoftmax(inputs) 33 | targets = torch.zeros(log_probs.size()).scatter_(1, targets.unsqueeze(1).data.cpu(), 1) 34 | if self.use_gpu: targets = targets.to(torch.device('cuda')) 35 | targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes 36 | if self.reduce: 37 | loss = (- targets * log_probs).mean(0).sum() 38 | else: 39 | loss = (- targets * log_probs).sum(1) 40 | 41 | return loss 42 | 43 | 44 | class RankingLoss: 45 | 46 | def __init__(self): 47 | pass 48 | 49 | def _label2similarity(self, label1, label2): 50 | ''' 51 | compute similarity matrix of label1 and label2 52 | :param label1: torch.Tensor, [m] 53 | :param label2: torch.Tensor, [n] 54 | :return: torch.Tensor, [m, n], {0, 1} 55 | ''' 56 | m, n = len(label1), len(label2) 57 | l1 = label1.view(m, 1).expand([m, n]) 58 | l2 = label2.view(n, 1).expand([n, m]).t() 59 | similarity = l1 == l2 60 | return similarity 61 | 62 | def _batch_hard(self, mat_distance, mat_similarity, more_similar):  # mine the hardest positive and hardest negative per row 63 | 64 | if more_similar == 'smaller': 65 | sorted_mat_distance, _ = torch.sort(mat_distance + (-9999999.) * (1 - mat_similarity), dim=1, descending=True) 66 | hard_p = sorted_mat_distance[:, 0] 67 | sorted_mat_distance, _ = torch.sort(mat_distance + (9999999.) * (mat_similarity), dim=1, descending=False) 68 | hard_n = sorted_mat_distance[:, 0] 69 | return hard_p, hard_n 70 | 71 | elif more_similar == 'larger': 72 | sorted_mat_distance, _ = torch.sort(mat_distance + (9999999.) * (1 - mat_similarity), dim=1, descending=False) 73 | hard_p = sorted_mat_distance[:, 0] 74 | sorted_mat_distance, _ = torch.sort(mat_distance + (-9999999.)
* (mat_similarity), dim=1, descending=True) 75 | hard_n = sorted_mat_distance[:, 0] 76 | return hard_p, hard_n 77 | 78 | 79 | class TripletLoss(RankingLoss): 80 | ''' 81 | Compute Triplet loss augmented with Batch Hard 82 | Details can be seen in 'In defense of the Triplet Loss for Person Re-Identification' 83 | ''' 84 | 85 | def __init__(self, margin, metric): 86 | ''' 87 | :param margin: float or 'soft', for MarginRankingLoss with margin and soft margin 88 | :param bh: batch hard 89 | :param metric: l2 distance or cosine distance 90 | ''' 91 | self.margin = margin 92 | self.margin_loss = nn.MarginRankingLoss(margin=margin) 93 | self.metric = metric 94 | 95 | def __call__(self, emb1, emb2, emb3, label1, label2, label3): 96 | ''' 97 | 98 | :param emb1: torch.Tensor, [m, dim] 99 | :param emb2: torch.Tensor, [n, dim] 100 | :param label1: torch.Tensor, [m] 101 | :param label2: torch.Tensor, [b] 102 | :return: 103 | ''' 104 | 105 | if self.metric == 'cosine': 106 | mat_dist = cosine_dist(emb1, emb2) 107 | mat_sim = self._label2similarity(label1, label2) 108 | hard_p, _ = self._batch_hard(mat_dist, mat_sim.float(), more_similar='larger') 109 | 110 | mat_dist = cosine_dist(emb1, emb3) 111 | mat_sim = self._label2similarity(label1, label3) 112 | _, hard_n = self._batch_hard(mat_dist, mat_sim.float(), more_similar='larger') 113 | 114 | margin_label = -torch.ones_like(hard_p) 115 | 116 | elif self.metric == 'euclidean': 117 | mat_dist = euclidean_dist(emb1, emb2) 118 | mat_sim = self._label2similarity(label1, label2) 119 | hard_p, _ = self._batch_hard(mat_dist, mat_sim.float(), more_similar='smaller') 120 | 121 | mat_dist = euclidean_dist(emb1, emb3) 122 | mat_sim = self._label2similarity(label1, label3) 123 | _, hard_n = self._batch_hard(mat_dist, mat_sim.float(), more_similar='smaller') 124 | 125 | margin_label = torch.ones_like(hard_p) 126 | 127 | return self.margin_loss(hard_n, hard_p, margin_label) 128 | 129 | -------------------------------------------------------------------------------- /tools/meter.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | class NumpyCatMeter: 5 | ''' 6 | Concatenate Meter for np.array 7 | ''' 8 | def __init__(self): 9 | self.reset() 10 | 11 | def reset(self): 12 | self.val = None 13 | 14 | def update(self, val): 15 | if self.val is None: 16 | self.val = val 17 | else: 18 | self.val = np.concatenate([self.val, val], axis=0) 19 | 20 | def get_val(self): 21 | return self.val 22 | 23 | 24 | class TorchCatMeter: 25 | ''' 26 | Concatenate Meter for torch.Tensor 27 | ''' 28 | def __init__(self): 29 | self.reset() 30 | 31 | def reset(self): 32 | self.val = None 33 | 34 | def update(self, val): 35 | if self.val is None: 36 | self.val = val 37 | else: 38 | self.val = torch.cat([self.val, val], dim=0) 39 | 40 | def get_val(self): 41 | return self.val 42 | 43 | def get_val_numpy(self): 44 | return self.val.data.cpu().numpy() 45 | 46 | 47 | class MultiItemAverageMeter: 48 | 49 | def __init__(self): 50 | self.content = {} 51 | 52 | def update(self, val): 53 | ''' 54 | :param val: dict, keys are strs, values are torch.Tensor or np.array 55 | ''' 56 | for key in list(val.keys()): 57 | value = val[key] 58 | if key not in list(self.content.keys()): 59 | self.content[key] = {'avg': value, 'sum': value, 'count': 1.0} 60 | else: 61 | self.content[key]['sum'] += value 62 | self.content[key]['count'] += 1.0 63 | self.content[key]['avg'] = self.content[key]['sum'] / self.content[key]['count'] 64 | 65 | def 
get_val(self): 66 | keys = list(self.content.keys()) 67 | values = [] 68 | for key in keys: 69 | try: 70 | values.append(self.content[key]['avg'].data.cpu().numpy()) 71 | except: 72 | values.append(self.content[key]['avg']) 73 | return keys, values 74 | 75 | def get_str(self): 76 | 77 | result = '' 78 | keys, values = self.get_val() 79 | 80 | for key, value in zip(keys, values): 81 | result += key 82 | result += ': ' 83 | result += str(value) 84 | result += '; ' 85 | 86 | return result 87 | 88 | -------------------------------------------------------------------------------- /tools/metric.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def cosine_dist(x, y): 5 | ''' 6 | :param x: torch.tensor, 2d 7 | :param y: torch.tensor, 2d 8 | :return: 9 | ''' 10 | 11 | bs1 = x.size()[0] 12 | bs2 = y.size()[0] 13 | 14 | frac_up = torch.matmul(x, y.transpose(0, 1)) 15 | frac_down = (torch.sqrt(torch.sum(torch.pow(x, 2), 1))).view(bs1, 1).repeat(1, bs2) * \ 16 | (torch.sqrt(torch.sum(torch.pow(y, 2), 1))).view(1, bs2).repeat(bs1, 1) 17 | cosine = frac_up / frac_down 18 | 19 | return cosine 20 | 21 | 22 | def euclidean_dist(x, y): 23 | """ 24 | Args: 25 | x: pytorch Variable, with shape [m, d] 26 | y: pytorch Variable, with shape [n, d] 27 | Returns: 28 | dist: pytorch Variable, with shape [m, n] 29 | """ 30 | m, n = x.size(0), y.size(0) 31 | xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n) 32 | yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t() 33 | dist = xx + yy 34 | dist.addmm_(1, -2, x, y.t()) 35 | dist = dist.clamp(min=1e-12).sqrt() # for numerical stability 36 | return dist 37 | -------------------------------------------------------------------------------- /tools/transforms2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import math 4 | 5 | 6 | class RandomErasing(object): 7 | """ Randomly selects a rectangle region in an image and erases its pixels. 8 | 'Random Erasing Data Augmentation' by Zhong et al. 9 | See https://arxiv.org/pdf/1708.04896.pdf 10 | Args: 11 | probability: The probability that the Random Erasing operation will be performed. 12 | sl: Minimum proportion of erased area against input image. 13 | sh: Maximum proportion of erased area against input image. 14 | r1: Minimum aspect ratio of erased area. 15 | mean: Erasing value. 
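Note: expects a CHW image tensor (e.g. after ToTensor()/Normalize()); with probability 1 - probability the input is returned unchanged.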
16 | """ 17 | 18 | def __init__(self, probability=0.5, sl=0.02, sh=0.4, r1=0.3, mean=(0.4914, 0.4822, 0.4465)): 19 | self.probability = probability 20 | self.mean = mean 21 | self.sl = sl 22 | self.sh = sh 23 | self.r1 = r1 24 | 25 | def __call__(self, img): 26 | 27 | if random.uniform(0, 1) >= self.probability: 28 | return img 29 | 30 | for attempt in range(100): 31 | area = img.size()[1] * img.size()[2] 32 | 33 | target_area = random.uniform(self.sl, self.sh) * area 34 | aspect_ratio = random.uniform(self.r1, 1 / self.r1) 35 | 36 | h = int(round(math.sqrt(target_area * aspect_ratio))) 37 | w = int(round(math.sqrt(target_area / aspect_ratio))) 38 | 39 | if w < img.size()[2] and h < img.size()[1]: 40 | x1 = random.randint(0, img.size()[1] - h) 41 | y1 = random.randint(0, img.size()[2] - w) 42 | if img.size()[0] == 3: 43 | img[0, x1:x1 + h, y1:y1 + w] = self.mean[0] 44 | img[1, x1:x1 + h, y1:y1 + w] = self.mean[1] 45 | img[2, x1:x1 + h, y1:y1 + w] = self.mean[2] 46 | else: 47 | img[0, x1:x1 + h, y1:y1 + w] = self.mean[0] 48 | return img 49 | 50 | return img -------------------------------------------------------------------------------- /tools/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import torch 4 | 5 | 6 | def os_walk(folder_dir): 7 | for root, dirs, files in os.walk(folder_dir): 8 | files = sorted(files, reverse=True) 9 | dirs = sorted(dirs, reverse=True) 10 | return root, dirs, files 11 | 12 | 13 | def time_now(): 14 | return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()) 15 | 16 | 17 | def make_dirs(dir): 18 | if not os.path.exists(dir): 19 | os.makedirs(dir) 20 | print('Successfully make dirs: {}'.format(dir)) 21 | else: 22 | print('Existed dirs: {}'.format(dir)) 23 | 24 | 25 | def label2similarity(label1, label2): 26 | ''' 27 | compute similarity matrix of label1 and label2 28 | :param label1: torch.Tensor, [m] 29 | :param label2: torch.Tensor, [n] 30 | :return: torch.Tensor, [m, n], {0, 1} 31 | ''' 32 | m, n = len(label1), len(label2) 33 | l1 = label1.view(m, 1).expand([m, n]) 34 | l2 = label2.view(n, 1).expand([n, m]).t() 35 | similarity = l1 == l2 36 | return similarity -------------------------------------------------------------------------------- /tools/visualize.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import os.path as osp 4 | import shutil 5 | import sys 6 | from PIL import Image, ImageOps, ImageDraw 7 | 8 | from .utils import make_dirs 9 | 10 | 11 | # def visualize_ranked_results(distmat, dataset, save_dir='', topk=20): 12 | # """Visualizes ranked results. 13 | # 14 | # Supports both image-reid and video-reid. 15 | # 16 | # Args: 17 | # distmat (numpy.ndarray): distance matrix of shape (num_query, num_gallery). 18 | # dataset (tuple): a 2-tuple containing (query, gallery), each of which contains 19 | # tuples of (img_path(s), pid, camid). 20 | # save_dir (str): directory to save output images. 21 | # topk (int, optional): denoting top-k images in the rank list to be visualized. 
22 | # """ 23 | # num_q, num_g = distmat.shape 24 | # 25 | # print('Visualizing top-{} ranks'.format(topk)) 26 | # print('# query: {}\n# gallery {}'.format(num_q, num_g)) 27 | # print('Saving images to "{}"'.format(save_dir)) 28 | # 29 | # query, gallery = dataset 30 | # assert num_q == len(query) 31 | # assert num_g == len(gallery) 32 | # 33 | # indices = np.argsort(distmat, axis=1) 34 | # make_dirs(save_dir) 35 | # 36 | # def _cp_img_to(src, dst, rank, prefix): 37 | # """ 38 | # Args: 39 | # src: image path or tuple (for vidreid) 40 | # dst: target directory 41 | # rank: int, denoting ranked position, starting from 1 42 | # prefix: string 43 | # """ 44 | # if isinstance(src, tuple) or isinstance(src, list): 45 | # dst = osp.join(dst, prefix + '_top' + str(rank).zfill(3)) 46 | # make_dirs(dst) 47 | # for img_path in src: 48 | # shutil.copy(img_path, dst) 49 | # else: 50 | # dst = osp.join(dst, prefix + '_top' + str(rank).zfill(3) + '_name_' + osp.basename(src)) 51 | # shutil.copy(src, dst) 52 | # 53 | # for q_idx in range(num_q): 54 | # qimg_path, qpid, qcamid = query[q_idx] 55 | # if isinstance(qimg_path, tuple) or isinstance(qimg_path, list): 56 | # qdir = osp.join(save_dir, osp.basename(qimg_path[0])) 57 | # else: 58 | # qdir = osp.join(save_dir, osp.basename(qimg_path)) 59 | # make_dirs(qdir) 60 | # _cp_img_to(qimg_path, qdir, rank=0, prefix='query') 61 | # 62 | # rank_idx = 1 63 | # for g_idx in indices[q_idx, :]: 64 | # gimg_path, gpid, gcamid = gallery[g_idx] 65 | # invalid = (qpid == gpid) & (qcamid == gcamid) 66 | # if not invalid: 67 | # _cp_img_to(gimg_path, qdir, rank=rank_idx, prefix='gallery') 68 | # rank_idx += 1 69 | # if rank_idx > topk: 70 | # break 71 | # 72 | # print("Done") 73 | 74 | 75 | 76 | 77 | # def visualize_ranked_results(distmat, dataset, save_dir='', topk=20, sort='ascend'): 78 | # """Visualizes ranked results. 79 | # 80 | # Supports both image-reid and video-reid. 81 | # 82 | # Args: 83 | # distmat (numpy.ndarray): distance matrix of shape (num_query, num_gallery). 84 | # dataset (tuple): a 2-tuple containing (query, gallery), each of which contains 85 | # tuples of (img_path(s), pid, camid). 86 | # save_dir (str): directory to save output images. 87 | # topk (int, optional): denoting top-k images in the rank list to be visualized. 
88 | # """ 89 | # num_q, num_g = distmat.shape 90 | # 91 | # print('Visualizing top-{} ranks'.format(topk)) 92 | # print('# query: {}\n# gallery {}'.format(num_q, num_g)) 93 | # print('Saving images to "{}"'.format(save_dir)) 94 | # 95 | # query, gallery = dataset 96 | # assert num_q == len(query) 97 | # assert num_g == len(gallery) 98 | # assert sort in ['descend', 'ascend'] 99 | # 100 | # if sort is 'ascend': 101 | # indices = np.argsort(distmat, axis=1) 102 | # elif sort is 'descend': 103 | # indices = np.argsort(distmat, axis=1)[:, ::-1] 104 | # 105 | # make_dirs(save_dir) 106 | # 107 | # 108 | # def cat_imgs_to(image_list, hit_list, text_list, target_dir): 109 | # 110 | # images = [] 111 | # for img, hit, text in zip(image_list, hit_list, text_list): 112 | # img = Image.open(img).resize((64, 128)) 113 | # d = ImageDraw.Draw(img) 114 | # d.text((3, 3), "{:.4}".format(text), fill=(255, 255, 0)) 115 | # if hit: 116 | # img = ImageOps.expand(img, border=4, fill='green') 117 | # else: 118 | # img = ImageOps.expand(img, border=4, fill='red') 119 | # images.append(img) 120 | # 121 | # widths, heights = zip(*(i.size for i in images)) 122 | # total_width = sum(widths) 123 | # max_height = max(heights) 124 | # new_im = Image.new('RGB', (total_width, max_height)) 125 | # x_offset = 0 126 | # for im in images: 127 | # new_im.paste(im, (x_offset, 0)) 128 | # x_offset += im.size[0] 129 | # 130 | # new_im.save(target_dir) 131 | # 132 | # counts = 0 133 | # for q_idx in range(num_q): 134 | # flag = True 135 | # 136 | # image_list = [] 137 | # hit_list = [] 138 | # text_list = [] 139 | # 140 | # # query image 141 | # qimg_path, qpid, qcamid = query[q_idx] 142 | # image_list.append(qimg_path) 143 | # hit_list.append(True) 144 | # text_list.append(0.0) 145 | # 146 | # # target dir 147 | # if isinstance(qimg_path, tuple) or isinstance(qimg_path, list): 148 | # qdir = osp.join(save_dir, osp.basename(qimg_path[0])) 149 | # else: 150 | # qdir = osp.join(save_dir, osp.basename(qimg_path)) 151 | # 152 | # # matched images 153 | # rank_idx = 1 154 | # for g_idx in indices[q_idx, :]: 155 | # gimg_path, gpid, gcamid = gallery[g_idx] 156 | # invalid = (qpid == gpid and qcamid == gcamid) or (gpid == -1 or gpid == 0) 157 | # if not invalid: 158 | # if rank_idx == 1 and qpid == gpid: 159 | # flag = False 160 | # image_list.append(gimg_path) 161 | # hit_list.append(qpid == gpid) 162 | # text_list.append(distmat[q_idx, g_idx]) 163 | # rank_idx += 1 164 | # if rank_idx > topk: 165 | # break 166 | # 167 | # if flag: 168 | # counts += 1 169 | # cat_imgs_to(image_list, hit_list, text_list, qdir) 170 | # print(counts, qdir) 171 | 172 | 173 | 174 | 175 | 176 | def visualize_ranked_results(distmat1, distmat2, dataset, save_dir='', topk=20, sort='ascend'): 177 | """Visualizes ranked results. 178 | Supports both image-reid and video-reid. 179 | ` Args: 180 | distmat1 (numpy.ndarray): distance matrix of shape (num_query, num_gallery). 181 | dataset (tuple): a 2-tuple containing (query, gallery), each of which contains 182 | tuples of (img_path(s), pid, camid). 183 | save_dir (str): directory to save output images. 184 | topk (int, optional): denoting top-k images in the rank list to be visualized. 
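distmat2 (numpy.ndarray): second score matrix indexed by rank position in the sorted list; its value is drawn as a second overlay on each gallery image (0.0 is used when it cannot be indexed).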
185 | """ 186 | num_q, num_g = distmat1.shape 187 | 188 | print('Visualizing top-{} ranks'.format(topk)) 189 | print('# query: {}\n# gallery {}'.format(num_q, num_g)) 190 | print('Saving images to "{}"'.format(save_dir)) 191 | 192 | query, gallery = dataset 193 | assert num_q == len(query) 194 | assert num_g == len(gallery) 195 | assert sort in ['descend', 'ascend'] 196 | 197 | if sort == 'ascend': 198 | indices = np.argsort(distmat1, axis=1) 199 | elif sort == 'descend': 200 | indices = np.argsort(distmat1, axis=1)[:, ::-1] 201 | 202 | make_dirs(save_dir) 203 | 204 | 205 | def cat_imgs_to(image_list, hit_list, text_list, text2_list, target_dir): 206 | 207 | images = [] 208 | for img, hit, text, text2 in zip(image_list, hit_list, text_list, text2_list): 209 | img = Image.open(img).resize((64, 128)) 210 | d = ImageDraw.Draw(img) 211 | d.text((3, 1), "{:.3}".format(text), fill=(255, 255, 0)) 212 | d.text((3, 10), "{:.3}".format(text2), fill=(255, 255, 0)) 213 | if hit: 214 | img = ImageOps.expand(img, border=4, fill='green') 215 | else: 216 | img = ImageOps.expand(img, border=4, fill='red') 217 | images.append(img) 218 | 219 | widths, heights = zip(*(i.size for i in images)) 220 | total_width = sum(widths) 221 | max_height = max(heights) 222 | new_im = Image.new('RGB', (total_width, max_height)) 223 | x_offset = 0 224 | for im in images: 225 | new_im.paste(im, (x_offset, 0)) 226 | x_offset += im.size[0] 227 | 228 | new_im.save(target_dir) 229 | 230 | counts = 0 231 | for q_idx in range(num_q): 232 | flag = True 233 | 234 | image_list = [] 235 | hit_list = [] 236 | text_list = [] 237 | text2_list = [] 238 | 239 | # query image 240 | qimg_path, qpid, qcamid = query[q_idx] 241 | image_list.append(qimg_path) 242 | hit_list.append(True) 243 | text_list.append(0.0) 244 | text2_list.append(0.0) 245 | 246 | # target dir 247 | if isinstance(qimg_path, tuple) or isinstance(qimg_path, list): 248 | qdir = osp.join(save_dir, osp.basename(qimg_path[0])) 249 | else: 250 | qdir = osp.join(save_dir, osp.basename(qimg_path)) 251 | 252 | # matched images 253 | rank_idx = 1 254 | for ii, g_idx in enumerate(indices[q_idx, :]): 255 | gimg_path, gpid, gcamid = gallery[g_idx] 256 | invalid = (qpid == gpid and qcamid == gcamid) or (gpid == -1 or gpid == 0) 257 | if not invalid: 258 | if rank_idx == 1 and qpid == gpid: 259 | flag = False 260 | image_list.append(gimg_path) 261 | hit_list.append(qpid == gpid) 262 | text_list.append(distmat1[q_idx, g_idx]) 263 | try: 264 | text2_list.append(distmat2[q_idx, ii]) 265 | except Exception: 266 | text2_list.append(0.0) 267 | rank_idx += 1 268 | if rank_idx > topk: 269 | break 270 | 271 | if flag:  # only save queries whose top-1 valid match is wrong 272 | counts += 1 273 | cat_imgs_to(image_list, hit_list, text_list, text2_list, qdir) 274 | print(counts, qdir) --------------------------------------------------------------------------------