├── README.md
├── ResNet.py
├── __init__.py
├── data_manager.py
├── dataset_loader.py
├── distance.py
├── eval_metrics.py
├── losses.py
├── optimizers.py
├── train_class.py
├── transforms.py
└── utils.py
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Person-ReID
Undergraduate graduation project at Chongqing University of Technology. Topic: person re-identification based on deep learning.
Built while following the open course on person ReID by Luo Hao of Zhejiang University.
--------------------------------------------------------------------------------
/ResNet.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import
import torch
import torchvision
from torch import nn
from torch.nn import functional as F

# from IPython import embed

# An early draft kept for reference:
# class ResNet50(nn.Module):
#     def __init__(self, num_classes, loss={'softmax', 'metric'}, **kwargs):
#         super(ResNet50, self).__init__()
#         resnet50 = torchvision.models.resnet50(pretrained=True)
#         self.base = nn.Sequential(*list(resnet50.children())[:-2])  # drop the last two layers and define our own classifier
#         self.classifier = nn.Linear(2048, num_classes)
#
#     def forward(self, x):
#         x = self.base(x)
#         return x


class HorizontalMaxPool2d(nn.Module):
    def __init__(self):
        super(HorizontalMaxPool2d, self).__init__()

    def forward(self, x):
        inp_size = x.size()
        return nn.functional.max_pool2d(input=x, kernel_size=(1, inp_size[3]))


class ResNet50(nn.Module):
    def __init__(self, num_classes, loss={'softmax'}, aligned=False, **kwargs):
        super(ResNet50, self).__init__()
        self.loss = loss
        resnet50 = torchvision.models.resnet50(pretrained=True)
        self.base = nn.Sequential(*list(resnet50.children())[:-2])
        self.classifier = nn.Linear(2048, num_classes)
        self.feat_dim = 2048  # feature dimension
        self.aligned = aligned
        self.horizon_pool = HorizontalMaxPool2d()
        if self.aligned:
            self.bn = nn.BatchNorm2d(2048)
            self.relu = nn.ReLU(inplace=True)
            self.conv1 = nn.Conv2d(2048, 128, kernel_size=1, stride=1, padding=0, bias=True)

    def forward(self, x):
        x = self.base(x)
        if not self.training:
            lf = self.horizon_pool(x)
        if self.aligned and self.training:
            lf = self.bn(x)
            lf = self.relu(lf)
            lf = self.horizon_pool(lf)
            lf = self.conv1(lf)
        if self.aligned or not self.training:
            lf = lf.view(lf.size()[0:3])
            lf = lf / torch.pow(lf, 2).sum(dim=1, keepdim=True).clamp(min=1e-12).sqrt()
        x = F.avg_pool2d(x, x.size()[2:])
        f = x.view(x.size(0), -1)
        # f = 1. * f / (torch.norm(f, 2, dim=-1, keepdim=True).expand_as(f) + 1e-12)
        if not self.training:
            return f, lf
        y = self.classifier(f)
        if self.loss == {'softmax'}:
            return y
        elif self.loss == {'metric'}:
            if self.aligned: return f, lf
            return f
        elif self.loss == {'softmax', 'metric'}:
            if self.aligned: return y, f, lf
            return y, f
        else:
            raise KeyError("Unsupported loss: {}".format(self.loss))


if __name__ == '__main__':
    model = ResNet50(num_classes=751)
    imgs = torch.randn(32, 3, 256, 128)
    f = model(imgs)
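    # A hedged sanity check of the aligned branch in eval mode (a sketch; it
    # assumes torchvision can fetch the pretrained ResNet-50 weights). With a
    # 256x128 input the backbone yields an 8x4 map, so the horizontal pooling
    # produces 8 stripe features per image:
    model = ResNet50(num_classes=751, aligned=True)
    model.eval()
    with torch.no_grad():
        f, lf = model(imgs)
    print(f.shape)   # global feature: torch.Size([32, 2048])
    print(lf.shape)  # local features: torch.Size([32, 2048, 8])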
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PengYiXiang1016/Person-ReID/ebe713cbd744231747cf7785b3d10ea921e8faad/__init__.py
--------------------------------------------------------------------------------
/data_manager.py:
--------------------------------------------------------------------------------
import os
import os.path as osp
import numpy as np
import glob
# from IPython import embed
import re

class Market1501(object):

    dataset_dir = 'Market-1501-v15.09.15'

    def __init__(self, root='data', **kwargs):
        self.dataset_dir = osp.join(root, self.dataset_dir)
        self.train_dir = osp.join(self.dataset_dir, 'bounding_box_train')
        self.query_dir = osp.join(self.dataset_dir, 'query')
        self.gallery_dir = osp.join(self.dataset_dir, 'bounding_box_test')

        self._check_before_run()

        # image paths, annotations (pid, camid) and image counts per subset
        train, num_train_pids, num_train_imgs = self._process_dir(self.train_dir, relabel=True)
        query, num_query_pids, num_query_imgs = self._process_dir(self.query_dir)
        gallery, num_gallery_pids, num_gallery_imgs = self._process_dir(self.gallery_dir)
        num_total_pids = num_train_pids + num_query_pids
        num_total_imgs = num_train_imgs + num_query_imgs + num_gallery_imgs

        print("=> Market1501 loaded")
        print("Dataset statistics:")
        print("  -------------------------------")
        print("  subset   | # ids | # images")
        print("  -------------------------------")
        print("  train    | {:5d} | {:8d}".format(num_train_pids, num_train_imgs))
        print("  query    | {:5d} | {:8d}".format(num_query_pids, num_query_imgs))
        print("  gallery  | {:5d} | {:8d}".format(num_gallery_pids, num_gallery_imgs))
        print("  -------------------------------")
        print("  total    | {:5d} | {:8d}".format(num_total_pids, num_total_imgs))
        print("  -------------------------------")

        self.train = train
        self.query = query
        self.gallery = gallery

        self.num_train_pids = num_train_pids
        self.num_query_pids = num_query_pids
        self.num_gallery_pids = num_gallery_pids

    def _check_before_run(self):
        if not os.path.exists(self.dataset_dir):
            raise RuntimeError("'{}' is not available!".format(self.dataset_dir))
        if not os.path.exists(self.train_dir):
            raise RuntimeError("'{}' is not available!".format(self.train_dir))
        if not os.path.exists(self.query_dir):
            raise RuntimeError("'{}' is not available!".format(self.query_dir))
        if not os.path.exists(self.gallery_dir):
            raise RuntimeError("'{}' is not available!".format(self.gallery_dir))

    def _process_dir(self, dir_path, relabel=False):
        img_paths = glob.glob(osp.join(dir_path, "*.jpg"))  # collect every .jpg file
        pattern = re.compile(r'([-\d]+)_c(\d)')

        # First pass: collect all person ids, so relabelling is stable.
        pid_container = set()  # a set keeps each person id exactly once
        for img_path in img_paths:
            pid, _ = map(int, pattern.search(img_path).groups())  # map() converts the matched strings to ints
            if pid == -1:
                continue  # junk images are ignored
            pid_container.add(pid)
        pid2label = {pid: label for label, pid in enumerate(pid_container)}

        # Second pass: build the (image path, person id, camera id) records.
        dataset = []
        for img_path in img_paths:
            pid, camid = map(int, pattern.search(img_path).groups())
            if pid == -1:
                continue
            assert 0 <= pid <= 1501
            assert 1 <= camid <= 6
            camid -= 1  # camera index starts from 0
            if relabel:
                pid = pid2label[pid]
            dataset.append((img_path, pid, camid))

        num_pids = len(pid_container)
        num_imgs = len(img_paths)
        return dataset, num_pids, num_imgs


if __name__ == '__main__':
    data = Market1501(root='data')
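    # An illustration of the filename convention this loader relies on (the
    # filename below is a made-up example in Market-1501 style):
    # 0002_c1s1_000451_03.jpg -> person id 2, camera 1
    pid, camid = map(int, re.compile(r'([-\d]+)_c(\d)').search('0002_c1s1_000451_03.jpg').groups())
    print(pid, camid)  # 2 1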
--------------------------------------------------------------------------------
/dataset_loader.py:
--------------------------------------------------------------------------------
from __future__ import print_function, absolute_import
import os
from PIL import Image
import numpy
import os.path as osp
import torch
from torch.utils.data import Dataset

def read_image(img_path):
    got_img = False
    if not osp.exists(img_path):
        raise IOError("{} does not exist".format(img_path))
    while not got_img:
        try:
            img = Image.open(img_path).convert('RGB')
            got_img = True
        except IOError:
            print("Could not read image '{}', retrying".format(img_path))
            pass
    return img

class ImageDataset(Dataset):
    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, index):
        img_path, pid, camid = self.dataset[index]
        img = read_image(img_path)
        if self.transform is not None:
            img = self.transform(img)
        return img, pid, camid


if __name__ == '__main__':
    import data_manager
    dataset = data_manager.Market1501(root='data')
    train_loader = ImageDataset(dataset.train)
    # from IPython import embed
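    # A hedged quick check (assumes Market-1501 is present under data/): wrap
    # the dataset in a torch DataLoader and pull one batch.
    from torch.utils.data import DataLoader
    import transforms as T
    transform = T.Compose([T.Resize((256, 128)), T.ToTensor()])
    loader = DataLoader(ImageDataset(dataset.train, transform=transform),
                        batch_size=32, shuffle=True)
    imgs, pids, camids = next(iter(loader))
    print(imgs.shape)  # torch.Size([32, 3, 256, 128])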
--------------------------------------------------------------------------------
/distance.py:
--------------------------------------------------------------------------------
"""Numpy version of euclidean distance, shortest distance, etc.
Notice the input/output shape of methods, so that you can better understand
the meaning of these methods."""
import numpy as np


def normalize(nparray, order=2, axis=0):
    """Normalize an N-D numpy array along the specified axis."""
    norm = np.linalg.norm(nparray, ord=order, axis=axis, keepdims=True)
    return nparray / (norm + np.finfo(np.float32).eps)


def compute_dist(array1, array2, type='euclidean'):
    """Compute the euclidean or cosine distance of all pairs.
    Args:
        array1: numpy array with shape [m1, n]
        array2: numpy array with shape [m2, n]
        type: one of ['cosine', 'euclidean']
    Returns:
        numpy array with shape [m1, m2]
    """
    assert type in ['cosine', 'euclidean']
    if type == 'cosine':
        array1 = normalize(array1, axis=1)
        array2 = normalize(array2, axis=1)
        dist = np.matmul(array1, array2.T)
        return dist
    else:
        # shape [m1, 1]
        square1 = np.sum(np.square(array1), axis=1)[..., np.newaxis]
        # shape [1, m2]
        square2 = np.sum(np.square(array2), axis=1)[np.newaxis, ...]
        squared_dist = - 2 * np.matmul(array1, array2.T) + square1 + square2
        squared_dist[squared_dist < 0] = 0
        dist = np.sqrt(squared_dist)
        return dist


def shortest_dist(dist_mat):
    """Parallel version.
    Args:
        dist_mat: numpy array, available shape
            1) [m, n]
            2) [m, n, N], N is batch size
            3) [m, n, *], * can be arbitrary additional dimensions
    Returns:
        dist: three cases corresponding to `dist_mat`
            1) scalar
            2) numpy array, with shape [N]
            3) numpy array with shape [*]
    """
    m, n = dist_mat.shape[:2]
    dist = np.zeros_like(dist_mat)
    for i in range(m):
        for j in range(n):
            if (i == 0) and (j == 0):
                dist[i, j] = dist_mat[i, j]
            elif (i == 0) and (j > 0):
                dist[i, j] = dist[i, j - 1] + dist_mat[i, j]
            elif (i > 0) and (j == 0):
                dist[i, j] = dist[i - 1, j] + dist_mat[i, j]
            else:
                dist[i, j] = \
                    np.min(np.stack([dist[i - 1, j], dist[i, j - 1]], axis=0), axis=0) \
                    + dist_mat[i, j]
    # I ran into memory disaster when returning this reference! I still don't
    # know why.
    # dist = dist[-1, -1]
    dist = dist[-1, -1].copy()
    return dist


def unaligned_dist(dist_mat):
    """Parallel version.
    Args:
        dist_mat: numpy array, available shape
            1) [m, n]
            2) [m, n, N], N is batch size
            3) [m, n, *], * can be arbitrary additional dimensions
    Returns:
        dist: three cases corresponding to `dist_mat`
            1) scalar
            2) numpy array, with shape [N]
            3) numpy array with shape [*]
    """
    m = dist_mat.shape[0]
    dist = np.zeros_like(dist_mat[0])
    for i in range(m):
        dist[i] = dist_mat[i][i]
    dist = np.sum(dist, axis=0).copy()
    return dist


def meta_local_dist(x, y, aligned):
    """
    Args:
        x: numpy array, with shape [m, d]
        y: numpy array, with shape [n, d]
    Returns:
        dist: scalar
    """
    eu_dist = compute_dist(x, y, 'euclidean')
    dist_mat = (np.exp(eu_dist) - 1.) / (np.exp(eu_dist) + 1.)
    if aligned:
        dist = shortest_dist(dist_mat[np.newaxis])[0]
    else:
        dist = unaligned_dist(dist_mat[np.newaxis])[0]
    return dist


# Tooooooo slow!
def serial_local_dist(x, y, aligned):
    """
    Args:
        x: numpy array, with shape [M, m, d]
        y: numpy array, with shape [N, n, d]
    Returns:
        dist: numpy array, with shape [M, N]
    """
    M, N = x.shape[0], y.shape[0]
    dist_mat = np.zeros([M, N])
    for i in range(M):
        for j in range(N):
            dist_mat[i, j] = meta_local_dist(x[i], y[j], aligned)
    return dist_mat


def parallel_local_dist(x, y, aligned):
    """Parallel version.
    Args:
        x: numpy array, with shape [M, m, d]
        y: numpy array, with shape [N, n, d]
    Returns:
        dist: numpy array, with shape [M, N]
    """
    M, m, d = x.shape
    N, n, d = y.shape
    x = x.reshape([M * m, d])
    y = y.reshape([N * n, d])
    # shape [M * m, N * n]
    dist_mat = compute_dist(x, y, type='euclidean')
    dist_mat = (np.exp(dist_mat) - 1.) / (np.exp(dist_mat) + 1.)
    # shape [M * m, N * n] -> [M, m, N, n] -> [m, n, M, N]
    dist_mat = dist_mat.reshape([M, m, N, n]).transpose([1, 3, 0, 2])
    # shape [M, N]
    if aligned:
        dist_mat = shortest_dist(dist_mat)
    else:
        dist_mat = unaligned_dist(dist_mat)
    return dist_mat


def local_dist(x, y, aligned):
    if (x.ndim == 2) and (y.ndim == 2):
        return meta_local_dist(x, y, aligned)
    elif (x.ndim == 3) and (y.ndim == 3):
        return parallel_local_dist(x, y, aligned)
    else:
        raise NotImplementedError('Input shape not supported.')


def low_memory_matrix_op(
        func,
        x, y,
        x_split_axis, y_split_axis,
        x_num_splits, y_num_splits,
        verbose=False, aligned=True):
    """
    For matrix operations like multiplication, in order not to flood the memory
    with huge data, split the matrices into smaller parts (divide and conquer).

    Note:
        If still out of memory, increase `*_num_splits`.

    Args:
        func: a matrix function func(x, y) -> z with shape [M, N]
        x: numpy array, the dimension to split has length M
        y: numpy array, the dimension to split has length N
        x_split_axis: the axis along which to split x into parts
        y_split_axis: the axis along which to split y into parts
        x_num_splits: number of splits. 1 <= x_num_splits <= M
        y_num_splits: number of splits. 1 <= y_num_splits <= N
        verbose: whether to print the progress

    Returns:
        mat: numpy array, shape [M, N]
    """
    if verbose:
        import sys
        import time
        printed = False
        st = time.time()
        last_time = time.time()

    mat = [[] for _ in range(x_num_splits)]
    for i, part_x in enumerate(
            np.array_split(x, x_num_splits, axis=x_split_axis)):
        for j, part_y in enumerate(
                np.array_split(y, y_num_splits, axis=y_split_axis)):
            part_mat = func(part_x, part_y, aligned)
            mat[i].append(part_mat)

            if verbose:
                if not printed:
                    printed = True
                else:
                    # Clean the current line
                    sys.stdout.write("\033[F\033[K")
                print('Matrix part ({}, {}) / ({}, {}), +{:.2f}s, total {:.2f}s'
                      .format(i + 1, j + 1, x_num_splits, y_num_splits,
                              time.time() - last_time, time.time() - st))
                last_time = time.time()
        mat[i] = np.concatenate(mat[i], axis=1)
    mat = np.concatenate(mat, axis=0)
    return mat


def low_memory_local_dist(x, y, aligned=True):
    print('Computing local distance...')
    x_num_splits = int(len(x) / 200) + 1
    y_num_splits = int(len(y) / 200) + 1
    z = low_memory_matrix_op(local_dist, x, y, 0, 0, x_num_splits, y_num_splits, verbose=True, aligned=aligned)
    return z
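

if __name__ == '__main__':
    # A toy check of the two matching modes (a sketch on random data): two
    # query sequences and three gallery sequences, each with 8 local features
    # of dimension 128, should yield a [2, 3] distance matrix either way.
    x = np.random.rand(2, 8, 128)
    y = np.random.rand(3, 8, 128)
    print(local_dist(x, y, aligned=True).shape)   # (2, 3) via shortest path
    print(local_dist(x, y, aligned=False).shape)  # (2, 3) via diagonal match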
11 | """ 12 | num_q, num_g = distmat.shape 13 | if num_g < max_rank: 14 | max_rank = num_g 15 | print("Note: number of gallery samples is quite small, got {}".format(num_g)) 16 | indices = np.argsort(distmat, axis=1) 17 | matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32) 18 | 19 | # compute cmc curve for each query 20 | all_cmc = [] 21 | all_AP = [] 22 | num_valid_q = 0. # number of valid query 23 | for q_idx in range(num_q): 24 | # get query pid and camid 25 | q_pid = q_pids[q_idx] 26 | q_camid = q_camids[q_idx] 27 | 28 | # remove gallery samples that have the same pid and camid with query 29 | order = indices[q_idx] 30 | remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid) 31 | keep = np.invert(remove) 32 | 33 | # compute cmc curve 34 | orig_cmc = matches[q_idx][keep] # binary vector, positions with value 1 are correct matches 35 | if not np.any(orig_cmc): 36 | # this condition is true when query identity does not appear in gallery 37 | continue 38 | 39 | kept_g_pids = g_pids[order][keep] 40 | g_pids_dict = defaultdict(list) 41 | for idx, pid in enumerate(kept_g_pids): 42 | g_pids_dict[pid].append(idx) 43 | 44 | cmc, AP = 0., 0. 45 | for repeat_idx in range(N): 46 | mask = np.zeros(len(orig_cmc), dtype=np.bool) 47 | for _, idxs in g_pids_dict.items(): 48 | # randomly sample one image for each gallery person 49 | rnd_idx = np.random.choice(idxs) 50 | mask[rnd_idx] = True 51 | masked_orig_cmc = orig_cmc[mask] 52 | _cmc = masked_orig_cmc.cumsum() 53 | _cmc[_cmc > 1] = 1 54 | cmc += _cmc[:max_rank].astype(np.float32) 55 | # compute AP 56 | num_rel = masked_orig_cmc.sum() 57 | tmp_cmc = masked_orig_cmc.cumsum() 58 | tmp_cmc = [x / (i+1.) for i, x in enumerate(tmp_cmc)] 59 | tmp_cmc = np.asarray(tmp_cmc) * masked_orig_cmc 60 | AP += tmp_cmc.sum() / num_rel 61 | cmc /= N 62 | AP /= N 63 | all_cmc.append(cmc) 64 | all_AP.append(AP) 65 | num_valid_q += 1. 66 | 67 | assert num_valid_q > 0, "Error: all query identities do not appear in gallery" 68 | 69 | all_cmc = np.asarray(all_cmc).astype(np.float32) 70 | all_cmc = all_cmc.sum(0) / num_valid_q 71 | mAP = np.mean(all_AP) 72 | 73 | return all_cmc, mAP 74 | 75 | def eval_market1501(distmat, q_pids, g_pids, q_camids, g_camids, max_rank): 76 | """Evaluation with market1501 metric 77 | Key: for each query identity, its gallery images from the same camera view are discarded. 78 | """ 79 | num_q, num_g = distmat.shape 80 | if num_g < max_rank: 81 | max_rank = num_g 82 | print("Note: number of gallery samples is quite small, got {}".format(num_g)) 83 | indices = np.argsort(distmat, axis=1) 84 | matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32) 85 | 86 | # compute cmc curve for each query 87 | all_cmc = [] 88 | all_AP = [] 89 | num_valid_q = 0. # number of valid query 90 | for q_idx in range(num_q): 91 | # get query pid and camid 92 | q_pid = q_pids[q_idx] 93 | q_camid = q_camids[q_idx] 94 | 95 | # remove gallery samples that have the same pid and camid with query 96 | order = indices[q_idx] 97 | remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid) 98 | keep = np.invert(remove) 99 | 100 | # compute cmc curve 101 | orig_cmc = matches[q_idx][keep] # binary vector, positions with value 1 are correct matches 102 | if not np.any(orig_cmc): 103 | # this condition is true when query identity does not appear in gallery 104 | continue 105 | 106 | cmc = orig_cmc.cumsum() 107 | cmc[cmc > 1] = 1 108 | 109 | all_cmc.append(cmc[:max_rank]) 110 | num_valid_q += 1. 
        # compute average precision
        # reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision
        num_rel = orig_cmc.sum()
        tmp_cmc = orig_cmc.cumsum()
        tmp_cmc = [x / (i + 1.) for i, x in enumerate(tmp_cmc)]
        tmp_cmc = np.asarray(tmp_cmc) * orig_cmc
        AP = tmp_cmc.sum() / num_rel
        all_AP.append(AP)

    assert num_valid_q > 0, "Error: all query identities do not appear in gallery"

    all_cmc = np.asarray(all_cmc).astype(np.float32)
    all_cmc = all_cmc.sum(0) / num_valid_q
    mAP = np.mean(all_AP)

    return all_cmc, mAP

def evaluate(distmat, q_pids, g_pids, q_camids, g_camids, max_rank=50, use_metric_cuhk03=False):
    if use_metric_cuhk03:
        return eval_cuhk03(distmat, q_pids, g_pids, q_camids, g_camids, max_rank)
    else:
        return eval_market1501(distmat, q_pids, g_pids, q_camids, g_camids, max_rank)
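

if __name__ == '__main__':
    # A tiny smoke test on made-up numbers (2 queries, 4 gallery images); both
    # queries rank their true matches first here, so CMC and mAP come out at 1.0.
    # (With only 4 gallery images, max_rank is clipped and a note is printed.)
    distmat = np.array([[0.1, 0.9, 0.8, 0.2],
                        [0.7, 0.1, 0.3, 0.9]])
    q_pids = np.array([0, 1]); g_pids = np.array([0, 1, 1, 0])
    q_camids = np.array([0, 0]); g_camids = np.array([1, 1, 2, 2])
    cmc, mAP = evaluate(distmat, q_pids, g_pids, q_camids, g_camids, max_rank=4)
    print("CMC: {}, mAP: {:.2f}".format(cmc, mAP))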
55 | """ 56 | def __init__(self, num_classes, epsilon=0.1, use_gpu=True): 57 | super(CrossEntropyLabelSmooth, self).__init__() 58 | self.num_classes = num_classes 59 | self.epsilon = epsilon 60 | self.use_gpu = use_gpu 61 | self.logsoftmax = nn.LogSoftmax(dim=1) 62 | 63 | def forward(self, inputs, targets): 64 | """ 65 | Args: 66 | inputs: prediction matrix (before softmax) with shape (batch_size, num_classes) 67 | targets: ground truth labels with shape (num_classes) 68 | """ 69 | log_probs = self.logsoftmax(inputs) 70 | targets = torch.zeros(log_probs.size()).scatter_(1, targets.unsqueeze(1).data.cpu(), 1) 71 | if self.use_gpu: targets = targets.cuda() 72 | targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes 73 | loss = (- targets * log_probs).mean(0).sum() 74 | return loss 75 | 76 | class TripletLoss(nn.Module): 77 | """Triplet loss with hard positive/negative mining. 78 | 79 | Reference: 80 | Hermans et al. In Defense of the Triplet Loss for Person Re-Identification. arXiv:1703.07737. 81 | 82 | Code imported from https://github.com/Cysu/open-reid/blob/master/reid/loss/triplet.py. 83 | 84 | Args: 85 | margin (float): margin for triplet. 86 | """ 87 | def __init__(self, margin=0.3, mutual_flag = False): 88 | super(TripletLoss, self).__init__() 89 | self.margin = margin 90 | self.ranking_loss = nn.MarginRankingLoss(margin=margin) 91 | self.mutual = mutual_flag 92 | 93 | def forward(self, inputs, targets): 94 | """ 95 | Args: 96 | inputs: feature matrix with shape (batch_size, feat_dim) 97 | targets: ground truth labels with shape (num_classes) 98 | """ 99 | n = inputs.size(0) 100 | # inputs = 1. * inputs / (torch.norm(inputs, 2, dim=-1, keepdim=True).expand_as(inputs) + 1e-12) 101 | # Compute pairwise distance, replace by the official when merged 102 | dist = torch.pow(inputs, 2).sum(dim=1, keepdim=True).expand(n, n) 103 | dist = dist + dist.t() 104 | dist.addmm_(1, -2, inputs, inputs.t()) 105 | dist = dist.clamp(min=1e-12).sqrt() # for numerical stability 106 | # For each anchor, find the hardest positive and negative 107 | mask = targets.expand(n, n).eq(targets.expand(n, n).t()) 108 | dist_ap, dist_an = [], [] 109 | for i in range(n): 110 | dist_ap.append(dist[i][mask[i]].max().unsqueeze(0)) 111 | dist_an.append(dist[i][mask[i] == 0].min().unsqueeze(0)) 112 | dist_ap = torch.cat(dist_ap) 113 | dist_an = torch.cat(dist_an) 114 | # Compute ranking hinge loss 115 | y = torch.ones_like(dist_an) 116 | loss = self.ranking_loss(dist_an, dist_ap, y) 117 | if self.mutual: 118 | return loss, dist 119 | return loss 120 | 121 | # class TripletLossAlignedReID(nn.Module): 122 | # """Triplet loss with hard positive/negative mining. 123 | 124 | # Reference: 125 | # Hermans et al. In Defense of the Triplet Loss for Person Re-Identification. arXiv:1703.07737. 126 | 127 | # Code imported from https://github.com/Cysu/open-reid/blob/master/reid/loss/triplet.py. 128 | 129 | # Args: 130 | # margin (float): margin for triplet. 
131 | # """ 132 | # def __init__(self, margin=0.3, mutual_flag = False): 133 | # super(TripletLossAlignedReID, self).__init__() 134 | # self.margin = margin 135 | # self.ranking_loss = nn.MarginRankingLoss(margin=margin) 136 | # self.ranking_loss_local = nn.MarginRankingLoss(margin=margin) 137 | # self.mutual = mutual_flag 138 | 139 | # def forward(self, inputs, targets, local_features): 140 | # """ 141 | # Args: 142 | # inputs: feature matrix with shape (batch_size, feat_dim) 143 | # targets: ground truth labels with shape (num_classes) 144 | # """ 145 | # n = inputs.size(0) 146 | # #inputs = 1. * inputs / (torch.norm(inputs, 2, dim=-1, keepdim=True).expand_as(inputs) + 1e-12) 147 | # # Compute pairwise distance, replace by the official when merged 148 | # dist = torch.pow(inputs, 2).sum(dim=1, keepdim=True).expand(n, n) 149 | # dist = dist + dist.t() 150 | # dist.addmm_(1, -2, inputs, inputs.t()) 151 | # dist = dist.clamp(min=1e-12).sqrt() # for numerical stability 152 | # # For each anchor, find the hardest positive and negative 153 | # dist_ap,dist_an,p_inds,n_inds = hard_example_mining(dist,targets,return_inds=True) 154 | # local_features = local_features.permute(0,2,1) 155 | # p_local_features = local_features[p_inds] 156 | # n_local_features = local_features[n_inds] 157 | # local_dist_ap = batch_local_dist(local_features, p_local_features) 158 | # local_dist_an = batch_local_dist(local_features, n_local_features) 159 | 160 | # # Compute ranking hinge loss 161 | # y = torch.ones_like(dist_an) 162 | # global_loss = self.ranking_loss(dist_an, dist_ap, y) 163 | # local_loss = self.ranking_loss_local(local_dist_an,local_dist_ap, y) 164 | # if self.mutual: 165 | # return global_loss+local_loss,dist 166 | # return global_loss,local_loss 167 | 168 | class CenterLoss(nn.Module): 169 | """Center loss. 170 | 171 | Reference: 172 | Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016. 173 | 174 | Args: 175 | num_classes (int): number of classes. 176 | feat_dim (int): feature dimension. 177 | """ 178 | def __init__(self, num_classes=10, feat_dim=2, use_gpu=True): 179 | super(CenterLoss, self).__init__() 180 | self.num_classes = num_classes 181 | self.feat_dim = feat_dim 182 | self.use_gpu = use_gpu 183 | 184 | if self.use_gpu: 185 | self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim).cuda()) 186 | else: 187 | self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim)) 188 | 189 | def forward(self, x, labels): 190 | """ 191 | Args: 192 | x: feature matrix with shape (batch_size, feat_dim). 193 | labels: ground truth labels with shape (num_classes). 194 | """ 195 | batch_size = x.size(0) 196 | distmat = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(batch_size, self.num_classes) + \ 197 | torch.pow(self.centers, 2).sum(dim=1, keepdim=True).expand(self.num_classes, batch_size).t() 198 | distmat.addmm_(1, -2, x, self.centers.t()) 199 | 200 | classes = torch.arange(self.num_classes).long() 201 | if self.use_gpu: classes = classes.cuda() 202 | labels = labels.unsqueeze(1).expand(batch_size, self.num_classes) 203 | mask = labels.eq(classes.expand(batch_size, self.num_classes)) 204 | 205 | dist = [] 206 | for i in range(batch_size): 207 | value = distmat[i][mask[i]] 208 | value = value.clamp(min=1e-12, max=1e+12) # for numerical stability 209 | dist.append(value) 210 | dist = torch.cat(dist) 211 | loss = dist.mean() 212 | 213 | return loss 214 | 215 | class RingLoss(nn.Module): 216 | """Ring loss. 

    Reference:
    Zheng et al. Ring loss: Convex Feature Normalization for Face Recognition. CVPR 2018.
    """
    def __init__(self, weight_ring=1.):
        super(RingLoss, self).__init__()
        self.radius = nn.Parameter(torch.ones(1, dtype=torch.float))
        self.weight_ring = weight_ring

    def forward(self, x):
        l = ((x.norm(p=2, dim=1) - self.radius) ** 2).mean()
        return l * self.weight_ring

class KLMutualLoss(nn.Module):
    def __init__(self):
        super(KLMutualLoss, self).__init__()
        self.kl_loss = nn.KLDivLoss(reduction='sum')
        self.log_softmax = nn.functional.log_softmax
        self.softmax = nn.functional.softmax

    def forward(self, pred1, pred2):
        pred1 = self.log_softmax(pred1, dim=1)
        pred2 = self.softmax(pred2, dim=1)
        # loss = self.kl_loss(pred1, torch.autograd.Variable(pred2.data))
        loss = self.kl_loss(pred1, pred2.detach())
        return loss

class MetricMutualLoss(nn.Module):
    def __init__(self):
        super(MetricMutualLoss, self).__init__()
        self.l2_loss = nn.MSELoss()

    def forward(self, dist1, dist2, pids):
        loss = self.l2_loss(dist1, dist2)
        return loss


if __name__ == '__main__':
    pass
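    # A hedged smoke test on random inputs: 8 samples from 4 identities, 2
    # images each (only the shapes matter; the loss values are meaningless):
    feats = torch.randn(8, 2048)
    labels = torch.tensor([0, 0, 1, 1, 2, 2, 3, 3])
    print(TripletLoss(margin=0.3)(feats, labels))
    print(CrossEntropyLabelSmooth(num_classes=4, use_gpu=False)(torch.randn(8, 4), labels))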
--------------------------------------------------------------------------------
/optimizers.py:
--------------------------------------------------------------------------------
import torch

__all__ = ['init_optim']

def init_optim(optim, params, lr, weight_decay):
    if optim == 'adam':
        return torch.optim.Adam(params, lr=lr, weight_decay=weight_decay)
    elif optim == 'sgd':
        return torch.optim.SGD(params, lr=lr, momentum=0.9, weight_decay=weight_decay)
    elif optim == 'rmsprop':
        return torch.optim.RMSprop(params, lr=lr, momentum=0.9, weight_decay=weight_decay)
    else:
        raise KeyError("Unsupported optim: {}".format(optim))
--------------------------------------------------------------------------------
/train_class.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import
import os
import sys
import time
import datetime
import argparse
import os.path as osp
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.backends.cudnn as cudnn
from torch.optim import lr_scheduler

import ResNet
import losses
import data_manager
import transforms as T
from dataset_loader import ImageDataset
from utils import AverageMeter, Logger, save_checkpoint
from eval_metrics import evaluate
from optimizers import init_optim

# from IPython import embed
import torchvision.transforms.functional as f

parser = argparse.ArgumentParser(description='Train AlignedReID with cross entropy loss and triplet hard loss')
# Datasets
parser.add_argument('--root', type=str, default='data', help="root path to data directory")
# parser.add_argument('-d', '--dataset', type=str, default='market1501',
#                     choices=data_manager.get_names())
parser.add_argument('-j', '--workers', default=4, type=int,
                    help="number of data loading workers (default: 4)")
parser.add_argument('--height', type=int, default=256,
                    help="height of an image (default: 256)")
parser.add_argument('--width', type=int, default=128,
                    help="width of an image (default: 128)")
parser.add_argument('--split-id', type=int, default=0, help="split index")

parser.add_argument('--use-metric-cuhk03', action='store_true',
                    help="whether to use the cuhk03 metric (default: False)")

parser.add_argument('--labelsmooth', action='store_true', help="label smoothing")
parser.add_argument('--optim', type=str, default='adam', help="optimization algorithm (see optimizers.py)")
parser.add_argument('--max-epoch', default=300, type=int,
                    help="maximum epochs to run")
parser.add_argument('--start-epoch', default=0, type=int,
                    help="manual epoch number (useful on restarts)")
parser.add_argument('--train-batch', default=32, type=int,
                    help="train batch size")
parser.add_argument('--test-batch', default=32, type=int, help="test batch size")
parser.add_argument('--lr', '--learning-rate', default=0.0002, type=float,
                    help="initial learning rate")
parser.add_argument('--stepsize', default=150, type=int,
                    help="stepsize to decay learning rate (>0 means this is enabled)")
parser.add_argument('--gamma', default=0.1, type=float,
                    help="learning rate decay")
parser.add_argument('--weight-decay', default=5e-04, type=float,
                    help="weight decay (default: 5e-04)")
# triplet hard loss
parser.add_argument('--margin', type=float, default=0.3, help="margin for triplet loss")
parser.add_argument('--num-instances', type=int, default=4,
                    help="number of instances per identity")
parser.add_argument('--htri-only', action='store_true', default=False,
                    help="if this is True, only htri loss is used in training")

# parser.add_argument('-a', '--arch', type=str, default='resnet50', choices=ResNet.get_names())

parser.add_argument('--print-freq', type=int, default=10, help="print frequency")
parser.add_argument('--seed', type=int, default=1, help="manual seed")
parser.add_argument('--resume', type=str, default='', metavar='PATH')
parser.add_argument('--evaluate', action='store_true', help="evaluation only")
parser.add_argument('--eval-step', type=int, default=20,
                    help="run evaluation every N epochs (set to -1 to test only after training)")
parser.add_argument('--start-eval', type=int, default=0, help="start to evaluate after a specific epoch")
parser.add_argument('--save-dir', type=str, default='model_save')
parser.add_argument('--use_cpu', action='store_true', help="use cpu")
parser.add_argument('--gpu-devices', default='0', type=str, help="gpu device ids for CUDA_VISIBLE_DEVICES")
parser.add_argument('--reranking', action='store_true', help="result re-ranking")

parser.add_argument('--test_distance', type=str, default='global', help="test distance type")
parser.add_argument('--unaligned', action='store_true', help="test local features without alignment")

args = parser.parse_args()


def main():
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False
    if use_gpu:
        pin_memory = True
    else:
        pin_memory = False

    if not args.evaluate:  # training mode: log to log_train.txt
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:  # evaluation mode: log to log_test.txt
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
print("==========\nArgs:{}\n==========".format(args)) 104 | 105 | if use_gpu: 106 | print("Currently using GPU {}".format(args.gpu_devices)) 107 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices 108 | cudnn.benchmark = True 109 | torch.cuda.manual_seed_all(args.seed) 110 | else: 111 | print("Currently using CPU (GPU is highly recommended)") 112 | 113 | dataset = data_manager.Market1501(root = 'data') 114 | 115 | transform_train = T.Compose([ 116 | T.Random2DTranslation(args.height, args.width), 117 | T.RandomHorizontalFlip(), 118 | T.ToTensor(), 119 | T.Normalize(mean=[0.485, 0.456, 0.406], std =[0.229, 0.224, 0.225]), #归一化 120 | ]) 121 | transform_test = T.Compose([ 122 | T.Resize([args.height, args.width]), 123 | # T.Resize(args.height, args.width,interpolation=f._interpolation_modes_from_int(0)), 124 | T.ToTensor(), 125 | T.Normalize(mean=[0.485, 0.456, 0.406], std =[0.229, 0.224, 0.225]), #归一化 126 | ]) 127 | 128 | trainloader = DataLoader( 129 | ImageDataset(dataset.train, transform = transform_train), 130 | shuffle=True, 131 | batch_size = args.train_batch, 132 | num_workers=args.workers, 133 | pin_memory = pin_memory, 134 | drop_last=True, 135 | ) 136 | 137 | queryloader = DataLoader( 138 | ImageDataset(dataset.query, transform = transform_test), 139 | shuffle=False, 140 | batch_size = args.test_batch, num_workers=args.workers, 141 | pin_memory = pin_memory, 142 | drop_last=False, 143 | ) 144 | 145 | galleryloader = DataLoader( 146 | ImageDataset(dataset.gallery, transform = transform_test), 147 | shuffle=False, 148 | batch_size = args.test_batch, num_workers=args.workers, 149 | pin_memory = pin_memory, 150 | drop_last=False, 151 | ) 152 | 153 | 154 | print("Initializing model: {}".format('market_1501')) 155 | model = ResNet.ResNet50(num_classes=751) 156 | print("Model size: {:.5f}M".format(sum(p.numel() for p in model.parameters())/1000000.0)) 157 | 158 | criterion_class = nn.CrossEntropyLoss() 159 | optimizer = torch.optim.Adam(model.parameters(), lr = args.lr, weight_decay = args.weight_decay) #如果只修改其中某几层参数就这么写nn.Sequential([ResNet.conv1, ResNet.conv2]) 160 | 161 | if args.stepsize > 0: 162 | scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma = args.gamma) 163 | 164 | start_epoch = args.start_epoch 165 | 166 | if args.resume: 167 | print("Loading checkpoint from '{}'".format(args.resume)) 168 | checkpoint = torch.load(args.resume) 169 | model.load_state_dict(checkpoint['state_dict']) 170 | start_epoch = checkpoint['epoch'] 171 | 172 | if use_gpu: 173 | # model = nn.DataParallel(model).cuda() #多GPU训练 174 | model = model.cuda() 175 | 176 | 177 | if args.evaluate: 178 | print('Evaluate only!') 179 | model.test(model, queryloader, galleryloader, use_gpu) 180 | return 0 181 | 182 | start_time = time.time() 183 | train_time = 0 184 | best_rank1 = -np.inf 185 | best_epoch = 0 186 | 187 | print('start training') 188 | 189 | for epoch in range(start_epoch, args.max_epoch): 190 | start_train_time = time.time() 191 | train(epoch, model, criterion_class, optimizer, trainloader, use_gpu) 192 | 193 | # model._save_to_state_dict() #保存模型 194 | 195 | train_time += round(time.time() - start_train_time) 196 | 197 | if args.stepsize > 0:scheduler.step() #学习率衰减 198 | 199 | if (epoch + 1) > args.start_eval and args.eval_step > 0 and (epoch + 1) % args.eval_step == 0 or (epoch + 1) == args.max_epoch: 200 | 201 | print("===> Test") 202 | rank1 = test(model, queryloader, galleryloader, use_gpu) 203 | is_best = rank1 > best_rank1 204 | if is_best: 205 | best_rank1 = rank1 206 | 
                best_epoch = epoch + 1

            if use_gpu:
                state_dict = model.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint({
                'state_dict': state_dict,
                'rank1': rank1,
                'epoch': epoch,
            }, is_best, osp.join(args.save_dir, 'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))
            print("==> Best Rank-1: {:.1%}, achieved at epoch {}".format(best_rank1, best_epoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print("Finished. Total elapsed time (H:M:S): {}. Training time (H:M:S): {}.".format(elapsed, train_time))

def train(epoch, model, criterion_class, optimizer, trainloader, use_gpu):
    model.train()

    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()

    end = time.time()

    for batch_idx, (imgs, pids, _) in enumerate(trainloader):
        if use_gpu:
            imgs, pids = imgs.cuda(), pids.cuda()
        data_time.update(time.time() - end)

        outputs = model(imgs)
        loss = criterion_class(outputs, pids)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()
        losses.update(loss.item(), pids.size(0))

        if (batch_idx + 1) % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                      epoch + 1, batch_idx + 1, len(trainloader), batch_time=batch_time, data_time=data_time,
                      loss=losses))


def test(model, queryloader, galleryloader, use_gpu, ranks=[1, 5, 10, 20]):
    batch_time = AverageMeter()

    model.eval()

    with torch.no_grad():
        qf, q_pids, q_camids, lqf = [], [], [], []
        for batch_idx, (imgs, pids, camids) in enumerate(queryloader):
            if use_gpu: imgs = imgs.cuda()

            end = time.time()
            features, local_features = model(imgs)
            batch_time.update(time.time() - end)

            features = features.data.cpu()
            local_features = local_features.data.cpu()
            qf.append(features)
            lqf.append(local_features)
            q_pids.extend(pids)
            q_camids.extend(camids)
        qf = torch.cat(qf, 0)
        lqf = torch.cat(lqf, 0)
        q_pids = np.asarray(q_pids)
        q_camids = np.asarray(q_camids)

        print("Extracted features for query set, obtained {}-by-{} matrix".format(qf.size(0), qf.size(1)))

        gf, g_pids, g_camids, lgf = [], [], [], []
        end = time.time()
        for batch_idx, (imgs, pids, camids) in enumerate(galleryloader):
            if use_gpu: imgs = imgs.cuda()

            end = time.time()
            features, local_features = model(imgs)
            batch_time.update(time.time() - end)

            features = features.data.cpu()
            local_features = local_features.data.cpu()
            gf.append(features)
            lgf.append(local_features)
            g_pids.extend(pids)
            g_camids.extend(camids)
        gf = torch.cat(gf, 0)
        lgf = torch.cat(lgf, 0)
        g_pids = np.asarray(g_pids)
        g_camids = np.asarray(g_camids)

        print("Extracted features for gallery set, obtained {}-by-{} matrix".format(gf.size(0), gf.size(1)))

    print("==> BatchTime(s)/BatchSize(img): {:.3f}/{}".format(batch_time.avg, args.test_batch))
    # feature normalization
    qf = 1. * qf / (torch.norm(qf, 2, dim=-1, keepdim=True).expand_as(qf) + 1e-12)
    gf = 1. * gf / (torch.norm(gf, 2, dim=-1, keepdim=True).expand_as(gf) + 1e-12)
    m, n = qf.size(0), gf.size(0)
    distmat = torch.pow(qf, 2).sum(dim=1, keepdim=True).expand(m, n) + \
              torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, m).t()
    distmat.addmm_(qf, gf.t(), beta=1, alpha=-2)
    distmat = distmat.cpu().numpy()

    if not args.test_distance == 'global':
        print("Also computing local distances")
        from distance import low_memory_local_dist
        lqf = lqf.permute(0, 2, 1)
        lgf = lgf.permute(0, 2, 1)
        local_distmat = low_memory_local_dist(lqf.numpy(), lgf.numpy(), aligned=not args.unaligned)
        if args.test_distance == 'local':
            print("Only using local branch")
            distmat = local_distmat
        if args.test_distance == 'global_local':
            print("Using global and local branches")
            distmat = local_distmat + distmat

    print("Computing CMC and mAP")
    cmc, mAP = evaluate(distmat, q_pids, g_pids, q_camids, g_camids, use_metric_cuhk03=args.use_metric_cuhk03)

    print("Results ----------")
    print("mAP: {:.1%}".format(mAP))
    print("CMC curve")
    for r in ranks:
        print("Rank-{:<3}: {:.1%}".format(r, cmc[r - 1]))
    print("------------------")

    return cmc[0]


if __name__ == '__main__':
    main()
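
# Hypothetical invocations (flags as defined by the argparse block above):
#   python train_class.py --root data --max-epoch 300 --train-batch 32 --eval-step 20
#   python train_class.py --evaluate --resume model_save/best_model.pth.tar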
--------------------------------------------------------------------------------
/transforms.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import

from torchvision.transforms import *
from PIL import Image
import random
import torchvision.transforms as transforms

class Random2DTranslation(object):  # data augmentation: enlarge the image by 1/8, then take a random crop
    def __init__(self, height, width, p=0.5, interpolation=Image.BILINEAR):
        self.height = height
        self.width = width
        self.p = p
        self.interpolation = interpolation

    def __call__(self, img):
        if random.random() < self.p:
            return img.resize((self.width, self.height), self.interpolation)
        new_width, new_height = int(round(self.width * 1.125)), int(round(self.height * 1.125))
        resize_img = img.resize((new_width, new_height), self.interpolation)
        x_maxrange = new_width - self.width
        y_maxrange = new_height - self.height
        x1 = int(round(random.uniform(0, x_maxrange)))
        y1 = int(round(random.uniform(0, y_maxrange)))

        cropped_img = resize_img.crop((x1, y1, x1 + self.width, y1 + self.height))

        return cropped_img


# # Quick visual test:
# if __name__ == '__main__':
#     img = Image.open('data/Market-1501-v15.09.15/bounding_box_test/-1_c1s1_000401_03.jpg')
#     transforms = Random2DTranslation(256, 128, 0.5)
#     img_t = transforms(img)
#     import matplotlib.pyplot as plt
#
#     plt.figure(12)
#     plt.subplot(121)
#     plt.imshow(img)
#     plt.subplot(122)
#     plt.imshow(img_t)
#     plt.show()
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import
import os
import sys
import errno
import shutil
import json
import os.path as osp
from PIL import Image
import matplotlib.pyplot as plt
import cv2
import numpy as np
from numpy import array, argmin

import torch

def mkdir_if_missing(directory):
    if not osp.exists(directory):
        try:
            os.makedirs(directory)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

class AverageMeter(object):
    """Computes and stores the average and current value.

    Code imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262
    """
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def save_checkpoint(state, is_best, fpath='checkpoint.pth.tar'):
    mkdir_if_missing(osp.dirname(fpath))
    torch.save(state, fpath)
    if is_best:
        shutil.copy(fpath, osp.join(osp.dirname(fpath), 'best_model.pth.tar'))

class Logger(object):
    """
    Write console output to an external text file.
    Code imported from https://github.com/Cysu/open-reid/blob/master/reid/utils/logging.py.
    """
    def __init__(self, fpath=None):
        self.console = sys.stdout
        self.file = None
        if fpath is not None:
            mkdir_if_missing(os.path.dirname(fpath))
            self.file = open(fpath, 'w')

    def __del__(self):
        self.close()

    def __enter__(self):
        pass

    def __exit__(self, *args):
        self.close()

    def write(self, msg):
        self.console.write(msg)
        if self.file is not None:
            self.file.write(msg)

    def flush(self):
        self.console.flush()
        if self.file is not None:
            self.file.flush()
            os.fsync(self.file.fileno())

    def close(self):
        self.console.close()
        if self.file is not None:
            self.file.close()

def read_json(fpath):
    with open(fpath, 'r') as f:
        obj = json.load(f)
    return obj

def write_json(obj, fpath):
    mkdir_if_missing(osp.dirname(fpath))
    with open(fpath, 'w') as f:
        json.dump(obj, f, indent=4, separators=(',', ': '))

def _traceback(D):
    i, j = array(D.shape) - 1
    p, q = [i], [j]
    while (i > 0) or (j > 0):
        tb = argmin((D[i, j - 1], D[i - 1, j]))
        if tb == 0:
            j -= 1
        else:  # (tb == 1)
            i -= 1
        p.insert(0, i)
        q.insert(0, j)
    return array(p), array(q)

def dtw(dist_mat):
    m, n = dist_mat.shape[:2]
    dist = np.zeros_like(dist_mat)
    for i in range(m):
        for j in range(n):
            if (i == 0) and (j == 0):
                dist[i, j] = dist_mat[i, j]
            elif (i == 0) and (j > 0):
                dist[i, j] = dist[i, j - 1] + dist_mat[i, j]
            elif (i > 0) and (j == 0):
                dist[i, j] = dist[i - 1, j] + dist_mat[i, j]
            else:
                dist[i, j] = \
                    np.min(np.stack([dist[i - 1, j], dist[i, j - 1]], axis=0), axis=0) \
                    + dist_mat[i, j]
    path = _traceback(dist)
    return dist[-1, -1] / sum(dist.shape), dist, path
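
# # A quick illustration of dtw() on a toy 3x3 cost matrix (kept commented out,
# # like the test in transforms.py, so importing utils stays side-effect free):
# toy = np.array([[0., 1., 2.],
#                 [1., 0., 1.],
#                 [2., 1., 0.]])
# d, D, path = dtw(toy)
# print(d)     # normalized cost of the best monotonic alignment
# print(path)  # the aligned (row, column) index pairs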
Just chill".format(img_path)) 138 | pass 139 | return img 140 | 141 | def img_to_tensor(img,transform): 142 | img = transform(img) 143 | img = img.unsqueeze(0) 144 | return img 145 | 146 | def show_feature(x): 147 | for j in range(len(x)): 148 | for i in range(len(64)): 149 | ax = plt.subplot(4,16,i+1) 150 | ax.set_title('No #{}'.format(i)) 151 | ax.axis('off') 152 | plt.imshow(x[j].cpu().data.numpy()[0,i,:,:],cmap='jet') 153 | plt.show() 154 | 155 | def feat_flatten(feat): 156 | shp = feat.shape 157 | feat = feat.reshape(shp[0] * shp[1], shp[2]) 158 | return feat 159 | 160 | def show_similar(local_img_path, img_path, similarity, bbox): 161 | img1 = cv2.imread(local_img_path) 162 | img2 = cv2.imread(img_path) 163 | img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2RGB) 164 | img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2RGB) 165 | img1 = cv2.resize(img1, (64, 128)) 166 | img2 = cv2.resize(img2, (64, 128)) 167 | cv2.rectangle(img1, (bbox[0], bbox[1]), (bbox[0] + bbox[2], bbox[1] + bbox[3]), (0, 255, 0), 1) 168 | 169 | p = np.where(similarity == np.max(similarity)) 170 | y, x = p[0][0], p[1][0] 171 | cv2.rectangle(img2, (x - bbox[2] / 2, y - bbox[3] / 2), (x + bbox[2] / 2, y + bbox[3] / 2), (0, 255, 0), 1) 172 | plt.subplot(1, 3, 1).set_title('patch') 173 | plt.imshow(img1) 174 | plt.subplot(1, 3, 2).set_title(('max similarity: ' + str(np.max(similarity)))) 175 | plt.imshow(img2) 176 | plt.subplot(1, 3, 3).set_title('similarity') 177 | plt.imshow(similarity) 178 | 179 | def show_alignedreid(local_img_path, img_path, dist): 180 | def drow_line(img, similarity): 181 | for i in range(1, len(similarity)): 182 | cv2.line(img, (0, i*16), (63, i*16), color=(0,255,0)) 183 | cv2.line(img, (96, i*16), (160, i*16), color=(0,255,0)) 184 | def drow_path(img, path): 185 | for i in range(len(path[0])): 186 | cv2.line(img, (64, 8+16*path[0][i]), (96,8+16*path[1][i]), color=(255,255,0)) 187 | img1 = cv2.imread(local_img_path) 188 | img2 = cv2.imread(img_path) 189 | img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2RGB) 190 | img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2RGB) 191 | img1 = cv2.resize(img1, (64,128)) 192 | img2 = cv2.resize(img2, (64,128)) 193 | img = np.zeros((128,160,3)).astype(img1.dtype) 194 | img[:,:64,:] = img1 195 | img[:,-64:,:] = img2 196 | drow_line(img, dist) 197 | d,D,sp = dtw(dist) 198 | origin_dist = np.mean(np.diag(dist)) 199 | drow_path(img, sp) 200 | plt.subplot(1,2,1).set_title('Aligned distance: %.4f \n Original distance: %.4f' %(d,origin_dist)) 201 | plt.subplot(1,2,1).set_xlabel('Aligned Result') 202 | plt.imshow(img) 203 | plt.subplot(1,2,2).set_title('Distance Map') 204 | plt.subplot(1,2,2).set_xlabel('Right Image') 205 | plt.subplot(1,2,2).set_ylabel('Left Image') 206 | plt.imshow(dist) 207 | plt.subplots_adjust(bottom=0.1, left=0.075, right=0.85, top=0.9) 208 | cax = plt.axes([0.9, 0.25, 0.025, 0.5]) 209 | plt.colorbar(cax = cax) 210 | plt.show() 211 | 212 | def merge_feature(feature_list, shp, sample_rate = None): 213 | def pre_process(torch_feature_map): 214 | numpy_feature_map = torch_feature_map.cpu().data.numpy()[0] 215 | numpy_feature_map = numpy_feature_map.transpose(1,2,0) 216 | shp = numpy_feature_map.shape[:2] 217 | return numpy_feature_map, shp 218 | def resize_as(tfm, shp): 219 | nfm, shp2 = pre_process(tfm) 220 | scale = shp[0]/shp2[0] 221 | nfm1 = nfm.repeat(scale, axis = 0).repeat(scale, axis=1) 222 | return nfm1 223 | final_nfm = resize_as(feature_list[0], shp) 224 | for i in range(1, len(feature_list)): 225 | temp_nfm = resize_as(feature_list[i],shp) 226 | final_nfm = 
def merge_feature(feature_list, shp, sample_rate=None):
    def pre_process(torch_feature_map):
        numpy_feature_map = torch_feature_map.cpu().data.numpy()[0]
        numpy_feature_map = numpy_feature_map.transpose(1, 2, 0)
        shp = numpy_feature_map.shape[:2]
        return numpy_feature_map, shp

    def resize_as(tfm, shp):
        nfm, shp2 = pre_process(tfm)
        scale = int(shp[0] / shp2[0])
        nfm1 = nfm.repeat(scale, axis=0).repeat(scale, axis=1)
        return nfm1

    final_nfm = resize_as(feature_list[0], shp)
    for i in range(1, len(feature_list)):
        temp_nfm = resize_as(feature_list[i], shp)
        final_nfm = np.concatenate((final_nfm, temp_nfm), axis=-1)
    if sample_rate:
        final_nfm = final_nfm[0:-1:sample_rate, 0:-1:sample_rate, :]
    return final_nfm
--------------------------------------------------------------------------------