├── reid ├── lib │ ├── __init__.py │ ├── normalize.py │ ├── utils.py │ └── custom_transforms.py ├── utils │ ├── data │ │ ├── __init__.py │ │ ├── sampler.py │ │ ├── preprocessor.py │ │ └── transforms.py │ ├── osutils.py │ ├── meters.py │ ├── __init__.py │ ├── logging.py │ ├── serialization.py │ └── misc.py ├── datasets │ ├── __init__.py │ └── domain_adaptation.py ├── loss │ ├── __init__.py │ └── invariance.py ├── __init__.py ├── evaluation_metrics │ ├── __init__.py │ ├── classification.py │ └── ranking.py ├── models │ ├── __init__.py │ └── resnet.py ├── trainers.py └── evaluators.py ├── data └── ReadME.md ├── imgs ├── .DS_Store ├── msmt17.png ├── framework.png ├── 31520150150725 └── market_duke.png ├── README.md ├── main.py └── LICENSE /reid/lib/__init__.py: -------------------------------------------------------------------------------- 1 | # nothing 2 | -------------------------------------------------------------------------------- /data/ReadME.md: -------------------------------------------------------------------------------- 1 | add data to this folder 2 | -------------------------------------------------------------------------------- /imgs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhunzhong07/ECN/HEAD/imgs/.DS_Store -------------------------------------------------------------------------------- /imgs/msmt17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhunzhong07/ECN/HEAD/imgs/msmt17.png -------------------------------------------------------------------------------- /imgs/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhunzhong07/ECN/HEAD/imgs/framework.png -------------------------------------------------------------------------------- /imgs/31520150150725: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhunzhong07/ECN/HEAD/imgs/31520150150725 -------------------------------------------------------------------------------- /imgs/market_duke.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhunzhong07/ECN/HEAD/imgs/market_duke.png -------------------------------------------------------------------------------- /reid/utils/data/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .preprocessor import Preprocessor 4 | -------------------------------------------------------------------------------- /reid/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .domain_adaptation import DA 3 | 4 | 5 | __all__ = [ 6 | 'DA', 7 | ] 8 | -------------------------------------------------------------------------------- /reid/loss/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .invariance import InvNet 4 | 5 | __all__ = [ 6 | 'InvNet', 7 | ] 8 | -------------------------------------------------------------------------------- /reid/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from . import datasets 4 | from . import evaluation_metrics 5 | from . import loss 6 | from . import models 7 | from . import utils 8 | from . import evaluators 9 | from . 
def mkdir_if_missing(dir_path):
    """Create ``dir_path`` (including missing parents) unless it already exists.

    Args:
        dir_path: Directory to create. An empty value is treated as "nothing
            to create": ``os.path.dirname`` returns ``''`` for a bare file
            name such as ``'checkpoint.pth.tar'``, and ``os.makedirs('')``
            would otherwise fail with ENOENT.

    Raises:
        OSError: If creation fails for any reason other than the path
            already existing.
    """
    if not dir_path:
        # Nothing to create; the target lives in the current directory.
        return
    try:
        os.makedirs(dir_path)
    except OSError as e:
        # An already-existing path is the expected, harmless case.
        if e.errno != errno.EEXIST:
            raise
class AverageMeter(object):
    """Keep the most recent value and the running weighted mean of a metric."""

    def __init__(self):
        # A fresh meter starts from the same state that reset() produces.
        self.reset()

    def reset(self):
        """Set every tracked statistic back to zero."""
        self.val, self.avg, self.sum, self.count = 0, 0, 0, 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count
class Logger(object):
    """File-like object that mirrors everything written to it.

    Output always goes to the stream that was ``sys.stdout`` when the logger
    was created and, when ``fpath`` is given, also to that file (opened in
    ``'w'`` mode after its parent directory is created).

    Args:
        fpath: Optional path of the log file. ``None`` means console only.
    """

    def __init__(self, fpath=None):
        self.console = sys.stdout
        self.file = None
        if fpath is not None:
            mkdir_if_missing(os.path.dirname(fpath))
            self.file = open(fpath, 'w')

    def __del__(self):
        self.close()

    def __enter__(self):
        # Return the logger so ``with Logger(path) as log:`` binds a usable object.
        return self

    def __exit__(self, *args):
        self.close()

    def write(self, msg):
        """Write ``msg`` to the console and, if present, the log file."""
        self.console.write(msg)
        if self.file is not None:
            self.file.write(msg)

    def flush(self):
        """Flush the console and force the log file contents to disk."""
        self.console.flush()
        if self.file is not None:
            self.file.flush()
            os.fsync(self.file.fileno())

    def close(self):
        """Close the log file; safe to call more than once.

        The console stream is only borrowed from ``sys.stdout``, so it is
        deliberately left open: closing it would break every later ``print``
        in the process.
        """
        if self.file is not None:
            self.file.close()
            self.file = None
class RandomIdentitySampler(Sampler):
    """Sampler that yields ``num_instances`` dataset indices per identity.

    ``data_source`` is iterated once as ``(_, pid, _)`` triples to group the
    dataset positions by ``pid``. Each pass visits the identities in a random
    order and draws ``num_instances`` positions for each of them.
    """

    def __init__(self, data_source, num_instances=1):
        self.data_source = data_source
        self.num_instances = num_instances
        # pid -> list of dataset positions carrying that pid
        self.index_dic = defaultdict(list)
        for position, record in enumerate(data_source):
            _, pid, _ = record
            self.index_dic[pid].append(position)
        self.pids = list(self.index_dic.keys())
        self.num_samples = len(self.pids)

    def __len__(self):
        return self.num_instances * self.num_samples

    def __iter__(self):
        drawn = []
        for slot in torch.randperm(self.num_samples):
            candidates = self.index_dic[self.pids[slot]]
            # Draw with replacement only when the identity has too few images.
            with_replacement = len(candidates) < self.num_instances
            picks = np.random.choice(candidates, size=self.num_instances,
                                     replace=with_replacement)
            drawn.extend(picks)
        return iter(drawn)
def copy_state_dict(state_dict, model, strip=None):
    """Copy matching tensors from ``state_dict`` into ``model`` in place.

    Args:
        state_dict: Mapping from parameter name to tensor or ``Parameter``.
        model: Module whose ``state_dict()`` entries receive the values.
        strip: Optional prefix removed from the source names that start
            with it, before they are looked up in the model.

    Returns:
        The same ``model`` object. Source entries whose name is unknown to
        the model are skipped silently; entries whose shape differs are
        skipped with a printed notice; model entries that received no value
        are printed as missing.
    """
    tgt_state = model.state_dict()
    copied_names = set()
    for key, value in state_dict.items():
        if strip is not None and key.startswith(strip):
            key = key[len(strip):]
        if key not in tgt_state:
            continue
        destination = tgt_state[key]
        source = value.data if isinstance(value, Parameter) else value
        if source.size() != destination.size():
            print('mismatch:', key, source.size(), destination.size())
            continue
        destination.copy_(source)
        copied_names.add(key)

    missing = set(tgt_state.keys()) - copied_names
    if missing:
        print("missing keys in state_dict:", missing)

    return model
Default: False 33 | num_features : int, optional 34 | If positive, will append a Linear layer after the global pooling layer, 35 | with this number of output units, followed by a BatchNorm layer. 36 | Otherwise these layers will not be appended. Default: 256 for 37 | 'inception', 0 for 'resnet*' 38 | norm : bool, optional 39 | If True, will normalize the feature to be unit L2-norm for each sample. 40 | Otherwise will append a ReLU layer after the above Linear layer if 41 | num_features > 0. Default: False 42 | dropout : float, optional 43 | If positive, will append a Dropout layer with this dropout rate. 44 | Default: 0 45 | num_classes : int, optional 46 | If positive, will append a Linear layer at the end as the classifier 47 | with this number of output units. Default: 0 48 | """ 49 | if name not in __factory: 50 | raise KeyError("Unknown model:", name) 51 | return __factory[name](*args, **kwargs) 52 | -------------------------------------------------------------------------------- /reid/utils/data/preprocessor.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import os.path as osp 3 | from PIL import Image 4 | from torchvision.transforms import functional as F 5 | import torch 6 | 7 | 8 | class Preprocessor(object): 9 | def __init__(self, dataset, root=None, transform=None): 10 | super(Preprocessor, self).__init__() 11 | self.dataset = dataset 12 | self.root = root 13 | self.transform = transform 14 | 15 | def __len__(self): 16 | return len(self.dataset) 17 | 18 | def __getitem__(self, indices): 19 | if isinstance(indices, (tuple, list)): 20 | return [self._get_single_item(index) for index in indices] 21 | return self._get_single_item(indices) 22 | 23 | def _get_single_item(self, index): 24 | fname, pid, camid = self.dataset[index] 25 | fpath = fname 26 | if self.root is not None: 27 | fpath = osp.join(self.root, fname) 28 | img = Image.open(fpath).convert('RGB') 29 | if 
class UnsupervisedCamStylePreprocessor(object):
    """Dataset wrapper that randomly swaps an image for a camera-style copy.

    Each access draws a random camera in ``[0, num_cam)``. If it equals the
    sample's own camera the original image is loaded from ``root``; otherwise
    a ``*_fake_*`` file name is derived and loaded from ``camstyle_root``.

    Args:
        dataset: Indexable collection of ``(fname, pid, camid)`` triples.
        root: Directory of the original images. ``None`` means ``fname`` is
            used as the path unchanged, as in ``Preprocessor``.
        camstyle_root: Directory of the style-transferred images. ``None``
            means the derived file name is used as the path unchanged.
        num_cam: Number of cameras to draw the random camera from.
        transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, dataset, root=None, camstyle_root=None, num_cam=6, transform=None):
        super(UnsupervisedCamStylePreprocessor, self).__init__()
        self.dataset = dataset
        self.root = root
        self.transform = transform
        self.num_cam = num_cam
        self.camstyle_root = camstyle_root

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, indices):
        if isinstance(indices, (tuple, list)):
            return [self._get_single_item(index) for index in indices]
        return self._get_single_item(indices)

    def _get_single_item(self, index):
        """Load one sample and return ``(img, fname, pid, index)``.

        The last element is the dataset index, not the camera id, and
        ``fname`` is the derived fake name whenever a style-transferred
        image was selected.
        """
        fname, pid, camid = self.dataset[index]
        # Plain int, so the comparison and the file-name formatting below do
        # not rely on tensor semantics.
        sel_cam = int(torch.randperm(self.num_cam)[0])
        if sel_cam == camid:
            base_dir = self.root
        else:
            # Guard against root=None, which would make the "in" test raise.
            if self.root is not None and 'msmt' in self.root:
                fname = fname[:-4] + '_fake_' + str(sel_cam + 1) + '.jpg'
            else:
                fname = fname[:-4] + '_fake_' + str(camid + 1) + 'to' + str(sel_cam + 1) + '.jpg'
            base_dir = self.camstyle_root
        fpath = fname if base_dir is None else osp.join(base_dir, fname)
        img = Image.open(fpath).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img, fname, pid, index
class RandomErasing(object):
    """Randomly overwrite a rectangle of an image tensor with fixed values.

    The image is indexed as ``img[channel, row, column]`` and modified in
    place.

    Args:
        EPSILON: Erasing is skipped when a uniform draw from ``[0, 1]``
            exceeds this value.
        mean: Per-channel fill values. Only as many channels as both the
            image and ``mean`` provide are overwritten, so single-channel
            tensors are supported.
    """

    def __init__(self, EPSILON=0.5, mean=(0.485, 0.456, 0.406)):
        self.EPSILON = EPSILON
        # Copy so instances never share (or alias) the caller's sequence.
        self.mean = list(mean)

    def __call__(self, img):
        if random.uniform(0, 1) > self.EPSILON:
            return img

        channels, height, width = img.size()[0], img.size()[1], img.size()[2]
        area = height * width

        # Retry until a rectangle with the sampled area/aspect ratio fits.
        for attempt in range(100):
            target_area = random.uniform(0.02, 0.2) * area
            aspect_ratio = random.uniform(0.3, 3)

            h = int(round(math.sqrt(target_area * aspect_ratio)))
            w = int(round(math.sqrt(target_area / aspect_ratio)))

            if w <= width and h <= height:
                top = random.randint(0, height - h)
                left = random.randint(0, width - w)
                for c in range(min(channels, len(self.mean))):
                    img[c, top:top + h, left:left + w] = self.mean[c]
                return img

        # No rectangle fitted within the attempt budget: leave img untouched.
        return img
class _ExemplarMemoryFunction(Function):
    """Static-method autograd function backing ``ExemplarMemory``.

    Forward computes the product of the inputs with the transposed memory.
    Backward returns the gradient for the inputs and, as a side effect,
    updates the memory rows selected by ``targets``.
    """

    @staticmethod
    def forward(ctx, inputs, targets, em, alpha):
        ctx.em = em
        ctx.alpha = alpha
        ctx.save_for_backward(inputs, targets)
        return inputs.mm(em.t())

    @staticmethod
    def backward(ctx, grad_outputs):
        inputs, targets = ctx.saved_tensors
        em, alpha = ctx.em, ctx.alpha
        grad_inputs = None
        if ctx.needs_input_grad[0]:
            # Uses the memory as it was before this step's update.
            grad_inputs = grad_outputs.mm(em)
        # The memory update is bookkeeping, never part of the graph.
        with torch.no_grad():
            for x, y in zip(inputs, targets):
                em[y] = alpha * em[y] + (1. - alpha) * x
                em[y] /= em[y].norm()
        # One gradient slot per forward argument: inputs, targets, em, alpha.
        return grad_inputs, None, None, None


class ExemplarMemory(object):
    """Callable exemplar memory: ``ExemplarMemory(em, alpha)(inputs, targets)``.

    Keeps the original construct-then-call usage while delegating to a
    static-method ``autograd.Function``; current PyTorch releases reject the
    legacy form with instance-level ``forward``/``backward``.

    Args:
        em: Memory tensor; rows indexed by ``targets`` are updated in place
            during the backward pass.
        alpha: Weight kept from the old memory row during the update.
    """

    def __init__(self, em, alpha=0.01):
        self.em = em
        self.alpha = alpha

    def __call__(self, inputs, targets):
        # detach() shares storage with the memory, so in-place updates still
        # reach it, while the memory itself is not treated as an input that
        # requires a gradient.
        return _ExemplarMemoryFunction.apply(inputs, targets, self.em.detach(), self.alpha)


# Invariance learning loss
class InvNet(nn.Module):
    """Invariance loss computed against an exemplar memory.

    Args:
        num_features: Width of each memory row.
        num_classes: Number of memory rows.
        beta: Temperature dividing the similarities before the loss.
        knn: Number of top-ranked memory rows that share the soft target.
        alpha: Base memory update rate; multiplied by ``epoch`` on each call.
    """

    def __init__(self, num_features, num_classes, beta=0.05, knn=6, alpha=0.01):
        super(InvNet, self).__init__()
        # Kept for backward compatibility; the loss itself follows the device
        # of the tensors it receives.
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.num_features = num_features
        self.num_classes = num_classes
        self.alpha = alpha  # Memory update rate
        self.beta = beta  # Temperature fact
        self.knn = knn  # Knn for neighborhood invariance

        # Exemplar memory
        self.em = nn.Parameter(torch.zeros(num_classes, num_features))

    def forward(self, inputs, targets, epoch=None):
        """Return the scalar loss for ``inputs`` against memory rows ``targets``.

        ``epoch`` must be a number despite its ``None`` default: it scales
        the memory update rate and, when greater than 4 with ``knn > 0``,
        switches from plain cross entropy to the soft-target loss.
        """
        alpha = self.alpha * epoch
        outputs = ExemplarMemory(self.em, alpha=alpha)(inputs, targets)

        outputs = outputs / self.beta
        if self.knn > 0 and epoch > 4:
            # With neighborhood invariance
            loss = self.smooth_loss(outputs, targets)
        else:
            # Without neighborhood invariance
            loss = F.cross_entropy(outputs, targets)
        return loss

    def smooth_loss(self, inputs, targets):
        """Cross entropy between the softmax of ``inputs`` and soft targets."""
        targets = self.smooth_hot(inputs.detach().clone(), targets.detach().clone(), self.knn)
        outputs = F.log_softmax(inputs, dim=1)
        loss = - (targets * outputs)
        loss = loss.sum(dim=1)
        loss = loss.mean(dim=0)
        return loss

    def smooth_hot(self, inputs, targets, k=6):
        """Build soft targets: ``1/k`` on the top-``k`` columns, 1 on the target."""
        # Sort
        _, index_sorted = torch.sort(inputs, dim=1, descending=True)

        # Allocate next to the inputs so CPU and GPU tensors never get mixed.
        ones_mat = torch.ones(targets.size(0), k, device=inputs.device)
        targets = torch.unsqueeze(targets, 1)
        targets_onehot = torch.zeros(inputs.size(), device=inputs.device)

        weights = F.softmax(ones_mat, dim=1)
        targets_onehot.scatter_(1, index_sorted[:, 0:k], ones_mat * weights)
        targets_onehot.scatter_(1, targets, float(1))

        return targets_onehot
Download the datasets 13 | 14 | - Market-1501 [[BaiduYun]](http://pan.baidu.com/s/1ntIi2Op) [[GoogleDrive]](https://drive.google.com/file/d/0B8-rUzbwVRk0c054eEozWG9COHM/view?usp=sharing) CamStyle (generated by CycleGAN) [[GoogleDrive]](https://drive.google.com/open?id=1klY3nBS2sD4pxcyUbSlhtfTk9ButMNW1) [[BaiduYun]](https://pan.baidu.com/s/1NHv1UfI9bKo1XrDx8g70ow) (password: 6bu4) 15 | 16 | - DukeMTMC-reID [[BaiduYun]](https://pan.baidu.com/s/1jS0XM7Var5nQGcbf9xUztw) (password: bhbh) [[GoogleDrive]](https://drive.google.com/open?id=1jjE85dRCMOgRtvJ5RQV9-Afs-2_5dY3O) CamStyle (generated by CycleGAN) [[GoogleDrive]](https://drive.google.com/open?id=1tNc-7C3mpSFa_xOti2PmUVXTEiqmJlUI) [[BaiduYun]](https://pan.baidu.com/s/1NHv1UfI9bKo1XrDx8g70ow) (password: 6bu4) 17 | 18 | - MSMT17 + CamStyle (generated by StarGAN) [[BaiduYun]](https://pan.baidu.com/s/1NHv1UfI9bKo1XrDx8g70ow) (password: 6bu4) [[GoogleDrive]](https://drive.google.com/open?id=11I7p0Dr-TCC9TnvY8rWp0B47gCB3K0T4) We reorganized MSMT17 to follow the same directory structure as Market-1501. 
19 | 20 | - Unzip each dataset and its corresponding CamStyle images under 'ECN/data/' 21 | 22 | Ensure the file structure is as follows: 23 | 24 | ``` 25 | ECN/data 26 | │ 27 | └───market OR duke OR msmt17 28 | │ 29 | └───bounding_box_train 30 | │ 31 | └───bounding_box_test 32 | │ 33 | └───bounding_box_train_camstyle 34 | │ 35 | └───query 36 | ``` 37 | 38 | ### Training and testing the domain adaptation model for person re-ID 39 | 40 | ```Shell 41 | # For Duke to Market-1501 42 | python main.py -s duke -t market --logs-dir logs/duke2market-ECN 43 | 44 | # For Market-1501 to Duke 45 | python main.py -s market -t duke --logs-dir logs/market2duke-ECN 46 | 47 | # For Market-1501 to MSMT17 48 | python main.py -s market -t msmt17 --logs-dir logs/market2msmt17-ECN --re 0 49 | 50 | # For Duke to MSMT17 51 | python main.py -s duke -t msmt17 --logs-dir logs/duke2msmt17-ECN --re 0 52 | ``` 53 | 54 | 55 | ### Results 56 | 57 | ![](imgs/market_duke.png) 58 | 59 | ![](imgs/msmt17.png) 60 | 61 | 62 | ### References 63 | 64 | - [1] Our code is built on [open-reid](https://github.com/Cysu/open-reid) 65 | 66 | - [2] Camera Style Adaptation for Person Re-identification. CVPR 2018. 67 | 68 | - [3] Generalizing A Person Retrieval Model Hetero- and Homogeneously. ECCV 2018. 69 | 70 | 71 | ### Citation 72 | 73 | If you find this code useful in your research, please consider citing: 74 | 75 | @inproceedings{zhong2019invariance, 76 | title={Invariance Matters: Exemplar Memory for Domain Adaptive Person Re-identification}, 77 | author={Zhong, Zhun and Zheng, Liang and Luo, Zhiming and Li, Shaozi and Yang, Yi}, 78 | booktitle={Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 79 | year={2019}, 80 | } 81 | 82 | 83 | ### Contact me 84 | 85 | If you have any questions about this code, please do not hesitate to contact me. 
class DA(object):
    """Index the image lists of a source/target dataset pair.

    On construction the source training split and the target training,
    gallery and query splits are scanned below ``data_dir`` and a summary
    table is printed. Every list entry is ``(fname, pid, cam)``; ``cam`` is
    the number parsed from the file name minus one.
    """

    def __init__(self, data_dir, source, target):
        # source / target image root
        self.source_images_dir = osp.join(data_dir, source)
        self.target_images_dir = osp.join(data_dir, target)

        # sub-directories of each split
        self.source_train_path = 'bounding_box_train'
        self.target_train_path = 'bounding_box_train'
        self.target_train_camstyle_path = 'bounding_box_train_camstyle'
        self.gallery_path = 'bounding_box_test'
        self.query_path = 'query'

        self.source_train = []
        self.target_train = []
        self.query = []
        self.gallery = []
        self.num_train_ids = 0
        self.num_query_ids = 0
        self.num_gallery_ids = 0

        self.cam_dict = self.set_cam_dict()
        self.target_num_cam = self.cam_dict[target]
        self.source_num_cam = self.cam_dict[source]

        self.load()

    def set_cam_dict(self):
        """Return the camera count registered for each dataset name."""
        return {'market': 6, 'duke': 8, 'msmt17': 15}

    def preprocess(self, images_dir, path, relabel=True):
        """Scan one split and return ``(samples, number_of_distinct_ids)``.

        File names are parsed with ``<pid>_c<cam>``; entries whose pid is -1
        are dropped. With ``relabel`` the ids are renumbered consecutively in
        order of first appearance, otherwise they are kept as parsed.
        """
        id_cam_pattern = re.compile(r'([-\d]+)_c([-\d]+)')
        extension = '*.png' if 'cuhk03' in images_dir else '*.jpg'
        pid_map = {}
        samples = []
        for fpath in sorted(glob(osp.join(images_dir, path, extension))):
            fname = osp.basename(fpath)
            raw_pid, raw_cam = (int(g) for g in id_cam_pattern.search(fname).groups())
            if raw_pid == -1:
                continue  # junk images are just ignored
            if raw_pid not in pid_map:
                pid_map[raw_pid] = len(pid_map) if relabel else raw_pid
            samples.append((fname, pid_map[raw_pid], raw_cam - 1))
        return samples, int(len(pid_map))

    def load(self):
        """Populate every split list and print the summary table."""
        source_dir, target_dir = self.source_images_dir, self.target_images_dir
        self.source_train, self.num_train_ids = self.preprocess(source_dir, self.source_train_path)
        self.target_train, _ = self.preprocess(target_dir, self.target_train_path)
        self.gallery, self.num_gallery_ids = self.preprocess(target_dir, self.gallery_path, False)
        self.query, self.num_query_ids = self.preprocess(target_dir, self.query_path, False)

        print(self.__class__.__name__, "dataset loaded")
        print(" subset | # ids | # images")
        print(" ---------------------------")
        print(" source train | {:5d} | {:8d}"
              .format(self.num_train_ids, len(self.source_train)))
        print(" target train | 'Unknown' | {:8d}"
              .format(len(self.target_train)))
        print(" query | {:5d} | {:8d}"
              .format(self.num_query_ids, len(self.query)))
        print(" gallery | {:5d} | {:8d}"
              .format(self.num_gallery_ids, len(self.gallery)))
torch.device('cuda' if torch.cuda.is_available() else 'cpu') 20 | self.model = model 21 | self.model_inv = model_inv 22 | self.pid_criterion = torch.nn.CrossEntropyLoss().to(self.device) 23 | self.lmd = lmd 24 | 25 | def train(self, epoch, data_loader, target_train_loader, optimizer, print_freq=1): 26 | self.set_model_train() 27 | 28 | batch_time = AverageMeter() 29 | data_time = AverageMeter() 30 | losses = AverageMeter() 31 | precisions = AverageMeter() 32 | 33 | end = time.time() 34 | 35 | # Target iter 36 | target_iter = iter(target_train_loader) 37 | 38 | # Train 39 | for i, inputs in enumerate(data_loader): 40 | data_time.update(time.time() - end) 41 | 42 | # Source inputs 43 | inputs, pids = self._parse_data(inputs) 44 | 45 | # Target inputs 46 | try: 47 | inputs_target = next(target_iter) 48 | except: 49 | target_iter = iter(target_train_loader) 50 | inputs_target = next(target_iter) 51 | inputs_target, index_target = self._parse_tgt_data(inputs_target) 52 | 53 | # Source pid loss 54 | outputs = self.model(inputs) 55 | source_pid_loss = self.pid_criterion(outputs, pids) 56 | prec, = accuracy(outputs.data, pids.data) 57 | prec1 = prec[0] 58 | 59 | # Target invariance loss 60 | outputs = self.model(inputs_target, 'tgt_feat') 61 | 62 | loss_un = self.model_inv(outputs, index_target, epoch=epoch) 63 | 64 | loss = (1 - self.lmd) * source_pid_loss + self.lmd * loss_un 65 | 66 | loss_print = {} 67 | loss_print['s_pid_loss'] = source_pid_loss.item() 68 | loss_print['t_un_loss'] = loss_un.item() 69 | 70 | losses.update(loss.item(), outputs.size(0)) 71 | precisions.update(prec1, outputs.size(0)) 72 | 73 | optimizer.zero_grad() 74 | loss.backward() 75 | optimizer.step() 76 | 77 | batch_time.update(time.time() - end) 78 | end = time.time() 79 | 80 | if (i + 1) % print_freq == 0: 81 | log = "Epoch: [{}][{}/{}], Time {:.3f} ({:.3f}), Data {:.3f} ({:.3f}), Loss {:.3f} ({:.3f}), Prec {:.2%} ({:.2%})" \ 82 | .format(epoch, i + 1, len(data_loader), 83 | batch_time.val, 
def extract_cnn_feature(model, inputs, output_feature=None):
    """Run ``model`` on one batch in evaluation mode and return CPU features.

    Args:
        model: Callable invoked as ``model(inputs, output_feature)``; it is
            switched to eval mode first.
        inputs: Batch accepted by ``to_torch``; moved to CUDA when available,
            otherwise kept on the CPU.
        output_feature: Passed through unchanged as the model's second
            argument.

    Returns:
        The model output as a CPU tensor that carries no autograd history.
    """
    model.eval()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    inputs = to_torch(inputs).to(device)
    # Pure inference: skip building an autograd graph that is discarded anyway.
    with torch.no_grad():
        outputs = model(inputs, output_feature)
    return outputs.detach().cpu()
def extract_features(model, data_loader, print_freq=1, output_feature=None):
    """Compute a feature vector for every image served by ``data_loader``.

    Args:
        model: network passed to ``extract_cnn_feature``; put into eval mode.
        data_loader: iterable yielding ``(imgs, fnames, pids, _)`` batches.
        print_freq: a progress line is printed every ``print_freq`` batches.
        output_feature: forwarded unchanged to ``extract_cnn_feature``.

    Returns:
        Two ``OrderedDict`` objects keyed by file name, in loader order:
        ``features`` (one row of the model output per image) and ``labels``
        (the matching ``pid`` entry of the batch).
    """
    model.eval()
    batch_time = AverageMeter()
    data_time = AverageMeter()

    features = OrderedDict()
    labels = OrderedDict()

    end = time.time()
    # Nothing is back-propagated here, so do not record an autograd graph.
    with torch.no_grad():
        for i, (imgs, fnames, pids, _) in enumerate(data_loader):
            data_time.update(time.time() - end)

            outputs = extract_cnn_feature(model, imgs, output_feature)
            for fname, output, pid in zip(fnames, outputs, pids):
                features[fname] = output
                labels[fname] = pid

            batch_time.update(time.time() - end)
            end = time.time()

            if (i + 1) % print_freq == 0:
                print('Extract Features: [{}/{}]\t'
                      'Time {:.3f} ({:.3f})\t'
                      'Data {:.3f} ({:.3f})\t'
                      .format(i + 1, len(data_loader),
                              batch_time.val, batch_time.avg,
                              data_time.val, data_time.avg))

    return features, labels


def pairwise_distance(query_features, gallery_features, query=None, gallery=None):
    """Return the squared Euclidean distance between query and gallery features.

    Args:
        query_features: mapping from file name to a feature tensor.
        gallery_features: mapping from file name to a feature tensor.
        query: iterable of ``(fname, _, _)`` triples selecting and ordering the
            rows; when ``None`` every entry of ``query_features`` is used in
            mapping order.
        gallery: same as ``query`` for the columns / ``gallery_features``.

    Returns:
        A ``(num_query, num_gallery)`` tensor of squared distances, clamped
        from below at ``1e-8``.
    """
    if query is None:
        query_rows = list(query_features.values())
    else:
        query_rows = [query_features[f] for f, _, _ in query]
    if gallery is None:
        gallery_rows = list(gallery_features.values())
    else:
        gallery_rows = [gallery_features[f] for f, _, _ in gallery]

    x = torch.stack(query_rows, 0)
    y = torch.stack(gallery_rows, 0)

    m, n = x.size(0), y.size(0)
    x = x.view(m, -1)
    y = y.view(n, -1)

    # ||x - y||^2 = ||x||^2 + ||y||^2 - 2 * x . y
    dist = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(m, n) + \
        torch.pow(y, 2).sum(dim=1, keepdim=True).expand(n, m).t()
    # Keyword form: the positional (beta, alpha, mat1, mat2) overload is deprecated.
    dist.addmm_(x, y.t(), beta=1, alpha=-2)
    # We use clamp to keep numerical stability
    return dist.clamp(min=1e-8)


def evaluate_all(distmat, query=None, gallery=None,
                 query_ids=None, gallery_ids=None,
                 query_cams=None, gallery_cams=None,
                 cmc_topk=(1, 5, 10, 20)):
    """Print mAP and CMC scores for a distance matrix.

    Args:
        distmat: query-by-gallery distance matrix handed to ``map_cmc``.
        query, gallery: iterables of ``(fname, pid, cam)`` triples. When both
            are given, the ids and cameras are taken from them.
        query_ids, gallery_ids, query_cams, gallery_cams: explicit ids and
            cameras; all four are required when ``query``/``gallery`` are not
            both given.
        cmc_topk: ranks whose CMC score is printed.

    Returns:
        The rank-1 CMC score ``all_cmc[0]``. The previous implementation
        returned ``None``; its commented-out legacy path returned the rank-1
        score.
    """
    if query is not None and gallery is not None:
        query_ids = [pid for _, pid, _ in query]
        gallery_ids = [pid for _, pid, _ in gallery]
        query_cams = [cam for _, _, cam in query]
        gallery_cams = [cam for _, _, cam in gallery]
    else:
        assert (query_ids is not None and gallery_ids is not None
                and query_cams is not None and gallery_cams is not None)

    # Evaluation
    mAP, all_cmc = map_cmc(distmat, query_ids, gallery_ids, query_cams, gallery_cams)
    print('Mean AP: {:4.1%}'.format(mAP))
    print('CMC Scores')
    for k in cmc_topk:
        print(' top-{:<4}{:12.1%}'
              .format(k, all_cmc[k - 1]))
    return all_cmc[0]


class Evaluator(object):
    """Extract query/gallery features with a model and report mAP / CMC."""

    def __init__(self, model):
        super(Evaluator, self).__init__()
        self.model = model

    def evaluate(self, query_loader, gallery_loader, query, gallery,
                 output_feature=None, print_freq=1):
        """Run the full evaluation and return the result of ``evaluate_all``.

        Args:
            query_loader, gallery_loader: loaders passed to ``extract_features``.
            query, gallery: ``(fname, pid, cam)`` triples describing the sets.
            output_feature: forwarded to ``extract_features``.
            print_freq: progress print interval for feature extraction.
        """
        query_features, _ = extract_features(
            self.model, query_loader, print_freq, output_feature)
        gallery_features, _ = extract_features(
            self.model, gallery_loader, print_freq, output_feature)
        distmat = pairwise_distance(query_features, gallery_features, query, gallery)
        return evaluate_all(distmat, query=query, gallery=gallery)
class ResNet(nn.Module):
    """torchvision ResNet backbone with an optional embedding and classifier head.

    Args:
        depth: one of 18, 34, 50, 101, 152; any other value raises ``KeyError``.
        pretrained: forwarded to the torchvision constructor; when false,
            ``reset_params`` re-initialises the weights.
        cut_at_pooling: when true no head is built and ``forward`` returns the
            feature map produced before ``avgpool``.
        num_features: size of the embedding layer; ``0`` disables it and
            ``num_features`` becomes the backbone's output width.
        norm: L2-normalise the feature in ``forward``.
        dropout: dropout probability of the head.
        num_classes: size of the classifier; ``0`` disables it.
        num_triplet_features: stored on the instance only.
    """

    __factory = {
        18: torchvision.models.resnet18,
        34: torchvision.models.resnet34,
        50: torchvision.models.resnet50,
        101: torchvision.models.resnet101,
        152: torchvision.models.resnet152,
    }

    def __init__(self, depth, pretrained=True, cut_at_pooling=False,
                 num_features=0, norm=False, dropout=0, num_classes=0, num_triplet_features=0):
        super(ResNet, self).__init__()

        self.depth = depth
        self.pretrained = pretrained
        self.cut_at_pooling = cut_at_pooling

        # Construct base (pretrained) resnet
        if depth not in ResNet.__factory:
            raise KeyError("Unsupported depth:", depth)
        self.base = ResNet.__factory[depth](pretrained=pretrained)

        # Fix layers [conv1 ~ layer2]: freeze every child that precedes layer3.
        for name, module in self.base._modules.items():
            if name == "layer3":
                break
            for param in module.parameters():
                param.requires_grad = False

        if not self.cut_at_pooling:
            self.num_features = num_features
            self.norm = norm
            self.dropout = dropout
            self.has_embedding = num_features > 0
            self.num_classes = num_classes
            self.num_triplet_features = num_triplet_features

            self.l2norm = Normalize(2)

            out_planes = self.base.fc.in_features

            # Append new layers
            if self.has_embedding:
                self.feat = nn.Linear(out_planes, self.num_features)
                self.feat_bn = nn.BatchNorm1d(self.num_features)
                init.kaiming_normal_(self.feat.weight, mode='fan_out')
                init.constant_(self.feat.bias, 0)
                init.constant_(self.feat_bn.weight, 1)
                init.constant_(self.feat_bn.bias, 0)
            else:
                # Change the num_features to CNN output channels
                self.num_features = out_planes
            # `>= 0`, not `> 0`: forward() applies self.drop to tgt_feat
            # unconditionally in the embedding branch, so it must always exist.
            if self.dropout >= 0:
                self.drop = nn.Dropout(self.dropout)
            if self.num_classes > 0:
                self.classifier = nn.Linear(self.num_features, self.num_classes)
                init.normal_(self.classifier.weight, std=0.001)
                init.constant_(self.classifier.bias, 0)

        if not self.pretrained:
            self.reset_params()

    def forward(self, x, output_feature=None):
        """Run the backbone and, unless cut at pooling, the head.

        ``output_feature`` selects an early exit: ``'pool5'`` returns the
        L2-normalised globally pooled feature; ``'tgt_feat'`` (embedding
        enabled) returns the normalised, dropped-out embedding. Otherwise the
        output of the last enabled head layer is returned.
        """
        for name, module in self.base._modules.items():
            if name == 'avgpool':
                break
            else:
                x = module(x)

        if self.cut_at_pooling:
            return x

        # Global average pooling over the remaining spatial dimensions.
        x = F.avg_pool2d(x, x.size()[2:])
        x = x.view(x.size(0), -1)

        if output_feature == 'pool5':
            x = F.normalize(x)
            return x

        if self.has_embedding:
            x = self.feat(x)
            x = self.feat_bn(x)
            tgt_feat = F.normalize(x)
            tgt_feat = self.drop(tgt_feat)
            if output_feature == 'tgt_feat':
                return tgt_feat
        if self.norm:
            x = F.normalize(x)
        elif self.has_embedding:
            x = F.relu(x)
        if self.dropout > 0:
            x = self.drop(x)
        if self.num_classes > 0:
            x = self.classifier(x)
        return x

    def reset_params(self):
        """Re-initialise every Conv2d, BatchNorm2d and Linear layer in place."""
        # Uses the in-place `*_` initialisers, matching __init__; the
        # non-underscore spellings are deprecated aliases.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    init.constant_(m.bias, 0)


def resnet18(**kwargs):
    """Build a ``ResNet`` of depth 18; ``kwargs`` go to ``ResNet``."""
    return ResNet(18, **kwargs)


def resnet34(**kwargs):
    """Build a ``ResNet`` of depth 34; ``kwargs`` go to ``ResNet``."""
    return ResNet(34, **kwargs)


def resnet50(**kwargs):
    """Build a ``ResNet`` of depth 50; ``kwargs`` go to ``ResNet``."""
    return ResNet(50, **kwargs)


def resnet101(**kwargs):
    """Build a ``ResNet`` of depth 101; ``kwargs`` go to ``ResNet``."""
    return ResNet(101, **kwargs)


def resnet152(**kwargs):
    """Build a ``ResNet`` of depth 152; ``kwargs`` go to ``ResNet``."""
    return ResNet(152, **kwargs)
def _unique_sample(ids_dict, num):
    """Return a boolean mask of length ``num`` with one random index set per id.

    Args:
        ids_dict: mapping whose values are non-empty sequences of indices
            into the mask; one index is drawn from each with
            ``np.random.choice``.
        num: length of the returned mask.
    """
    # Builtin `bool`: the `np.bool` alias was removed in NumPy 1.24.
    mask = np.zeros(num, dtype=bool)
    for indices in ids_dict.values():
        mask[np.random.choice(indices)] = True
    return mask


def average_precision_score(y_true, y_score, average="macro",
                            sample_weight=None):
    """Average precision computed as the area under the precision-recall curve.

    The area is ``auc(recall, precision)`` over scikit-learn's
    ``precision_recall_curve``; averaging is delegated to scikit-learn's
    ``_average_binary_score``.
    """
    # Imported lazily so a scikit-learn layout change cannot break importing
    # this module: the private helper moved from `sklearn.metrics.base` to
    # `sklearn.metrics._base`.
    from sklearn.metrics import auc, precision_recall_curve
    try:
        from sklearn.metrics._base import _average_binary_score
    except ImportError:
        from sklearn.metrics.base import _average_binary_score

    def _binary_average_precision(y_true, y_score, sample_weight=None):
        precision, recall, thresholds = precision_recall_curve(
            y_true, y_score, sample_weight=sample_weight)
        return auc(recall, precision)

    return _average_binary_score(_binary_average_precision, y_true, y_score,
                                 average, sample_weight=sample_weight)


def map_cmc(distmat, query_ids=None, gallery_ids=None,
            query_cams=None, gallery_cams=None, topk=100):
    """Compute mean average precision and the CMC curve in one pass.

    Args:
        distmat: ``(m, n)`` query-by-gallery distances; converted with
            ``to_numpy``.
        query_ids, gallery_ids: identity labels; default to ``arange``.
        query_cams, gallery_cams: camera labels; default to all zeros for
            queries and all ones for the gallery.
        topk: length of the returned CMC curve.

    Returns:
        ``(mAP, cmc)`` where ``cmc`` is an array of length ``topk`` holding the
        cumulative fraction of valid queries whose first correct match is at
        or before each rank.

    Raises:
        RuntimeError: if no query has a correct match among its valid
            gallery entries.
    """
    distmat = to_numpy(distmat)
    m, n = distmat.shape
    # Fill up default values
    if query_ids is None:
        query_ids = np.arange(m)
    if gallery_ids is None:
        gallery_ids = np.arange(n)
    if query_cams is None:
        query_cams = np.zeros(m).astype(np.int32)
    if gallery_cams is None:
        gallery_cams = np.ones(n).astype(np.int32)
    # Ensure numpy array
    query_ids = np.asarray(query_ids)
    gallery_ids = np.asarray(gallery_ids)
    query_cams = np.asarray(query_cams)
    gallery_cams = np.asarray(gallery_cams)
    # Sort and find correct matches
    indices = np.argsort(distmat, axis=1)
    matches = (gallery_ids[indices] == query_ids[:, np.newaxis])
    # Compute mAP and CMC for each query
    ret = np.zeros(topk)
    aps = []
    num_valid_queries = 0
    for i in range(m):
        order = indices[i]
        # Filter out the same id and same camera
        valid = ((gallery_ids[order] != query_ids[i]) |
                 (gallery_cams[order] != query_cams[i]))
        y_true = matches[i, valid]
        if not np.any(y_true):
            continue

        # Compute mAP; scores are negated distances so larger means closer.
        y_score = -distmat[i][order][valid]
        aps.append(average_precision_score(y_true, y_score))

        # Compute CMC: only the rank of the first correct match counts.
        first_hit = np.nonzero(y_true)[0][0]
        if first_hit < topk:
            ret[first_hit] += 1
        num_valid_queries += 1
    if num_valid_queries == 0:
        raise RuntimeError("No valid query")
    return np.mean(aps), ret.cumsum() / num_valid_queries
| valid = ((gallery_ids[indices[i]] != query_ids[i]) | 111 | (gallery_cams[indices[i]] != query_cams[i])) 112 | if separate_camera_set: 113 | # Filter out samples from same camera 114 | valid &= (gallery_cams[indices[i]] != query_cams[i]) 115 | if not np.any(matches[i, valid]): continue 116 | if single_gallery_shot: 117 | repeat = 10 118 | gids = gallery_ids[indices[i][valid]] 119 | inds = np.where(valid)[0] 120 | ids_dict = defaultdict(list) 121 | for j, x in zip(inds, gids): 122 | ids_dict[x].append(j) 123 | else: 124 | repeat = 1 125 | for _ in range(repeat): 126 | if single_gallery_shot: 127 | # Randomly choose one instance for each id 128 | sampled = (valid & _unique_sample(ids_dict, len(valid))) 129 | index = np.nonzero(matches[i, sampled])[0] 130 | else: 131 | index = np.nonzero(matches[i, valid])[0] 132 | delta = 1. / (len(index) * repeat) 133 | for j, k in enumerate(index): 134 | if k - j >= topk: break 135 | if first_match_break: 136 | ret[k - j] += 1 137 | break 138 | ret[k - j] += delta 139 | num_valid_queries += 1 140 | if num_valid_queries == 0: 141 | raise RuntimeError("No valid query") 142 | return ret.cumsum() / num_valid_queries 143 | 144 | 145 | def mean_ap(distmat, query_ids=None, gallery_ids=None, 146 | query_cams=None, gallery_cams=None): 147 | distmat = to_numpy(distmat) 148 | m, n = distmat.shape 149 | # Fill up default values 150 | if query_ids is None: 151 | query_ids = np.arange(m) 152 | if gallery_ids is None: 153 | gallery_ids = np.arange(n) 154 | if query_cams is None: 155 | query_cams = np.zeros(m).astype(np.int32) 156 | if gallery_cams is None: 157 | gallery_cams = np.ones(n).astype(np.int32) 158 | # Ensure numpy array 159 | query_ids = np.asarray(query_ids) 160 | gallery_ids = np.asarray(gallery_ids) 161 | query_cams = np.asarray(query_cams) 162 | gallery_cams = np.asarray(gallery_cams) 163 | # Sort and find correct matches 164 | indices = np.argsort(distmat, axis=1) 165 | matches = (gallery_ids[indices] == query_ids[:, np.newaxis]) 
166 | # Compute AP for each query 167 | aps = [] 168 | for i in range(m): 169 | # Filter out the same id and same camera 170 | valid = ((gallery_ids[indices[i]] != query_ids[i]) | 171 | (gallery_cams[indices[i]] != query_cams[i])) 172 | y_true = matches[i, valid] 173 | y_score = -distmat[i][indices[i]][valid] 174 | if not np.any(y_true): continue 175 | aps.append(average_precision_score(y_true, y_score)) 176 | if len(aps) == 0: 177 | raise RuntimeError("No valid query") 178 | return np.mean(aps) 179 | 180 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | import argparse 3 | import os.path as osp 4 | 5 | import numpy as np 6 | import sys 7 | import torch 8 | from torch import nn 9 | from torch.backends import cudnn 10 | from torch.utils.data import DataLoader 11 | from reid.datasets.domain_adaptation import DA 12 | 13 | from reid import models 14 | from reid.trainers import Trainer 15 | from reid.evaluators import Evaluator 16 | from reid.utils.data import transforms as T 17 | from reid.utils.data.preprocessor import Preprocessor, UnsupervisedCamStylePreprocessor 18 | from reid.utils.logging import Logger 19 | from reid.utils.serialization import load_checkpoint, save_checkpoint 20 | from reid.loss import InvNet 21 | 22 | 23 | def get_data(data_dir, source, target, height, width, batch_size, re=0, workers=8): 24 | 25 | dataset = DA(data_dir, source, target) 26 | 27 | normalizer = T.Normalize(mean=[0.485, 0.456, 0.406], 28 | std=[0.229, 0.224, 0.225]) 29 | 30 | num_classes = dataset.num_train_ids 31 | 32 | train_transformer = T.Compose([ 33 | T.RandomSizedRectCrop(height, width), 34 | T.RandomHorizontalFlip(), 35 | T.ToTensor(), 36 | normalizer, 37 | T.RandomErasing(EPSILON=re), 38 | ]) 39 | 40 | test_transformer = T.Compose([ 41 | T.Resize((height, width), interpolation=3), 42 | 
T.ToTensor(), 43 | normalizer, 44 | ]) 45 | 46 | source_train_loader = DataLoader( 47 | Preprocessor(dataset.source_train, root=osp.join(dataset.source_images_dir, dataset.source_train_path), 48 | transform=train_transformer), 49 | batch_size=batch_size, num_workers=workers, 50 | shuffle=True, pin_memory=True, drop_last=True) 51 | 52 | target_train_loader = DataLoader( 53 | UnsupervisedCamStylePreprocessor(dataset.target_train, 54 | root=osp.join(dataset.target_images_dir, dataset.target_train_path), 55 | camstyle_root=osp.join(dataset.target_images_dir, 56 | dataset.target_train_camstyle_path), 57 | num_cam=dataset.target_num_cam, transform=train_transformer), 58 | batch_size=batch_size, num_workers=workers, 59 | shuffle=True, pin_memory=True, drop_last=True) 60 | 61 | query_loader = DataLoader( 62 | Preprocessor(dataset.query, 63 | root=osp.join(dataset.target_images_dir, dataset.query_path), transform=test_transformer), 64 | batch_size=batch_size, num_workers=workers, 65 | shuffle=False, pin_memory=True) 66 | 67 | gallery_loader = DataLoader( 68 | Preprocessor(dataset.gallery, 69 | root=osp.join(dataset.target_images_dir, dataset.gallery_path), transform=test_transformer), 70 | batch_size=batch_size, num_workers=workers, 71 | shuffle=False, pin_memory=True) 72 | 73 | return dataset, num_classes, source_train_loader, target_train_loader, query_loader, gallery_loader 74 | 75 | 76 | def main(args): 77 | # For fast training. 
78 | cudnn.benchmark = True 79 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 80 | 81 | # Redirect print to both console and log file 82 | if not args.evaluate: 83 | sys.stdout = Logger(osp.join(args.logs_dir, 'log.txt')) 84 | print('log_dir=', args.logs_dir) 85 | 86 | # Print logs 87 | print(args) 88 | 89 | # Create data loaders 90 | dataset, num_classes, source_train_loader, target_train_loader, \ 91 | query_loader, gallery_loader = get_data(args.data_dir, args.source, 92 | args.target, args.height, 93 | args.width, args.batch_size, 94 | args.re, args.workers) 95 | 96 | # Create model 97 | model = models.create(args.arch, num_features=args.features, 98 | dropout=args.dropout, num_classes=num_classes) 99 | 100 | # Invariance learning model 101 | num_tgt = len(dataset.target_train) 102 | model_inv = InvNet(args.features, num_tgt, 103 | beta=args.inv_beta, knn=args.knn, 104 | alpha=args.inv_alpha) 105 | 106 | # Load from checkpoint 107 | start_epoch = 0 108 | if args.resume: 109 | checkpoint = load_checkpoint(args.resume) 110 | model.load_state_dict(checkpoint['state_dict']) 111 | model_inv.load_state_dict(checkpoint['state_dict_inv']) 112 | start_epoch = checkpoint['epoch'] 113 | print("=> Start epoch {} " 114 | .format(start_epoch)) 115 | 116 | # Set model 117 | model = nn.DataParallel(model).to(device) 118 | model_inv = model_inv.to(device) 119 | 120 | # Evaluator 121 | evaluator = Evaluator(model) 122 | if args.evaluate: 123 | print("Test:") 124 | evaluator.evaluate(query_loader, gallery_loader, dataset.query, 125 | dataset.gallery, args.output_feature) 126 | return 127 | 128 | # Optimizer 129 | base_param_ids = set(map(id, model.module.base.parameters())) 130 | 131 | base_params_need_for_grad = filter(lambda p: p.requires_grad, model.module.base.parameters()) 132 | 133 | new_params = [p for p in model.parameters() if 134 | id(p) not in base_param_ids] 135 | param_groups = [ 136 | {'params': base_params_need_for_grad, 'lr_mult': 0.1}, 
137 | {'params': new_params, 'lr_mult': 1.0}] 138 | 139 | optimizer = torch.optim.SGD(param_groups, lr=args.lr, 140 | momentum=args.momentum, 141 | weight_decay=args.weight_decay, 142 | nesterov=True) 143 | 144 | # Trainer 145 | trainer = Trainer(model, model_inv, lmd=args.lmd) 146 | 147 | # Schedule learning rate 148 | def adjust_lr(epoch): 149 | step_size = args.epochs_decay 150 | lr = args.lr * (0.1 ** (epoch // step_size)) 151 | for g in optimizer.param_groups: 152 | g['lr'] = lr * g.get('lr_mult', 1) 153 | 154 | # Start training 155 | for epoch in range(start_epoch, args.epochs): 156 | adjust_lr(epoch) 157 | trainer.train(epoch, source_train_loader, target_train_loader, optimizer) 158 | 159 | save_checkpoint({ 160 | 'state_dict': model.module.state_dict(), 161 | 'state_dict_inv': model_inv.state_dict(), 162 | 'epoch': epoch + 1, 163 | }, fpath=osp.join(args.logs_dir, 'checkpoint.pth.tar')) 164 | 165 | print('\n * Finished epoch {:3d} \n'. 166 | format(epoch)) 167 | 168 | # Final test 169 | print('Test with best model:') 170 | evaluator = Evaluator(model) 171 | evaluator.evaluate(query_loader, gallery_loader, dataset.query, 172 | dataset.gallery, args.output_feature) 173 | 174 | 175 | if __name__ == '__main__': 176 | parser = argparse.ArgumentParser(description="Invariance Learning for Domain Adaptive Re-ID") 177 | # source 178 | parser.add_argument('-s', '--source', type=str, default='duke', 179 | choices=['market', 'duke', 'msmt17']) 180 | # target 181 | parser.add_argument('-t', '--target', type=str, default='market', 182 | choices=['market', 'duke', 'msmt17']) 183 | # imgs setting 184 | parser.add_argument('-b', '--batch-size', type=int, default=128) 185 | parser.add_argument('-j', '--workers', type=int, default=8) 186 | parser.add_argument('--height', type=int, default=256, 187 | help="input height, default: 256") 188 | parser.add_argument('--width', type=int, default=128, 189 | help="input width, default: 128") 190 | # model 191 | 
parser.add_argument('-a', '--arch', type=str, default='resnet50', 192 | choices=models.names()) 193 | parser.add_argument('--features', type=int, default=4096) 194 | parser.add_argument('--dropout', type=float, default=0.5) 195 | # optimizer 196 | parser.add_argument('--lr', type=float, default=0.1, 197 | help="learning rate of new parameters, for ImageNet pretrained" 198 | "parameters it is 10 times smaller than this") 199 | parser.add_argument('--momentum', type=float, default=0.9) 200 | parser.add_argument('--weight-decay', type=float, default=5e-4) 201 | # training configs 202 | parser.add_argument('--resume', type=str, default='', metavar='PATH') 203 | parser.add_argument('--evaluate', action='store_true', 204 | help="evaluation only") 205 | parser.add_argument('--epochs', type=int, default=60) 206 | parser.add_argument('--epochs_decay', type=int, default=40) 207 | parser.add_argument('--print-freq', type=int, default=1) 208 | # metric learning 209 | parser.add_argument('--dist-metric', type=str, default='euclidean') 210 | # misc 211 | working_dir = osp.dirname(osp.abspath(__file__)) 212 | parser.add_argument('--data-dir', type=str, metavar='PATH', 213 | default=osp.join(working_dir, 'data')) 214 | parser.add_argument('--logs-dir', type=str, metavar='PATH', 215 | default=osp.join(working_dir, 'logs')) 216 | parser.add_argument('--output_feature', type=str, default='pool5') 217 | # random erasing 218 | parser.add_argument('--re', type=float, default=0.5) 219 | # Invariance learning 220 | parser.add_argument('--inv-alpha', type=float, default=0.01, 221 | help='update rate for the exemplar memory in invariance learning') 222 | parser.add_argument('--inv-beta', type=float, default=0.05, 223 | help='The temperature in invariance learning') 224 | parser.add_argument('--knn', default=6, type=int, 225 | help='number of KNN for neighborhood invariance') 226 | parser.add_argument('--lmd', type=float, default=0.3, 227 | help='weight controls the importance of the source 
loss and the target loss.') 228 | args = parser.parse_args() 229 | main(args) 230 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
def center_crop(x, center_crop_size):
    """Cut the central window out of a 3-D array along its last two axes.

    Args:
        x: numpy array with exactly three dimensions. Axis 0 is kept whole;
            axes 1 and 2 are cropped.
        center_crop_size: indexable whose first two items are the requested
            lengths along axis 1 and axis 2 respectively.

    Returns:
        A slice of ``x`` with shape
        ``(x.shape[0], center_crop_size[0], center_crop_size[1])``.

    Raises:
        AssertionError: if ``x`` is not 3-D.
        ValueError: if a requested length exceeds the matching axis of ``x``.
    """
    assert x.ndim == 3
    size_a, size_b = center_crop_size[0], center_crop_size[1]
    len_a, len_b = x.shape[1], x.shape[2]
    if size_a > len_a or size_b > len_b:
        # A negative start index would wrap around and silently return the
        # wrong (possibly empty) region.
        raise ValueError(
            'crop size (%d, %d) exceeds input size (%d, %d)'
            % (size_a, size_b, len_a, len_b))
    # Start where the original did, but extend by the full requested length:
    # ``2 * (size // 2)`` dropped one element for odd sizes.
    start_a = len_a // 2 - size_a // 2
    start_b = len_b // 2 - size_b // 2
    return x[:, start_a:start_a + size_a, start_b:start_b + size_b]
def poisson_downsampling(image, peak, random_state=np.random):
    """Resample an image with Poisson noise so each channel peaks near ``peak``.

    Every channel is divided by ``channel_max / peak`` and the result is used
    as the rate of a Poisson draw.

    Args:
        image: numpy array or anything ``np.array`` can convert. The maximum
            is taken over axes 0 and 1, so the input needs at least two
            dimensions.
        peak: value the per-channel maximum is scaled to before sampling.
        random_state: object providing ``poisson(lam=...)``; defaults to the
            global ``np.random`` module.

    Returns:
        A new float32 array with the shape of ``image``. If every channel
        maximum is zero, the float32 copy of the input is returned without
        sampling.
    """
    # np.array always copies, so the caller's array is never aliased.
    img = np.array(image, dtype='float32')
    scale = np.atleast_1d(img.max(axis=(0, 1)) / peak)
    nonzero = scale != 0
    if not nonzero.any():
        return img
    # Channels whose maximum is zero would divide 0 by 0 and hand NaN rates to
    # the sampler; dividing them by 1 keeps their rate at the pixel value.
    safe_scale = np.where(nonzero, scale, 1.0)
    rates = img / safe_scale
    return random_state.poisson(lam=rates).astype('float32')
class Merge(object):
    """Concatenate a group of numpy arrays along one axis.

    Args:
        axis: axis passed to ``np.concatenate``; defaults to the last axis.
    """

    def __init__(self, axis=-1):
        self.axis = axis

    def __call__(self, images):
        """Return the arrays in ``images`` joined along ``self.axis``.

        Args:
            images: a sequence (list, tuple, ...) of numpy arrays, or a numpy
                array whose items are arrays. All shapes must match except
                along the merge axis.

        Raises:
            Exception: if ``images`` is neither a sequence nor an ndarray.
            AssertionError: if an item is not an ndarray, or the shapes
                differ outside the merge axis.
        """
        # The ``collections.Sequence`` alias was removed in Python 3.10; the
        # ABC lives in ``collections.abc``.
        try:
            from collections.abc import Sequence
        except ImportError:  # Python 2 fallback
            from collections import Sequence

        if not isinstance(images, (Sequence, np.ndarray)):
            raise Exception("obj is not a sequence (list, tuple, etc)")

        assert all(isinstance(i, np.ndarray)
                   for i in images), 'only numpy array is supported'
        # Blank out the merge axis so only the remaining dimensions are compared.
        shapes = [list(i.shape) for i in images]
        for s in shapes:
            s[self.axis] = None
        assert all(s == shapes[0] for s in shapes
                   ), 'shapes must be the same except the merge axis'
        return np.concatenate(images, axis=self.axis)
class AddGaussianNoise(object):
    """Add gaussian noise to a numpy.ndarray (H x W x C)

    Args:
        mean: mean of the noise, either a number or a config sequence
            understood by ``random_num_generator`` (for example
            ``('uniform', low, high)``) that is re-sampled on every call.
        sigma: standard deviation of the noise, same conventions as ``mean``.
        random_state: source of randomness providing ``normal``; defaults to
            the global ``np.random`` module.
    """

    def __init__(self, mean, sigma, random_state=np.random):
        self.sigma = sigma
        self.mean = mean
        self.random_state = random_state

    def _resolve(self, value):
        """Return ``value`` as is, or draw from it when it is a config sequence."""
        # ``collections.Sequence`` was removed in Python 3.10.
        try:
            from collections.abc import Sequence
        except ImportError:  # Python 2 fallback
            from collections import Sequence
        if isinstance(value, Sequence):
            return random_num_generator(value, random_state=self.random_state)
        return value

    def __call__(self, image):
        """Add the noise to ``image`` in place and return the same array.

        ``image`` must be 3-D; the three-way unpacking of its shape raises
        ValueError otherwise. Because the addition is in place, the caller's
        array is modified.
        """
        sigma = self._resolve(self.sigma)
        # The original passed ``random_state`` to ``isinstance`` here, which
        # raised TypeError on every call; it belongs to the generator, as in
        # AddSpeckleNoise.
        mean = self._resolve(self.mean)
        row, col, ch = image.shape
        gauss = self.random_state.normal(mean, sigma, (row, col, ch))
        image += gauss
        return image
class MaxScaleNumpy(object):
    """Min-max scale each channel of a numpy array (H x W x C), i.e.
    channel = range_min + (channel - min) * (range_max - range_min) / (max - min)

    Args:
        range_min: value the per-channel minimum is mapped to.
        range_max: value the per-channel maximum is mapped to.
    """

    def __init__(self, range_min=0.0, range_max=1.0):
        self.scale = (range_min, range_max)

    def __call__(self, image):
        """Return a rescaled copy of ``image``; the input is not modified.

        The minimum and maximum are taken over axes 0 and 1. A channel whose
        values are all equal is mapped to ``range_min`` instead of NaN.
        """
        low, high = self.scale
        mn = image.min(axis=(0, 1))
        mx = image.max(axis=(0, 1))
        span = mx - mn
        # Constant channels have a zero span; dividing by 1 leaves their
        # (all-zero) numerator untouched and avoids 0 / 0.
        span = np.where(span == 0, 1, span)
        return low + (image - mn) * (high - low) / span
class RandomCropNumpy(object):
    """Crops the given numpy array (H x W x C) at a random location to have a
    region of the given size. size can be a tuple (target_height, target_width)
    or an integer, in which case the target will be of a square shape (size, size)

    Args:
        size: number or ``(target_height, target_width)`` pair.
        random_state: source of randomness providing ``randint``; defaults to
            the global ``np.random`` module.
    """

    def __init__(self, size, random_state=np.random):
        if isinstance(size, numbers.Number):
            self.size = (int(size), int(size))
        else:
            self.size = size
        self.random_state = random_state

    def __call__(self, img):
        """Return a ``(target_height, target_width, C)`` window of ``img``.

        ``img`` itself is returned when it already has the target size.

        Raises:
            ValueError: if the target is larger than ``img`` along either of
                the first two axes.
        """
        # Axis 0 is height and axis 1 is width. The original bound them the
        # other way round, so non-square targets came out transposed.
        h, w = img.shape[:2]
        th, tw = self.size
        if h == th and w == tw:
            return img
        if th > h or tw > w:
            raise ValueError(
                'crop size (%d, %d) exceeds image size (%d, %d)' % (th, tw, h, w))

        # randint's upper bound is exclusive: the +1 makes the last valid
        # offset reachable and keeps the range non-empty when only one of
        # the two dimensions already matches.
        top = self.random_state.randint(0, h - th + 1)
        left = self.random_state.randint(0, w - tw + 1)
        return img[top:top + th, left:left + tw, :]
class BilinearResize(object):
    """Resize a PIL.Image or numpy.ndarray (H x W x C) by a zoom factor.

    Args:
        zoom: factor applied to both spatial axes; the channel axis of an
            ndarray is left unscaled.
    """

    def __init__(self, zoom):
        self.zoom = [zoom, zoom, 1]

    def __call__(self, image):
        """Return the resized image.

        Raises:
            Exception: if ``image`` is neither an ndarray nor a PIL image.
        """
        if isinstance(image, np.ndarray):
            # ``scipy.ndimage.interpolation`` is a deprecated namespace; the
            # same function is exposed as ``scipy.ndimage.zoom``.
            # NOTE(review): no ``order`` is passed, so SciPy's default spline
            # order applies, not order=1 as the class name suggests.
            return scipy.ndimage.zoom(image, self.zoom)
        elif isinstance(image, Image.Image):
            # The original read ``self.size``, which is never assigned;
            # derive the target (width, height) from the image and the zoom.
            factor = self.zoom[0]
            width, height = image.size
            new_size = (int(round(width * factor)), int(round(height * factor)))
            return image.resize(new_size, Image.BILINEAR)
        else:
            raise Exception('unsupported type')
def gram_matrix(y):
    """Return the per-sample Gram matrix of a 4-D feature tensor.

    ``y`` is unpacked as ``(b, ch, h, w)``. Each sample is flattened to
    ``(ch, h * w)``, multiplied by its own transpose and divided by
    ``ch * h * w``, giving a ``(b, ch, ch)`` tensor.
    """
    batch, channels, height, width = y.size()
    flat = y.view(batch, channels, width * height)
    normaliser = channels * height * width
    return torch.bmm(flat, flat.transpose(1, 2)) / normaliser
def whiten_and_color(cF,sF):
    """Whiten the content features, then colour them with the style statistics.

    Both arguments are used as 2-D matrices (they are fed to ``torch.mm``);
    ``wct`` in this file passes them as ``(C, H*W)`` double tensors.

    :param cF: content feature matrix, one row per channel
    :param sF: style feature matrix; presumably the same number of rows as
        ``cF``, since the colouring matrix is multiplied with the whitened
        content -- TODO confirm against callers
    :return: matrix with the shape of ``cF`` holding the transformed features
        with the style mean added back
    """
    cFSize = cF.size()
    # Per-row mean of the content features, broadcast back to cF's shape.
    c_mean = torch.mean(cF,1) # c x (h x w)
    c_mean = c_mean.unsqueeze(1).expand_as(cF)
    cF = cF - c_mean

    # Content covariance; an identity matrix is added before decomposition.
    contentConv = torch.mm(cF,cF.t()).div(cFSize[1]-1) + torch.eye(cFSize[0]).double()
    # NOTE(review): torch.svd is deprecated in recent PyTorch in favour of
    # torch.linalg.svd -- confirm against the pinned version. c_u is unused.
    c_u,c_e,c_v = torch.svd(contentConv,some=False)

    # k_c = index of the first singular value below 1e-5, or the full size if
    # none is that small; only the leading k_c components are kept.
    k_c = cFSize[0]
    for i in range(cFSize[0]):
        if c_e[i] < 0.00001:
            k_c = i
            break

    sFSize = sF.size()
    s_mean = torch.mean(sF,1)
    sF = sF - s_mean.unsqueeze(1).expand_as(sF)
    # Style covariance (no identity term here). s_u is unused.
    styleConv = torch.mm(sF,sF.t()).div(sFSize[1]-1)
    s_u,s_e,s_v = torch.svd(styleConv,some=False)

    # Same truncation rule for the style singular values.
    k_s = sFSize[0]
    for i in range(sFSize[0]):
        if s_e[i] < 0.00001:
            k_s = i
            break

    # Whitening: V_c * diag(e_c ** -0.5) * V_c^T applied to the centred content.
    c_d = (c_e[0:k_c]).pow(-0.5)
    step1 = torch.mm(c_v[:,0:k_c],torch.diag(c_d))
    step2 = torch.mm(step1,(c_v[:,0:k_c].t()))
    whiten_cF = torch.mm(step2,cF)

    # Colouring: V_s * diag(e_s ** 0.5) * V_s^T applied to the whitened content.
    s_d = (s_e[0:k_s]).pow(0.5)
    targetFeature = torch.mm(torch.mm(torch.mm(s_v[:,0:k_s],torch.diag(s_d)),(s_v[:,0:k_s].t())),whiten_cF)
    # Re-centre the result on the style mean.
    targetFeature = targetFeature + s_mean.unsqueeze(1).expand_as(targetFeature)

    return targetFeature
def load_model_filter(model, snapshot, prefix=False):
    """Load the entries of a checkpoint that match ``model`` and ignore the rest.

    Args:
        model: module exposing ``state_dict()`` and ``load_state_dict()``.
        snapshot: path (or file object) handed to ``torch.load``; expected to
            contain a mapping from parameter names to tensors.
        prefix: when true, the first seven characters are stripped from every
            checkpoint key before matching.
            NOTE(review): seven characters is the length of ``module.``,
            not of the ``enc.``/``dec.`` named in the old comment -- confirm
            which prefix the checkpoints carry.

    Returns:
        ``model``, updated in place. Parameters missing from the checkpoint
        keep their current values.
    """
    pretrained_dict = torch.load(snapshot)
    if prefix:
        pretrained_dict = OrderedDict(
            (key[7:], value) for key, value in pretrained_dict.items())

    model_dict = model.state_dict()

    # 1. keep only the checkpoint entries the model actually has
    matching = {k: v for k, v in pretrained_dict.items() if k in model_dict}
    # 2. overwrite those entries in the model's own state dict
    model_dict.update(matching)
    # 3. load the merged dict. The original loaded the filtered checkpoint,
    #    so a strict load failed whenever the checkpoint was only partial.
    model.load_state_dict(model_dict)

    return model
def Hbeta(D=np.array([]), beta=1.0):
    """
    Compute the entropy term and the P-row for a specific value of the
    precision of a Gaussian distribution.

    :param D: array of squared distances; ``x2p`` passes one 1-D row at a time
    :param beta: precision multiplied into ``-D`` before exponentiation
    :return: tuple ``(H, P)``. ``P`` is ``exp(-D * beta)`` normalised by its
        sum, and ``H = log(sum) + beta * sum(D * P_unnormalised) / sum``.
        ``x2p`` compares ``H`` against ``log(perplexity)``.
    """

    # Compute P-row and corresponding perplexity
    # ``-D * beta`` already allocates a new array, so no defensive copy is
    # needed.
    P = np.exp(-D * beta)
    # Reduce over the first axis in NumPy rather than with the builtin sum,
    # which loops in the interpreter on every step of x2p's search.
    sumP = P.sum(axis=0)
    H = np.log(sumP) + beta * np.sum(D * P) / sumP
    P = P / sumP
    return H, P
def pca(X=np.array([]), no_dims=50):
    """
    Runs PCA on the NxD array X in order to reduce its dimensionality to
    no_dims dimensions.

    :param X: NxD data array
    :param no_dims: number of leading principal components to keep
    :return: real array with N rows holding the centred data projected onto
        the ``no_dims`` directions of largest variance, at most D columns
    """

    print("Preprocessing the data using PCA...")
    centred = X - np.mean(X, 0)
    # The scatter matrix is symmetric, so use eigh: it returns real
    # eigenpairs. np.linalg.eig gives no ordering guarantee and may return
    # complex values, so its first ``no_dims`` columns need not be the
    # principal components.
    eigenvalues, eigenvectors = np.linalg.eigh(np.dot(centred.T, centred))
    # eigh sorts ascending; PCA wants the largest-variance directions first.
    order = np.argsort(eigenvalues)[::-1]
    components = eigenvectors[:, order[:no_dims]]
    Y = np.dot(centred, components)
    return Y
# early exaggeration 345 | P = np.maximum(P, 1e-12) 346 | 347 | # Run iterations 348 | for iter in range(max_iter): 349 | 350 | # Compute pairwise affinities 351 | sum_Y = np.sum(np.square(Y), 1) 352 | num = -2. * np.dot(Y, Y.T) 353 | num = 1. / (1. + np.add(np.add(num, sum_Y).T, sum_Y)) 354 | num[range(n), range(n)] = 0. 355 | Q = num / np.sum(num) 356 | Q = np.maximum(Q, 1e-12) 357 | 358 | # Compute gradient 359 | PQ = P - Q 360 | for i in range(n): 361 | dY[i, :] = np.sum(np.tile(PQ[:, i] * num[:, i], (no_dims, 1)).T * (Y[i, :] - Y), 0) 362 | 363 | # Perform the update 364 | if iter < 20: 365 | momentum = initial_momentum 366 | else: 367 | momentum = final_momentum 368 | gains = (gains + 0.2) * ((dY > 0.) != (iY > 0.)) + \ 369 | (gains * 0.8) * ((dY > 0.) == (iY > 0.)) 370 | gains[gains < min_gain] = min_gain 371 | iY = momentum * iY - eta * (gains * dY) 372 | Y = Y + iY 373 | Y = Y - np.tile(np.mean(Y, 0), (n, 1)) 374 | 375 | # Compute current value of cost function 376 | if (iter + 1) % 10 == 0: 377 | C = np.sum(P * np.log(P / Q)) 378 | print("Iteration %d: error is %f" % (iter + 1, C)) 379 | 380 | # Stop lying about P-values 381 | if iter == 100: 382 | P = P / 4. 
class PolyLR(object):
    """Polynomial learning-rate decay applied to an optimizer's param groups.

    Each group's rate is set to
    ``initial_lr * (1 - curr_iter / max_iter) ** lr_decay``.

    ``step`` reads ``curr_iter`` but never advances it; the caller has to
    update the attribute between steps.

    Args:
        optimizer: object exposing ``param_groups``, a list of dicts with an
            ``'lr'`` entry. The current values are stored as initial rates.
        curr_iter: iteration the schedule starts from.
        max_iter: iteration at which the rate reaches zero.
        lr_decay: exponent of the polynomial.
    """

    def __init__(self, optimizer, curr_iter, max_iter, lr_decay):
        # float() keeps the ratio in step() a true division on Python 2 too.
        self.max_iter = float(max_iter)
        self.init_lr_groups = [group['lr'] for group in optimizer.param_groups]
        self.param_groups = optimizer.param_groups
        self.curr_iter = curr_iter
        self.lr_decay = lr_decay

    def step(self):
        """Write the decayed rate for ``curr_iter`` into every param group."""
        # Clamp at zero: past max_iter the base turns negative, and a
        # negative float raised to a fractional power is complex in Python 3.
        remaining = max(0.0, 1 - self.curr_iter / self.max_iter)
        factor = remaining ** self.lr_decay
        for idx, p in enumerate(self.param_groups):
            p['lr'] = self.init_lr_groups[idx] * factor