├── .gitignore ├── LICENSE ├── PNet └── train_pnet.py ├── README.md ├── eval_lib ├── Makefile ├── build │ └── temp.linux-x86_64-3.7 │ │ └── eval.o ├── cython_eval.cpython-36m-x86_64-linux-gnu.so ├── cython_eval.cpython-37m-x86_64-linux-gnu.so ├── cython_eval.so ├── eval.c ├── eval.pyx ├── eval_metrics.py ├── setup.py └── test_cython_eval.py ├── eval_utils.py ├── fig └── core.png ├── loader.py ├── loss.py ├── model.py ├── train_core.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 mangye16 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/PNet/train_pnet.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | from __future__ import print_function, division
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | import torch.optim as optim
8 | from torch.optim import lr_scheduler
9 | from torch.autograd import Variable
10 | from torchvision import datasets, models, transforms
11 |
12 | from tensorboardX import SummaryWriter
13 |
14 | import sys
15 | import json
16 | import scipy
17 | import os, time
18 | import argparse
19 | import numpy as np
20 | import torchvision
21 | import matplotlib
22 | matplotlib.use('agg')
23 | import matplotlib.pyplot as plt
24 | from PIL import Image
25 | from shutil import copyfile
26 | from model import ft_net
27 | from test_eval_cython import get_test_acc, extr_fea_train
28 | from utils import *
29 | import loader, loss
30 | import pdb
31 |
32 | version = torch.__version__
33 | # #####################################################################
34 | # Options
35 | # -------
36 | parser = argparse.ArgumentParser(description='Training')
37 | parser.add_argument('--gpu', default='0', type=str, help='gpu ids, e.g. "0" or "0,1,2"')
38 | parser.add_argument('--seed', default=1, type=int, help='rng seed')
39 | parser.add_argument('--model_dir', default='.checkpoint/', type=str, help='output model name')
40 | parser.add_argument('--data_dir', default='/home/comp/mangye/dataset/', type=str, help='data dir')
41 | parser.add_argument('--dataset', default='duke', type=str, help='training data: Market1501 or DukeMTMC-reID')
42 | parser.add_argument('--pretrained', default='', type=str, help='path of pretrained model, e.g. "./model/baseline/net_8.pth"')
43 | parser.add_argument('--batchsize', default=32, type=int, help='batchsize')
44 | parser.add_argument('--noise_ratio', default=0.2, type=float, help='percentage of noisy labels in the training data')
45 | parser.add_argument('--lr', default=0.01, type=float, help='initial learning rate')
46 | parser.add_argument('--alpha', default=2, type=float, help='beta distribution: alpha')
47 | parser.add_argument('--beta', default=6, type=float, help='beta distribution: beta')
48 | parser.add_argument('--LabelWt', default=60, type=int, help='label refinement weight')
49 | parser.add_argument('--weighttype', default=0, type=int, help='weight type: instance weight, class weight')
50 | parser.add_argument('--stage2', action='store_true', help='training stage 2')
51 |
52 | args = parser.parse_args()
53 |
54 | torch.manual_seed(args.seed)
55 |
56 | start_epoch = 0
57 | if args.stage2:
58 |     start_epoch = start_epoch + 20
59 |
60 | best_acc, best_epoch = 0, 0  # best_epoch is initialized up front so the first evaluation log cannot hit a NameError
61 | test_epoch = 2
62 | lr = args.lr
63 | data_dir = args.data_dir + args.dataset
64 | suffix = args.dataset + '_noise_{}_'.format(args.noise_ratio)
65 | if args.LabelWt > 0 or args.stage2:
66 |     suffix = suffix + 'batch_{}_wt_{}'.format(args.batchsize, args.LabelWt)
67 | else:
68 |     suffix = suffix + 'batch_{}_baseline'.format(args.batchsize)
69 |
70 |
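# For reference, with the defaults above (dataset='duke', noise_ratio=0.2,
# batchsize=32, LabelWt=60, lr=0.01) the stage-1 branch below builds the tag
#     duke_noise_0.2_batch_32_wt_60_lr_1.0e-02_w_st1
# which is exactly the checkpoint name pattern that the stage-2 initialization
# further down expects ('{}_noise_{}_batch_32_wt_60_lr_1.0e-02_w_st1_epoch_best.t').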
71 | if args.stage2:
72 |     suffix = suffix + '_beta_{}_{}_lr_{:1.1e}'.format(args.alpha, args.beta, args.lr)
73 |     suffix = suffix + '_w_st2_new'
74 | else:
75 |     suffix = suffix + '_lr_{:1.1e}'.format(args.lr)
76 |     suffix = suffix + '_w_st1'
77 |
78 | print('model: ' + suffix)
79 |
80 | # define the log path
81 | log_dir = './new_res/' + args.dataset + '_log/'
82 | checkpoint_path = './res/checkpoint/'
83 | vis_log_dir = log_dir + suffix + '/'
84 | if not os.path.isdir(log_dir):
85 |     os.makedirs(log_dir)
86 | if not os.path.isdir(vis_log_dir):
87 |     os.makedirs(vis_log_dir)
88 | writer = SummaryWriter(vis_log_dir)
89 | test_log_file = open(log_dir + suffix + '.txt', "w")
90 | sys.stdout = Logger(log_dir + suffix + '_os.txt')
91 |
92 | # define the gpu ids
93 | str_ids = args.gpu.split(',')
94 | gpu_ids = []
95 | for str_id in str_ids:
96 |     gid = int(str_id)
97 |     if gid >= 0:
98 |         gpu_ids.append(gid)
99 | # set gpu ids
100 | if len(gpu_ids) > 0:
101 |     torch.cuda.set_device(gpu_ids[0])
102 |
103 | print('using gpu: {}'.format(gpu_ids))
104 |
105 | # #####################################################################
106 | # Load Data
107 | train_transform = transforms.Compose([
108 |     #transforms.RandomResizedCrop(size=128, scale=(0.75,1.0), ratio=(0.75,1.3333), interpolation=3), #Image.BICUBIC)
109 |     transforms.Resize((288,144), interpolation=3),
110 |     transforms.RandomCrop((256,128)),
111 |     transforms.RandomHorizontalFlip(),
112 |     transforms.ToTensor(),
113 |     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
114 | ])
115 | test_transform = transforms.Compose([
116 |     transforms.Resize((256,128), interpolation=3),
117 |     transforms.ToTensor(),
118 |     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
119 | ])
120 |
121 | # load training data (loader.DatasetFolder)
122 | print('Starting loading training data: ', args.dataset)
123 | train_dataset = loader.DatasetFolder(os.path.join(data_dir, 'train'), transform=train_transform)
124 | class_names = train_dataset.classes
125 | dataset_sizes_train = len(train_dataset)
126 |
127 | use_gpu = torch.cuda.is_available()
128 |
129 | # Define a model
130 | model = ft_net(len(class_names))
131 |
132 | if use_gpu:
133 |     model = model.cuda()
134 |
135 | # Load a pretrained model
136 | if args.pretrained or args.stage2:
137 |     # model_name = 'market_noise_0.2_batch_32_lambda_0.4_lr_1.0e-02_st1_epoch_best.t'
138 |     model_name = '{}_noise_{}_batch_32_wt_60_lr_1.0e-02_w_st1_epoch_best.t'.format(args.dataset, args.noise_ratio)
139 |     print('Initializing weights with {}'.format(model_name))
140 |     model_path = checkpoint_path + model_name
141 |     model.load_state_dict(torch.load(model_path))
142 | else:
143 |     print('Initializing weights with ImageNet')
144 |
145 | # generate noisy labels
146 | if args.noise_ratio >= 0:
147 |     trainLabels = torch.LongTensor([y for (p, y, w) in train_dataset.imgs])
148 |     trainLabels_nsy, if_truelbl = gen_nosiy_lbl(trainLabels, args.noise_ratio, len(class_names))
149 |     print('Finished adding noisy labels')
150 |
151 | # generate instance weights
152 | if args.stage2:
153 |     print('Generating self-generated weights......')
154 |     weight_file = './new_res/' + 'new_{}_{}_weights.npy'.format(args.dataset, args.noise_ratio)
155 |     label_file = './new_res/' + 'new_{}_{}_label.npy'.format(args.dataset, args.noise_ratio)
156 |     # if os.path.exists(weight_file):
157 |     #     all_weights = np.load(weight_file)
158 |     #     pre_pids = np.load(label_file)
159 |     # else:
160 |     transform_bak = train_transform
161 |     train_dataset.transform = test_transform
162 |     temploader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batchsize, shuffle=False, num_workers=8)
163 |
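# Stage-2 weighting sketch: features for the whole noisy training set are
# extracted with the stage-1 model, and gen_weights_dist (pulled in by
# `from utils import *`; its body is not part of this listing) converts
# per-instance distance statistics into soft weights under a Beta(alpha, beta)
# prior. As a point of reference, the default Beta(2, 6) density has mean
# alpha/(alpha+beta) = 0.25 and mode (alpha-1)/(alpha+beta-2) = 1/6, i.e.
# most of its mass sits near zero.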
164 |     model.eval()  # Set model to evaluate mode
165 |     print('Start extracting features...')
166 |     start = time.time()
167 |     train_feas, pre_pids = extr_fea_train(model, train_dataset, temploader, use_gpu)
168 |
169 |     print('Evaluation time: {}'.format(time.time() - start))
170 |     indexs, ori_weight = gen_weights_dist(train_feas, trainLabels_nsy, class_names, args.alpha, args.beta)
171 |     order = np.argsort(indexs)
172 |     all_weights = ori_weight[order]
173 |     np.save(weight_file, all_weights)
174 |     np.save(label_file, pre_pids)
175 |     train_dataset.transform = transform_bak
176 |     all_weights = all_weights.astype(np.float32)
177 |     for i in range(len(trainLabels_nsy)):
178 |         train_dataset.imgs[i] = (train_dataset.imgs[i][0], int(pre_pids[i]), all_weights[i])
179 | else:
180 |     print('Setting the same weight for all instances...')
181 |     for i in range(len(trainLabels_nsy)):
182 |         train_dataset.imgs[i] = (train_dataset.imgs[i][0], trainLabels_nsy[i], 1)
183 |
184 |
185 | dataloaders_train = torch.utils.data.DataLoader(train_dataset, batch_size=args.batchsize, shuffle=True, num_workers=8)  # 8 workers may work faster
186 |
187 | # load testing data (torchvision ImageFolder)
188 | test_dataset = {x: datasets.ImageFolder(os.path.join(data_dir, x), test_transform) for x in ['gallery','query']}
189 | dataloaders_test = {x: torch.utils.data.DataLoader(test_dataset[x], batch_size=args.batchsize, shuffle=False, num_workers=8) for x in ['gallery','query']}
190 |
191 | # Define loss functions. The previously commented-out refinement branch is restored:
192 | #   LabelRefineLoss takes (outputs, targets, lambda1); plain CrossEntropyLoss cannot.
193 | if args.stage2:
194 |     criterion = loss.InstanceWeightLoss(weighted=1)
195 | elif args.LabelWt > 0:
196 |     criterion = loss.LabelRefineLoss(lambda1=args.LabelWt)
197 | else:
198 |     criterion = nn.CrossEntropyLoss()
199 | # optimizer
200 | ignored_params = list(map(id, model.model.fc.parameters())) + list(map(id, model.classifier.parameters()))
201 | base_params = filter(lambda p: id(p) not in ignored_params, model.parameters())
202 | optimizer_ft = optim.SGD([
203 |     {'params': base_params, 'lr': lr},
204 |     {'params': model.model.fc.parameters(), 'lr': lr*10},
205 |     {'params': model.classifier.parameters(), 'lr': lr*10}
206 | ], weight_decay=5e-4, momentum=0.9, nesterov=True)
207 |
208 | # Decay LR by a factor of 0.1 every 20 epochs
209 | exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=20, gamma=0.1)
210 |
211 | def save_network(network, epoch_label, is_best=False):
212 |     if is_best:
213 |         save_path = checkpoint_path + suffix + '_epoch_best.t'
214 |     else:
215 |         save_path = checkpoint_path + suffix + '_epoch_{}.t'.format(epoch_label)
216 |     torch.save(network.state_dict(), save_path)
217 | def sigmoid_rampup(current, rampup_length):
218 |     """Exponential rampup from https://arxiv.org/abs/1610.02242"""
219 |     if rampup_length == 0:
220 |         return 1.0
221 |     else:
222 |         current = np.clip(current, 0.0, rampup_length)
223 |         phase = 1.0 - current / rampup_length
224 |         w = float(np.exp(-2.0 * phase * phase))
225 |         return min(w, 0.5)  # capped at 0.5 so the refined label never outweighs the annotated one
226 |
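# Worked example of the rampup above with the default LabelWt=60:
#   sigmoid_rampup(0, 60)  = exp(-2.0)                 ~= 0.135
#   sigmoid_rampup(15, 60) = min(exp(-2 * 0.75**2), 0.5) ~= 0.325
#   sigmoid_rampup(30, 60) = min(exp(-0.5), 0.5)       =  0.5
# so lambda1 in train_model() grows smoothly from ~0.135 and saturates at the
# 0.5 cap from roughly epoch 25 onward.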
227 | def train_model(model, criterion, optimizer_ft, scheduler, epoch):
228 |
229 |     scheduler.step()
230 |     lambda1 = sigmoid_rampup(epoch, args.LabelWt)
231 |     train_loss = AverageMeter()
232 |     data_time = AverageMeter()
233 |     batch_time = AverageMeter()
234 |
235 |     model.train()
236 |     correct = 0
237 |     total = 0
238 |     end = time.time()
239 |     for batch_idx, (inputs, targets, weights) in enumerate(dataloaders_train):
240 |         if use_gpu:
241 |             inputs = Variable(inputs.cuda())
242 |             targets = Variable(targets.cuda())
243 |             weights = Variable(weights.cuda())
244 |         data_time.update(time.time() - end)
245 |
246 |         optimizer_ft.zero_grad()
247 |
248 |         outputs = model(inputs)
249 |
250 |         if args.stage2:
251 |             loss = criterion(outputs, targets, weights)
252 |         else:
253 |             loss = criterion(outputs, targets, lambda1) if args.LabelWt > 0 else criterion(outputs, targets)
254 |
255 |         loss.backward()
256 |         optimizer_ft.step()
257 |
258 |         train_loss.update(loss.item(), inputs.size(0))
259 |
260 |         # measure elapsed time
261 |         batch_time.update(time.time() - end)
262 |         end = time.time()
263 |
264 |         _, predicted = outputs.max(1)
265 |         correct += predicted.eq(targets).sum().item()
266 |         total += inputs.size(0)
267 |
268 |         if batch_idx % 10 == 0:
269 |             print('Epoch: [{}][{}/{}] '
270 |                   'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
271 |                   'Data: {data_time.val:.3f} ({data_time.avg:.3f}) '
272 |                   'Loss: {train_loss.val:.4f} ({train_loss.avg:.4f}) '
273 |                   'Accu: {:.2f}'.format(
274 |                   epoch, batch_idx, len(dataloaders_train), 100.*correct/total, batch_time=batch_time, data_time=data_time, train_loss=train_loss))
275 |
276 |     writer.add_scalar('training acc (train)', 100.*correct/total, epoch)
277 |     writer.add_scalar('loss', train_loss.avg, epoch)
278 |
279 |
280 | for epoch in range(start_epoch, start_epoch+41):
281 |
282 |     # training
283 |     print('Start Training..........')
284 |     train_model(model, criterion, optimizer_ft, exp_lr_scheduler, epoch)
285 |
286 |     # evaluation
287 |     if epoch % test_epoch == 0:
288 |         model.eval()  # Set model to evaluate mode
289 |         start = time.time()
290 |         cmc, mAP = get_test_acc(model, test_dataset, dataloaders_test, use_gpu, max_rank=10)
291 |         if cmc[0] > best_acc:
292 |             best_epoch = epoch
293 |             best_acc = cmc[0]
294 |             save_network(model, epoch, is_best=True)
295 |         print('Epoch {}: R1:{:.4%} R5:{:.4%} R10:{:.4%} mAP:{:.4%} (Best Epoch[{}])'.format(
296 |             epoch, cmc[0], cmc[4], cmc[9], mAP, best_epoch))
297 |         print('Epoch {}: R1:{:.4%} R5:{:.4%} R10:{:.4%} mAP:{:.4%} (Best Epoch[{}])'.format(
298 |             epoch, cmc[0], cmc[4], cmc[9], mAP, best_epoch), file=test_log_file)
299 |         test_log_file.flush()
300 |         print('Evaluation time: {}'.format(time.time() - start))
301 |
302 |     # if epoch%20==0:
303 |     #     save_network(model, epoch, is_best = False)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ReID-Label-Noise
2 |
3 | Demo code for refining person re-identification models under label noise, as used in [1] and [2].
4 |
5 | ## Highlight
6 |
7 | The goal of this work is to learn a robust Re-ID model against different noise types. We introduce an online co-refining (CORE) framework with dynamic mutual learning, where networks and label predictions are optimized collaboratively online by distilling knowledge from peer networks.
8 |
9 |
10 |
11 |
12 | ### 1. Prepare the Dataset
13 |
14 | Note that the demo code uses a re-organized file structure so that it can be applied seamlessly to three datasets: Market1501, DukeMTMC-reID, and CUHK03. A detailed description can be found on this [website](https://github.com/layumi/Person_reID_baseline_pytorch#dataset--preparation).
15 |
16 | ```
17 | training/
18 | |--id 1/
19 | |--img 001001/
20 | |--img 001002/
21 | |--id 2/
22 | |--img 002001/
23 | |--img 002002/
24 |
25 | query/
26 |
27 | gallery/
28 | ...
29 | ```
30 |
31 | ### 2. Running the Code
32 | Train a model by
33 | ```bash
34 | python train_core.py --dataset market --batchsize 32 --noise_ratio 0.2 --lr 0.01 --pattern
35 | ```
36 |
37 | - `--dataset`: which dataset: "market", "duke", or "cuhk03".
38 |
39 | - `--batchsize`: training batch size.
40 |
41 | - `--noise_ratio`: ratio of corrupted training labels, e.g., 0.2.
42 |
43 | - `--lr`: initial learning rate.
44 |
45 | - `--pattern`: "patterned noise" or "random noise".
46 |
47 | - `--gpu`: which gpu to run on.
48 |
49 | You need to manually define the data path first.
50 |
51 | **Parameters**: More parameters can be found in the script.
52 |
53 | **Training Model**: The trained models will be saved in `checkpoint/`.
54 |
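The co-refining objective itself lives in `loss.py` (`CoRefineLoss`: cross entropy against the peer's predicted labels plus a `lambdaKL`-weighted KL term between the two networks' distributions). A minimal sketch of exercising it in isolation; the batch size, the 751 identities (Market-1501's training ID count), and the `lambdaKL` value here are illustrative only:

```python
import torch
from loss import CoRefineLoss

criterion = CoRefineLoss()
logits_a = torch.randn(8, 751, requires_grad=True)  # network A (being updated)
logits_b = torch.randn(8, 751)                      # peer network B
targets = torch.randint(0, 751, (8,))               # ignored inside CoRefineLoss

# The peer output must not require grad, otherwise CoRefineLoss raises.
loss_value = criterion(logits_a, logits_b.detach(), targets, lambdaKL=0.5)
loss_value.backward()
```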
55 |
56 | ### 3. Citation
57 |
58 | Please kindly cite this paper in your publications if it helps your research:
59 | ```
60 | @article{tip21core,
61 |   title={Collaborative Refining for Person Re-Identification with Label Noise},
62 |   author={Ye, Mang and Li, He and Du, Bo and Shen, Jianbing and Shao, Ling and Hoi, Steven C. H.},
63 |   journal={IEEE Transactions on Image Processing (TIP)},
64 |   year={2021},
65 | }
66 |
67 | @article{tifs20noisy,
68 |   title={PurifyNet: A Robust Person Re-identification Model with Noisy Labels},
69 |   author={Ye, Mang and Yuen, Pong C.},
70 |   journal={IEEE Transactions on Information Forensics and Security (TIFS)},
71 |   volume={15},
72 |   pages={2655--2666},
73 |   year={2020},
74 | }
75 | ```
76 |
77 | ### 4. References
78 |
79 | [1] M. Ye, H. Li, B. Du, J. Shen, L. Shao, and S. C. H. Hoi. Collaborative Refining for Person Re-Identification with Label Noise. IEEE Transactions on Image Processing (TIP), 2021.
80 |
81 | [2] M. Ye and P. C. Yuen. PurifyNet: A Robust Person Re-identification Model with Noisy Labels. IEEE Transactions on Information Forensics and Security (TIFS), 2020.
82 |
--------------------------------------------------------------------------------
/eval_lib/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | 	python setup.py build_ext --inplace
3 | 	rm -rf build
4 | clean:
5 | 	rm -rf build
6 | 	rm -f eval.c *.so
--------------------------------------------------------------------------------
/eval_lib/build/temp.linux-x86_64-3.7/eval.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mangye16/ReID-Label-Noise/89aa11f68c275a0bcff232d9a5c3ae152c9276af/eval_lib/build/temp.linux-x86_64-3.7/eval.o
--------------------------------------------------------------------------------
/eval_lib/cython_eval.cpython-36m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mangye16/ReID-Label-Noise/89aa11f68c275a0bcff232d9a5c3ae152c9276af/eval_lib/cython_eval.cpython-36m-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/eval_lib/cython_eval.cpython-37m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mangye16/ReID-Label-Noise/89aa11f68c275a0bcff232d9a5c3ae152c9276af/eval_lib/cython_eval.cpython-37m-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/eval_lib/cython_eval.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mangye16/ReID-Label-Noise/89aa11f68c275a0bcff232d9a5c3ae152c9276af/eval_lib/cython_eval.so
--------------------------------------------------------------------------------
/eval_lib/eval.pyx:
--------------------------------------------------------------------------------
1 | # cython: boundscheck=False, wraparound=False, nonecheck=False, cdivision=True
2 |
3 |
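# The directives above trade safety for speed: boundscheck=False and
# wraparound=False drop index checking and negative indexing on memoryviews,
# nonecheck=False skips None tests, and cdivision=True uses C division
# semantics (no ZeroDivisionError checks).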
cimport cython 4 | cimport numpy as np 5 | import numpy as np 6 | 7 | cpdef eval_market1501_wrap(distmat, 8 | q_pids, 9 | g_pids, 10 | q_camids, 11 | g_camids, 12 | max_rank): 13 | distmat = np.asarray(distmat,dtype=np.float32) 14 | q_pids = np.asarray(q_pids, dtype=np.int64) 15 | g_pids = np.asarray(g_pids , dtype=np.int64) 16 | q_camids=np.asarray(q_camids,dtype=np.int64) 17 | g_camids=np.asarray(g_camids, dtype=np.int64) 18 | return eval_market1501(distmat, q_pids, g_pids, q_camids, g_camids, max_rank) 19 | 20 | cpdef eval_market1501( 21 | float[:,:] distmat, 22 | long[:] q_pids, 23 | long[:] g_pids, 24 | long[:] q_camids, 25 | long[:] g_camids, 26 | long max_rank, 27 | ): 28 | # return 0,0 29 | cdef: 30 | long num_q = distmat.shape[0], num_g = distmat.shape[1] 31 | 32 | if num_g < max_rank: 33 | max_rank = num_g 34 | print("Note: number of gallery samples is quite small, got {}".format(num_g)) 35 | 36 | cdef: 37 | long[:,:] indices = np.argsort(distmat, axis=1) 38 | long[:,:] matches = (np.asarray(g_pids)[np.asarray(indices)] == np.asarray(q_pids)[:, np.newaxis]).astype(np.int64) 39 | float[:,:] all_cmc = np.zeros((num_q,max_rank),dtype=np.float32) 40 | float[:] all_AP = np.zeros(num_q,dtype=np.float32) 41 | 42 | long q_pid, q_camid 43 | long[:] order=np.zeros(num_g,dtype=np.int64), keep =np.zeros(num_g,dtype=np.int64) 44 | 45 | long num_valid_q = 0, q_idx, idx 46 | # long[:] orig_cmc=np.zeros(num_g,dtype=np.int64) 47 | float[:] orig_cmc=np.zeros(num_g,dtype=np.float32) 48 | float[:] cmc=np.zeros(num_g,dtype=np.float32), tmp_cmc=np.zeros(num_g,dtype=np.float32) 49 | long num_orig_cmc=0 50 | float num_rel=0. 51 | float tmp_cmc_sum =0. 52 | # num_orig_cmc is the valid size of orig_cmc, cmc and tmp_cmc 53 | unsigned int orig_cmc_flag=0 54 | 55 | for q_idx in range(num_q): 56 | # get query pid and camid 57 | q_pid = q_pids[q_idx] 58 | q_camid = q_camids[q_idx] 59 | # remove gallery samples that have the same pid and camid with query 60 | order = indices[q_idx] 61 | for idx in range(num_g): 62 | keep[idx] = ( g_pids[order[idx]] !=q_pid) or (g_camids[order[idx]]!=q_camid ) 63 | # compute cmc curve 64 | num_orig_cmc=0 65 | orig_cmc_flag=0 66 | for idx in range(num_g): 67 | if keep[idx]: 68 | orig_cmc[num_orig_cmc] = matches[q_idx][idx] 69 | num_orig_cmc +=1 70 | if matches[q_idx][idx]>1e-31: 71 | orig_cmc_flag=1 72 | if not orig_cmc_flag: 73 | all_AP[q_idx]=-1 74 | # print('continue ', q_idx) 75 | # this condition is true when query identity does not appear in gallery 76 | continue 77 | my_cusum(orig_cmc,cmc,num_orig_cmc) 78 | for idx in range(num_orig_cmc): 79 | if cmc[idx] >1: 80 | cmc[idx] =1 81 | all_cmc[q_idx] = cmc[:max_rank] 82 | num_valid_q+=1 83 | 84 | # print('ori cmc', np.asarray(orig_cmc).tolist()) 85 | # print('cmc', np.asarray(cmc).tolist()) 86 | # compute average precision 87 | # reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision 88 | num_rel = 0. 89 | for idx in range(num_orig_cmc): 90 | num_rel += orig_cmc[idx] 91 | my_cusum( orig_cmc, tmp_cmc, num_orig_cmc) 92 | for idx in range(num_orig_cmc): 93 | tmp_cmc[idx] = tmp_cmc[idx] / (idx+1.) 
* orig_cmc[idx] 94 | # print('tmp_cmc', np.asarray(tmp_cmc).tolist()) 95 | 96 | tmp_cmc_sum=my_sum(tmp_cmc,num_orig_cmc) 97 | all_AP[q_idx] = tmp_cmc_sum / num_rel 98 | # print('final',tmp_cmc_sum, num_rel, tmp_cmc_sum / num_rel,'\n') 99 | 100 | assert num_valid_q > 0, "Error: all query identities do not appear in gallery" 101 | # print_dbg('all ap', all_AP) 102 | # print_dbg('all cmc', all_cmc) 103 | all_AP_np = np.asarray(all_AP) 104 | all_AP_np[np.isclose(all_AP,-1)] = np.nan 105 | return np.asarray(all_cmc).astype(np.float32).sum(axis=0) / num_valid_q, \ 106 | np.nanmean(all_AP_np) 107 | 108 | def print_dbg(msg, val): 109 | print(msg, np.asarray(val)) 110 | 111 | cpdef void my_cusum( 112 | cython.numeric[:] src, 113 | cython.numeric[:] dst, 114 | long size 115 | ) nogil: 116 | cdef: 117 | long idx 118 | for idx in range(size): 119 | if idx==0: 120 | dst[idx] = src[idx] 121 | else: 122 | dst[idx] = src[idx]+dst[idx-1] 123 | 124 | cpdef cython.numeric my_sum( 125 | cython.numeric[:] src, 126 | long size 127 | ) nogil: 128 | cdef: 129 | long idx 130 | cython.numeric ttl=0 131 | for idx in range(size): 132 | ttl+=src[idx] 133 | return ttl 134 | -------------------------------------------------------------------------------- /eval_lib/eval_metrics.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from __future__ import division 4 | 5 | import numpy as np 6 | import copy 7 | from collections import defaultdict 8 | import sys 9 | 10 | try: 11 | from cython_eval import eval_market1501_wrap 12 | CYTHON_EVAL_AVAI = True 13 | print("Cython evaluation is AVAILABLE") 14 | except ImportError: 15 | CYTHON_EVAL_AVAI = False 16 | print("Warning: Cython evaluation is UNAVAILABLE") 17 | 18 | 19 | def eval_cuhk03(distmat, q_pids, g_pids, q_camids, g_camids, max_rank, N=100): 20 | """Evaluation with cuhk03 metric 21 | Key: one image for each gallery identity is randomly sampled for each query identity. 22 | Random sampling is performed N times (default: N=100). 23 | """ 24 | num_q, num_g = distmat.shape 25 | if num_g < max_rank: 26 | max_rank = num_g 27 | print("Note: number of gallery samples is quite small, got {}".format(num_g)) 28 | indices = np.argsort(distmat, axis=1) 29 | matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32) 30 | 31 | # compute cmc curve for each query 32 | all_cmc = [] 33 | all_AP = [] 34 | num_valid_q = 0. # number of valid query 35 | for q_idx in range(num_q): 36 | # get query pid and camid 37 | q_pid = q_pids[q_idx] 38 | q_camid = q_camids[q_idx] 39 | 40 | # remove gallery samples that have the same pid and camid with query 41 | order = indices[q_idx] 42 | remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid) 43 | keep = np.invert(remove) 44 | 45 | # compute cmc curve 46 | orig_cmc = matches[q_idx][keep] # binary vector, positions with value 1 are correct matches 47 | if not np.any(orig_cmc): 48 | # this condition is true when query identity does not appear in gallery 49 | continue 50 | 51 | kept_g_pids = g_pids[order][keep] 52 | g_pids_dict = defaultdict(list) 53 | for idx, pid in enumerate(kept_g_pids): 54 | g_pids_dict[pid].append(idx) 55 | 56 | cmc, AP = 0., 0. 
57 |         for repeat_idx in range(N):
58 |             mask = np.zeros(len(orig_cmc), dtype=bool)  # np.bool is removed in newer NumPy; plain bool is equivalent
59 |             for _, idxs in g_pids_dict.items():
60 |                 # randomly sample one image for each gallery person
61 |                 rnd_idx = np.random.choice(idxs)
62 |                 mask[rnd_idx] = True
63 |             masked_orig_cmc = orig_cmc[mask]
64 |             _cmc = masked_orig_cmc.cumsum()
65 |             _cmc[_cmc > 1] = 1
66 |             cmc += _cmc[:max_rank].astype(np.float32)
67 |             # compute AP
68 |             num_rel = masked_orig_cmc.sum()
69 |             tmp_cmc = masked_orig_cmc.cumsum()
70 |             tmp_cmc = [x / (i+1.) for i, x in enumerate(tmp_cmc)]
71 |             tmp_cmc = np.asarray(tmp_cmc) * masked_orig_cmc
72 |             AP += tmp_cmc.sum() / num_rel
73 |         cmc /= N
74 |         AP /= N
75 |         all_cmc.append(cmc)
76 |         all_AP.append(AP)
77 |         num_valid_q += 1.
78 |
79 |     assert num_valid_q > 0, "Error: all query identities do not appear in gallery"
80 |
81 |     all_cmc = np.asarray(all_cmc).astype(np.float32)
82 |     all_cmc = all_cmc.sum(0) / num_valid_q
83 |     mAP = np.mean(all_AP)
84 |
85 |     return all_cmc, mAP
86 |
87 |
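# Usage sketch (values illustrative; test_cython_eval.py below benchmarks the
# compiled wrapper against this pure-Python fallback):
#   distmat  = np.random.rand(10, 100).astype(np.float32)
#   q_pids   = np.random.randint(0, 10, size=10)
#   g_pids   = np.random.randint(0, 10, size=100)
#   q_camids = np.random.randint(0, 5, size=10)
#   g_camids = np.random.randint(0, 5, size=100)
#   cmc, mAP = evaluate(distmat, q_pids, g_pids, q_camids, g_camids, max_rank=10)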
88 | def eval_market1501(distmat, q_pids, g_pids, q_camids, g_camids, max_rank):
89 |     """Evaluation with market1501 metric
90 |     Key: for each query identity, its gallery images from the same camera view are discarded.
91 |     """
92 |     num_q, num_g = distmat.shape
93 |     if num_g < max_rank:
94 |         max_rank = num_g
95 |         print("Note: number of gallery samples is quite small, got {}".format(num_g))
96 |     indices = np.argsort(distmat, axis=1)
97 |     # print(indices)
98 |     matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32)
99 |     # print(matches)
100 |
101 |     # compute cmc curve for each query
102 |     all_cmc = []
103 |     all_AP = []
104 |     num_valid_q = 0.  # number of valid queries
105 |     for q_idx in range(num_q):
106 |
107 |         # get query pid and camid
108 |         q_pid = q_pids[q_idx]
109 |         q_camid = q_camids[q_idx]
110 |         # print('q_pid: ', q_pid)   # debug output disabled: it floods stdout with one line per query
111 |         # print('q_camid: ', q_camid)
112 |         # remove gallery samples that have the same pid and camid with query
113 |         order = indices[q_idx]
114 |         remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid)
115 |         keep = np.invert(remove)
116 |
117 |         # compute cmc curve
118 |         orig_cmc = matches[q_idx][keep]  # binary vector, positions with value 1 are correct matches
119 |         if not np.any(orig_cmc):
120 |             # this condition is true when query identity does not appear in gallery
121 |             continue
122 |         cmc = orig_cmc.cumsum()
123 |         # print(cmc)
124 |         cmc[cmc >= 1] = 1
125 |         all_cmc.append(cmc[:max_rank])
126 |         num_valid_q += 1.
127 |
128 |         # compute average precision
129 |         # reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision
130 |         num_rel = orig_cmc.sum()
131 |         # print(num_rel)
132 |         tmp_cmc = orig_cmc.cumsum()
133 |         tmp_cmc = [x / (i+1.) for i, x in enumerate(tmp_cmc)]
134 |         tmp_cmc = np.asarray(tmp_cmc) * orig_cmc
135 |         AP = tmp_cmc.sum() / num_rel
136 |         # print(all_AP)
137 |         all_AP.append(AP)
138 |
139 |     assert num_valid_q > 0, "Error: all query identities do not appear in gallery"
140 |     # print(num_valid_q)   # debug output disabled
141 |     all_cmc = np.asarray(all_cmc).astype(np.float32)
142 |     # print(all_cmc.sum(0))
143 |     all_cmc = all_cmc.sum(0) / num_valid_q
144 |     # print(all_cmc)
145 |     mAP = np.mean(all_AP)
146 |
147 |     return all_cmc, mAP
148 |
149 |
150 | def evaluate(distmat, q_pids, g_pids, q_camids, g_camids, max_rank=50, use_metric_cuhk03=False, use_cython=True):
151 |     if use_metric_cuhk03:
152 |         return eval_cuhk03(distmat, q_pids, g_pids, q_camids, g_camids, max_rank)
153 |     else:
154 |         if use_cython and CYTHON_EVAL_AVAI:
155 |             return eval_market1501_wrap(distmat, q_pids, g_pids, q_camids, g_camids, max_rank)
156 |         else:
157 |             return eval_market1501(distmat, q_pids, g_pids, q_camids, g_camids, max_rank)
--------------------------------------------------------------------------------
/eval_lib/setup.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from distutils.core import setup
3 | from distutils.extension import Extension
4 | from Cython.Distutils import build_ext
5 |
6 | try:
7 |     numpy_include = np.get_include()
8 | except AttributeError:
9 |     numpy_include = np.get_numpy_include()
10 | print(numpy_include)
11 |
12 | ext_modules = [Extension("cython_eval",
13 |                          ["eval.pyx"],
14 |                          libraries=["m"],
15 |                          include_dirs=[numpy_include],
16 |                          extra_compile_args=["-ffast-math", "-Wno-cpp", "-Wno-unused-function"]
17 |                          ),
18 |               ]
19 |
20 | setup(
21 |     name='eval_lib',
22 |     cmdclass={"build_ext": build_ext},
23 |     ext_modules=ext_modules)
--------------------------------------------------------------------------------
/eval_lib/test_cython_eval.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, print_function
2 | import sys, os
3 |
4 | sys.path.insert(
5 |     0, os.path.dirname(os.path.abspath(__file__)) + '/..'
6 | )
7 |
8 | try:
9 |     from eval_lib.cython_eval import eval_market1501_wrap
10 | except ImportError:
11 |     print("Error: eval.pyx not compiled, please do 'make' before running 'python test.py'. 
exit") 12 | sys.exit() 13 | 14 | from eval_metrics import eval_market1501 15 | import numpy as np 16 | import time 17 | 18 | num_q = 100 19 | num_g = 1000 20 | 21 | distmat = np.random.rand(num_q, num_g) * 20 22 | q_pids = np.random.randint(0, num_q, size=num_q) 23 | g_pids = np.random.randint(0, num_g, size=num_g) 24 | q_camids = np.random.randint(0, 5, size=num_q) 25 | g_camids = np.random.randint(0, 5, size=num_g) 26 | 27 | end = time.time() 28 | cmc, mAP = eval_market1501_wrap(distmat, 29 | q_pids, 30 | g_pids, 31 | q_camids, 32 | g_camids, 10) 33 | elapsed_cython = time.time() - end 34 | print("=> Cython evaluation") 35 | print("consume time {:.5f} \n mAP is {} \n cmc is {}".format(elapsed_cython, mAP, cmc)) 36 | 37 | end = time.time() 38 | cmc, mAP = eval_market1501(distmat, 39 | q_pids, 40 | g_pids, 41 | q_camids, 42 | g_camids, 10) 43 | elapsed_python = time.time() - end 44 | print("=> Python evaluation") 45 | print("consume time {:.5f} \n mAP is {} \n cmc is {}".format(elapsed_python, mAP, cmc)) 46 | 47 | xtimes = elapsed_python / elapsed_cython 48 | print("=> Conclusion: cython is {:.2f}x faster than python".format(xtimes)) -------------------------------------------------------------------------------- /eval_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | from torch.optim import lr_scheduler 5 | from torch.autograd import Variable 6 | import torchvision 7 | from torchvision import datasets, models, transforms 8 | from eval_lib.cython_eval import eval_market1501_wrap 9 | from model import ft_net 10 | import pdb 11 | import numpy as np 12 | import scipy.io 13 | import time 14 | import sys, os 15 | import argparse 16 | 17 | def load_network(network, model_name, which_epoch): 18 | save_path = os.path.join('./model',model_name,'net_%s.pth'%which_epoch) 19 | # network.load_state_dict(torch.load(save_path)) 20 | network.load_state_dict(torch.load(save_path)) 21 | return network 22 | 23 | def fliplr(img): 24 | '''flip horizontal''' 25 | inv_idx = torch.arange(img.size(3)-1,-1,-1).long() # N x C x H x W 26 | img_flip = img.index_select(3,inv_idx) 27 | return img_flip 28 | 29 | def extract_feature(model,dataloaders): 30 | features = torch.FloatTensor() 31 | count = 0 32 | for data in dataloaders: 33 | img, label = data 34 | n, c, h, w = img.size() 35 | count += n 36 | # if count%100 == 0: 37 | # print(count) 38 | ff = torch.FloatTensor(n,2048).zero_() 39 | # add original and flipped features 40 | for i in range(2): 41 | if(i==1): 42 | img = fliplr(img) 43 | input_img = Variable(img.cuda()) 44 | _, outputs = model(input_img) 45 | f = outputs.data.cpu() 46 | ff = ff+f 47 | # norm feature 48 | fnorm = torch.norm(ff, p=2, dim=1, keepdim=True) 49 | ff = ff.div(fnorm.expand_as(ff)) 50 | features = torch.cat((features,ff), 0) 51 | return features 52 | 53 | def extract_feature_v1(model, dataloaders): 54 | features = torch.FloatTensor() 55 | count = 0 56 | new_label = [] 57 | for data in dataloaders: 58 | img, label, _ = data 59 | n, c, h, w = img.size() 60 | count += n 61 | # if count%100 == 0: 62 | # print(count) 63 | ff = torch.FloatTensor(n,2048).zero_() 64 | # add original and flipped features 65 | for i in range(2): 66 | if(i==1): 67 | img = fliplr(img) 68 | input_img = Variable(img.cuda()) 69 | logits, outputs = model(input_img) 70 | f = outputs.data.cpu() 71 | ff = ff+f 72 | # norm feature 73 | fnorm = torch.norm(ff, p=2, dim=1, keepdim=True) 74 | ff = 
ff.div(fnorm.expand_as(ff)) 75 | features = torch.cat((features,ff), 0) 76 | _, tmp_label = logits.max(1) 77 | new_label = np.append(new_label,tmp_label) 78 | # predicted labels 79 | 80 | 81 | return features, new_label 82 | 83 | def get_id(img_path): 84 | camera_id = [] 85 | labels = [] 86 | for path, v in img_path: 87 | filename = os.path.basename(path) 88 | label = filename[0:4] 89 | camera = filename.split('c')[1] 90 | labels.append(int(label)) 91 | camera_id.append(int(camera[0])) 92 | return camera_id, labels 93 | 94 | def get_id_CUHK(img_path): 95 | camera_id = [] 96 | labels = [] 97 | for path, v in img_path: 98 | filename = path.split('/')[-1] 99 | camera = 2*(int(filename.split('_')[0])-1) + int(filename.split('_')[2]) 100 | label = path.split('/')[-2] 101 | labels.append(int(label)) 102 | camera_id.append(int(camera)) 103 | return camera_id, labels 104 | 105 | def get_id_CUHK_v1(img_path): 106 | camera_id = [] 107 | labels = [] 108 | for path, v, _ in img_path: 109 | filename = path.split('/')[-1] 110 | camera = 2*(int(filename.split('_')[0])-1) + int(filename.split('_')[2]) 111 | label = path.split('/')[-2] 112 | labels.append(int(label)) 113 | camera_id.append(int(camera)) 114 | return camera_id, labels 115 | 116 | def get_id_v1(img_path): 117 | camera_id = [] 118 | labels = [] 119 | for path, v, _ in img_path: 120 | filename = os.path.basename(path) 121 | label = filename[0:4] 122 | camera = filename.split('c')[1] 123 | labels.append(int(label)) 124 | camera_id.append(int(camera[0])) 125 | return camera_id, labels 126 | 127 | def get_test_acc(model, image_datasets, dataloaders, use_gpu, max_rank=10): 128 | 129 | gallery_path = image_datasets['gallery'].imgs 130 | query_path = image_datasets['query'].imgs 131 | try: 132 | g_camid,g_pid = get_id(gallery_path) 133 | q_camid,q_pid = get_id(query_path) 134 | except: 135 | g_camid,g_pid = get_id_CUHK(gallery_path) 136 | q_camid,q_pid = get_id_CUHK(query_path) 137 | 138 | if use_gpu: 139 | model = model.cuda() 140 | # Extract feature 141 | g_feas = extract_feature(model,dataloaders['gallery']) 142 | # print(g_feas.shape) 143 | # print("Extracted features for gallery set, obtained {}-by-{} matrix".format(g_feas.size(0), g_feas.size(1))) 144 | q_feas = extract_feature(model,dataloaders['query']) 145 | # print("Extracted features for gallery set, obtained {}-by-{} matrix".format(q_feas.size(0), q_feas.size(1))) 146 | 147 | distmat = np.matmul(q_feas.data.numpy(),np.transpose(g_feas.data.numpy()))*(-1.0) 148 | CMC, mAP = eval_market1501_wrap(distmat, q_pid, g_pid, q_camid, g_camid, max_rank=10) 149 | return CMC, mAP 150 | 151 | 152 | def extr_fea_train(model, image_datasets, dataloaders, use_gpu): 153 | 154 | gallery_path = image_datasets.imgs 155 | # pdb.set_trace() 156 | try: 157 | g_camid,g_pid = get_id_v1(gallery_path) 158 | except: 159 | g_camid,g_pid = get_id_CUHK_v1(gallery_path) 160 | 161 | if use_gpu: 162 | model = model.cuda() 163 | # Extract feature 164 | g_feas, pre_ids = extract_feature_v1(model, dataloaders) 165 | # print("Extracted features for gallery set, obtained {}-by-{} matrix".format(g_feas.size(0), g_feas.size(1))) 166 | 167 | return g_feas, pre_ids 168 | 169 | def extr_fea_train_un(model, image_datasets, dataloaders, use_gpu): 170 | 171 | gallery_path = image_datasets.imgs 172 | # pdb.set_trace() 173 | # try: 174 | # g_camid,g_pid = get_id_v1(gallery_path) 175 | # except: 176 | # g_camid,g_pid = get_id_CUHK_v1(gallery_path) 177 | 178 | if use_gpu: 179 | model = model.cuda() 180 | # Extract feature 181 | g_feas = 
extract_feature_v1(model,dataloaders) 182 | # print("Extracted features for gallery set, obtained {}-by-{} matrix".format(g_feas.size(0), g_feas.size(1))) 183 | 184 | return g_feas 185 | 186 | 187 | def extract_train_second_label(model, dataloaders): 188 | features = torch.FloatTensor() 189 | count = 0 190 | second_label = [] 191 | for data in dataloaders: 192 | img, label, _ = data 193 | n, c, h, w = img.size() 194 | count += n 195 | ff = torch.FloatTensor(n, 2048).zero_() 196 | # add original and flipped features 197 | for i in range(2): 198 | if (i == 1): 199 | img = fliplr(img) 200 | input_img = Variable(img.cuda()) 201 | logits, outputs = model(input_img) 202 | f = outputs.data.cpu() 203 | ff = ff + f 204 | # norm feature 205 | fnorm = torch.norm(ff, p=2, dim=1, keepdim=True) 206 | ff = ff.div(fnorm.expand_as(ff)) 207 | features = torch.cat((features, ff), 0) 208 | 209 | # predicted second labels 210 | _, top2 = logits.topk(2,1) 211 | top2= top2.cpu() 212 | tmp_label = top2[:,1] 213 | for ii in range(len(label)): 214 | if not top2[ii,0]==label[ii]: 215 | tmp_label[ii] = top2[ii,0] 216 | second_label = np.append(second_label, tmp_label) 217 | 218 | return features, second_label 219 | 220 | def extract_train_pred_label(model, dataloaders): 221 | features = torch.FloatTensor() 222 | count = 0 223 | pred_label = [] 224 | for data in dataloaders: 225 | img, label, _ = data 226 | n, c, h, w = img.size() 227 | count += n 228 | ff = torch.FloatTensor(n, 2048).zero_() 229 | # add original and flipped features 230 | for i in range(2): 231 | if (i == 1): 232 | img = fliplr(img) 233 | input_img = Variable(img.cuda()) 234 | logits, outputs = model(input_img) 235 | f = outputs.data.cpu() 236 | ff = ff + f 237 | # norm feature 238 | fnorm = torch.norm(ff, p=2, dim=1, keepdim=True) 239 | ff = ff.div(fnorm.expand_as(ff)) 240 | features = torch.cat((features, ff), 0) 241 | 242 | # predicted second labels 243 | _, top2 = logits.topk(2,1) 244 | top2= top2.cpu() 245 | tmp_label = top2[:,0] 246 | for ii in range(len(label)): 247 | if not top2[ii,0]==label[ii]: 248 | tmp_label[ii] = top2[ii,0] 249 | pred_label = np.append(pred_label, tmp_label) 250 | 251 | return features, pred_label -------------------------------------------------------------------------------- /fig/core.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mangye16/ReID-Label-Noise/89aa11f68c275a0bcff232d9a5c3ae152c9276af/fig/core.png -------------------------------------------------------------------------------- /loader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torchvision import transforms 4 | from torch.utils.data import Dataset, DataLoader 5 | from PIL import Image 6 | import numpy as np 7 | import os,sys 8 | # import matplotlib.pyplot as plt 9 | 10 | def default_loader(path): 11 | return Image.open(path).convert('RGB') 12 | 13 | def make_dataset(data_dir, class_to_idx): 14 | images = [] 15 | for target in sorted(class_to_idx.keys()): 16 | d = os.path.join(data_dir, target) 17 | instan_num = len(os.listdir(d)) 18 | weight0 = 1.0/instan_num 19 | # weight0 = 1.0 20 | for root, _, fnames in sorted(os.walk(d)): 21 | for fname in sorted(fnames): 22 | if os.path.exists(os.path.join(root, fname)): 23 | path = os.path.join(root, fname) 24 | item = (path, class_to_idx[target],weight0) 25 | images.append(item) 26 | return images 27 | 28 | class DatasetFolder(Dataset): 29 
| def __init__(self, root, transform=None, target_transform=None, loader=default_loader): 30 | classes, class_to_idx = self._find_classes(root) 31 | samples = make_dataset(root, class_to_idx) 32 | if len(samples) == 0: 33 | raise(RuntimeError("Found 0 files in subfolders of: " + root + "\n")) 34 | self.root = root 35 | self.loader = loader 36 | self.classes = classes 37 | self.class_to_idx = class_to_idx 38 | self.imgs = samples 39 | self.targets = [s[1] for s in samples] 40 | class_sample_counts = np.unique([s[1] for s in samples], return_counts=True)[1] 41 | # weight_dem = sum(1. / class_sample_counts) 42 | weight_dem = 1 43 | self.weights = [s[2]/weight_dem for s in samples] 44 | self.transform = transform 45 | self.target_transform = target_transform 46 | 47 | def _find_classes(self, dir): 48 | if sys.version_info >= (3, 5): 49 | classes = [d.name for d in os.scandir(dir) if d.is_dir()] 50 | else: 51 | classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))] 52 | classes.sort() 53 | class_to_idx = {classes[i]: i for i in range(len(classes))} 54 | return classes, class_to_idx 55 | 56 | def __getitem__(self, index): 57 | path, target, weigit = self.imgs[index] 58 | # weigit = self.weights[index] 59 | sample = self.loader(path) 60 | if self.transform is not None: 61 | sample = self.transform(sample) 62 | if self.target_transform is not None: 63 | target = self.target_transform(target) 64 | return sample, target, weigit 65 | 66 | def __len__(self): 67 | return len(self.imgs) 68 | 69 | -------------------------------------------------------------------------------- /loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.autograd as autograd 5 | import math 6 | from torch.nn.modules import loss 7 | 8 | 9 | def class_select(logits, target): 10 | batch_size, num_classes = logits.size() 11 | if target.is_cuda: 12 | device = target.data.get_device() 13 | one_hot_mask = autograd.Variable(torch.arange(0, num_classes) 14 | .long() 15 | .repeat(batch_size, 1) 16 | .cuda(device) 17 | .eq(target.data.repeat(num_classes, 1).t())) 18 | else: 19 | one_hot_mask = autograd.Variable(torch.arange(0, num_classes) 20 | .long() 21 | .repeat(batch_size, 1) 22 | .eq(target.data.repeat(num_classes, 1).t())) 23 | return logits.masked_select(one_hot_mask) 24 | 25 | 26 | class FocalLoss(nn.Module): 27 | def __init__(self, num_classes, gamma=2, alpha=0.25, aggregate='mean'): 28 | super(FocalLoss, self).__init__() 29 | assert aggregate in ['sum', 'mean', None] 30 | self.aggregate = aggregate 31 | self.alpha = alpha 32 | # self.alpha = Variable(torch.ones(num_classes)*alpha) 33 | self.gamma = gamma 34 | self.num_classes = num_classes 35 | print('Initializing FocalLoss for training: alpha={}, gamma={}'.format(self.alpha, self.gamma)) 36 | 37 | def forward(self, input, target, weights=None): 38 | assert input.dim() == 2 39 | assert not target.requires_grad 40 | target = target.squeeze(1) if target.dim() == 2 else target 41 | assert target.dim() == 1 42 | 43 | logpt = F.log_softmax(input, dim=1) 44 | logpt_gt = logpt.gather(1,target.unsqueeze(1)) 45 | logpt_gt = logpt_gt.view(-1) 46 | pt_gt = logpt_gt.exp() 47 | assert logpt_gt.size() == pt_gt.size() 48 | 49 | loss = -self.alpha*(torch.pow((1-pt_gt), self.gamma))*logpt_gt 50 | 51 | if self.aggregate == 'sum': 52 | return loss.sum() 53 | elif self.aggregate == 'mean': 54 | return loss.mean() 55 | elif self.aggregate is None: 
56 | return loss 57 | 58 | class InstanceCrossEntropyLoss(nn.Module): 59 | """ 60 | Cross entropy with instance-wise weights. Leave `aggregate` to None to obtain a loss 61 | vector of shape (batch_size,). 62 | """ 63 | def __init__(self, aggregate='mean', weighted=0): 64 | super(InstanceCrossEntropyLoss, self).__init__() 65 | assert aggregate in ['sum', 'mean', None] 66 | self.aggregate = aggregate 67 | self.weighted = weighted 68 | print('Initializing InstanceCrossEntropyLoss for training: with weights{}'.format(self.weighted)) 69 | if self.weighted == 1: 70 | print('Weighted loss is used...') 71 | 72 | def forward(self, logits, target, weights=None): 73 | assert logits.dim() == 2 74 | assert not target.requires_grad 75 | target = target.squeeze(1) if target.dim() == 2 else target 76 | assert target.dim() == 1 77 | softmax_result = F.log_softmax(logits, dim=1) 78 | loss = class_select(-softmax_result, target) 79 | 80 | if self.weighted == 1 or self.weighted == 2: 81 | assert list(loss.size()) == list(weights.size()) 82 | loss = weights * loss 83 | 84 | if self.aggregate == 'sum': 85 | return loss.sum() 86 | elif self.aggregate == 'mean': 87 | return loss.mean() 88 | elif self.aggregate is None: 89 | return loss 90 | 91 | 92 | class SmoothlabelCrossEntropyLoss(nn.Module): 93 | def __init__(self, beta=1.0, aggregate='mean', weighted=0): 94 | super(SmoothlabelCrossEntropyLoss, self).__init__() 95 | assert aggregate in ['sum', 'mean', None] 96 | self.aggregate = aggregate 97 | self.weighted = weighted 98 | self.beta = beta 99 | print('Initializing SmoothlabelCrossEntropyLoss for training: beta={}, weights={}'.format(self.beta, self.weighted)) 100 | if self.weighted == 1: 101 | print('Weighted loss is used...') 102 | 103 | def forward(self, input, target, weights=None): 104 | assert input.dim() == 2 105 | assert not target.requires_grad 106 | target = target.squeeze(1) if target.dim() == 2 else target 107 | assert target.dim() == 1 108 | 109 | logpt = F.log_softmax(input, dim=1) 110 | logpt_gt = logpt.gather(1,target.unsqueeze(1)) 111 | logpt_gt = logpt_gt.view(-1) 112 | logpt_pred,_ = torch.max(logpt,1) 113 | logpt_pred = logpt_pred.view(-1) 114 | assert logpt_gt.size() == logpt_pred.size() 115 | loss = - logpt_gt - self.beta* logpt_pred 116 | 117 | if self.weighted == 1 or self.weighted == 2: 118 | assert list(loss.size()) == list(weights.size()) 119 | loss = loss * weights 120 | if self.aggregate == 'sum': 121 | return loss.sum() 122 | elif self.aggregate == 'mean': 123 | return loss.mean() 124 | elif self.aggregate is None: 125 | return loss 126 | 127 | class SmoothlabelClassCrossEntropyLoss(nn.Module): 128 | def __init__(self, beta=0.0, aggregate='mean', weighted=0): 129 | super(SmoothlabelClassCrossEntropyLoss, self).__init__() 130 | assert aggregate in ['sum', 'mean', None] 131 | self.aggregate = aggregate 132 | self.weighted = weighted 133 | self.beta = beta 134 | print('Initializing SmoothlabelClassCrossEntropyLoss for training: beta={}, weights={}'.format(self.beta, self.weighted)) 135 | if self.weighted == 1: 136 | print('Weighted loss is used...') 137 | 138 | def forward(self, input, target, weights=None): 139 | assert input.dim() == 2 140 | assert not target.requires_grad 141 | target = target.squeeze(1) if target.dim() == 2 else target 142 | assert target.dim() == 1 143 | 144 | logpt = F.log_softmax(input, dim=1) 145 | logpt_gt = logpt.gather(1,target.unsqueeze(1)) 146 | logpt_gt = logpt_gt.view(-1) 147 | logpt_pred,_ = torch.max(logpt,1) 148 | logpt_pred = logpt_pred.view(-1) 
149 | assert logpt_gt.size() == logpt_pred.size() 150 | loss = - (1-self.beta)*logpt_gt - self.beta* logpt_pred 151 | 152 | if self.weighted == 1: 153 | assert list(loss.size()) == list(weights.size()) 154 | loss = loss * weights.exp() 155 | if self.aggregate == 'sum': 156 | return loss.sum() 157 | elif self.aggregate == 'mean': 158 | return loss.mean() 159 | elif self.aggregate is None: 160 | return loss 161 | 162 | class LabelRefineLoss(nn.Module): 163 | def __init__(self, lambda1=0.0, aggregate='mean'): 164 | super(LabelRefineLoss, self).__init__() 165 | assert aggregate in ['sum', 'mean', None] 166 | self.aggregate = aggregate 167 | self.lambda1 = lambda1 168 | print('Initializing LabelRefineLoss for training: lambda1={}'.format(self.lambda1)) 169 | 170 | def forward(self, input, target, lambda1): 171 | assert input.dim() == 2 172 | assert not target.requires_grad 173 | target = target.squeeze(1) if target.dim() == 2 else target 174 | assert target.dim() == 1 175 | 176 | logpt = F.log_softmax(input, dim=1) 177 | logpt_gt = logpt.gather(1,target.unsqueeze(1)) 178 | logpt_gt = logpt_gt.view(-1) 179 | logpt_pred,_ = torch.max(logpt,1) 180 | logpt_pred = logpt_pred.view(-1) 181 | assert logpt_gt.size() == logpt_pred.size() 182 | loss = - (1-lambda1)*logpt_gt - lambda1* logpt_pred 183 | 184 | if self.aggregate == 'sum': 185 | return loss.sum() 186 | elif self.aggregate == 'mean': 187 | return loss.mean() 188 | elif self.aggregate is None: 189 | return loss 190 | 191 | class InstanceWeightLoss(nn.Module): 192 | """ 193 | Cross entropy with instance-wise weights. Leave `aggregate` to None to obtain a loss 194 | vector of shape (batch_size,). 195 | """ 196 | def __init__(self, aggregate='mean', weighted=0): 197 | super(InstanceWeightLoss, self).__init__() 198 | assert aggregate in ['sum', 'mean', None] 199 | self.aggregate = aggregate 200 | self.weighted = weighted 201 | print('Initializing Instance Weight for training: with weights{}'.format(self.weighted)) 202 | if self.weighted == 1: 203 | print('Weighted loss is used...') 204 | 205 | def forward(self, logits, target, weights=None): 206 | assert logits.dim() == 2 207 | assert not target.requires_grad 208 | target = target.squeeze(1) if target.dim() == 2 else target 209 | assert target.dim() == 1 210 | softmax_result = F.log_softmax(logits, dim=1) 211 | loss = class_select(-softmax_result, target) 212 | 213 | if self.weighted == 1 or self.weighted == 2: 214 | assert list(loss.size()) == list(weights.size()) 215 | # pdb.set_trace() 216 | loss = weights * loss 217 | 218 | if self.aggregate == 'sum': 219 | return loss.sum() 220 | elif self.aggregate == 'mean': 221 | return loss.mean() 222 | elif self.aggregate is None: 223 | return loss 224 | 225 | class CoRefineLoss(loss._Loss): 226 | 227 | def __init__(self, lambda1=0.0, aggregate='mean'): 228 | super(CoRefineLoss, self).__init__() 229 | assert aggregate in ['sum', 'mean', None] 230 | self.aggregate = aggregate 231 | self.lambda1 = lambda1 232 | 233 | """The KL-Divergence loss for the model and refined labels output. 234 | output must be a pair of (model_output, refined_labels), both NxC tensors. 235 | The rows of refined_labels must all add up to one (probability scores); 236 | however, model_output must be the pre-softmax output of the network.""" 237 | 238 | def forward(self, output1, output2, target, lambdaKL = 0): 239 | 240 | # Target is ignored at training time. Loss is defined as KL divergence 241 | # between the model output and the refined labels. 
242 | if output2.requires_grad: 243 | raise ValueError("Refined labels should not require gradients.") 244 | 245 | output1_log_prob = F.log_softmax(output1, dim=1) 246 | output2_prob = F.softmax(output2, dim=1) 247 | 248 | _, pred_label = output2_prob.max(1) 249 | 250 | # Loss is normal cross entropy loss 251 | base_loss = F.cross_entropy(output1, pred_label) 252 | 253 | 254 | # Loss is -dot(model_output_log_prob, refined_labels). Prepare tensors 255 | # for batch matrix multiplicatio 256 | 257 | model_output1_log_prob = output1_log_prob.unsqueeze(2) 258 | model_output2_prob = output2_prob.unsqueeze(1) 259 | 260 | # Compute the loss, and average/sum for the batch. 261 | kl_loss = -torch.bmm(model_output2_prob, model_output1_log_prob) 262 | if self.aggregate == 'mean': 263 | loss_co = base_loss.mean() + lambdaKL * kl_loss.mean() 264 | else: 265 | loss_co = base_loss.sum() + lambdaKL * kl_loss.sum() 266 | return loss_co 267 | 268 | class CoRefineLossPLus(loss._Loss): 269 | 270 | def __init__(self, lambda1=0.0, aggregate='mean'): 271 | super(CoRefineLossPLus, self).__init__() 272 | assert aggregate in ['sum', 'mean', None] 273 | self.aggregate = aggregate 274 | self.lambda1 = lambda1 275 | 276 | """The KL-Divergence loss for the model and refined labels output. 277 | output must be a pair of (model_output, refined_labels), both NxC tensors. 278 | The rows of refined_labels must all add up to one (probability scores); 279 | however, model_output must be the pre-softmax output of the network.""" 280 | 281 | def forward(self, output1, output2, target, lambdaKL=0): 282 | 283 | # Target is ignored at training time. Loss is defined as KL divergence 284 | # between the model output and the refined labels. 285 | if output2.requires_grad: 286 | raise ValueError("Refined labels should not require gradients.") 287 | 288 | output1_log_prob = F.log_softmax(output1, dim=1) 289 | output2_prob = F.softmax(output2, dim=1) 290 | 291 | _, pred_label2 = output2_prob.max(1) 292 | _, pred_label1 = output1_log_prob.max(1) 293 | 294 | # compute the mask 295 | mask = pred_label2.eq(pred_label1) 296 | # Loss is normal cross entropy loss 297 | base_loss = F.cross_entropy(output1, pred_label2) 298 | base_loss = base_loss * mask.float() 299 | 300 | 301 | # Loss is -dot(model_output_log_prob, refined_labels). Prepare tensors 302 | # for batch matrix multiplicatio 303 | 304 | model_output1_log_prob = output1_log_prob.unsqueeze(2) 305 | model_output2_prob = output2_prob.unsqueeze(1) 306 | 307 | 308 | # Compute the loss, and average/sum for the batch. 
309 | kl_loss = -torch.bmm(model_output2_prob, model_output1_log_prob) 310 | if self.aggregate == 'mean': 311 | loss_co = base_loss.mean() + lambdaKL * kl_loss.mean() 312 | else: 313 | loss_co = base_loss.sum() + lambdaKL * kl_loss.sum() 314 | return loss_co -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import init 4 | from torchvision import models 5 | from torch.autograd import Variable 6 | 7 | # ##################################################################### 8 | def weights_init_kaiming(m): 9 | classname = m.__class__.__name__ 10 | # print(classname) 11 | if classname.find('Conv') != -1: 12 | init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') 13 | elif classname.find('Linear') != -1: 14 | init.kaiming_normal_(m.weight.data, a=0, mode='fan_out') 15 | init.constant_(m.bias.data, 0.0) 16 | elif classname.find('BatchNorm1d') != -1: 17 | init.normal_(m.weight.data, 1.0, 0.02) 18 | init.constant_(m.bias.data, 0.0) 19 | 20 | def weights_init_classifier(m): 21 | classname = m.__class__.__name__ 22 | if classname.find('Linear') != -1: 23 | init.normal_(m.weight.data, std=0.001) 24 | init.constant_(m.bias.data, 0.0) 25 | 26 | # Defines the new fc layer and classification layer 27 | # |--Linear--|--bn--|--relu--|--Linear--| 28 | class ClassBlock(nn.Module): 29 | def __init__(self, input_dim, class_num, dropout=True, relu=True, num_bottleneck=512): 30 | super(ClassBlock, self).__init__() 31 | add_block = [] 32 | add_block += [nn.Linear(input_dim, num_bottleneck)] 33 | add_block += [nn.BatchNorm1d(num_bottleneck)] 34 | if relu: 35 | add_block += [nn.LeakyReLU(0.1)] 36 | if dropout: 37 | add_block += [nn.Dropout(p=0.5)] 38 | add_block = nn.Sequential(*add_block) 39 | add_block.apply(weights_init_kaiming) 40 | 41 | classifier = [] 42 | classifier += [nn.Linear(num_bottleneck, class_num)] 43 | classifier = nn.Sequential(*classifier) 44 | classifier.apply(weights_init_classifier) 45 | 46 | self.add_block = add_block 47 | self.classifier = classifier 48 | def forward(self, x): 49 | x = self.add_block(x) 50 | x = self.classifier(x) 51 | return x 52 | 53 | # Define the ResNet50-based Model 54 | class ft_net(nn.Module): 55 | 56 | def __init__(self, class_num ): 57 | super(ft_net, self).__init__() 58 | model_ft = models.resnet50(pretrained=True) 59 | # avg pooling to global pooling 60 | model_ft.avgpool = nn.AdaptiveAvgPool2d((1,1)) 61 | self.model = model_ft 62 | self.classifier = ClassBlock(2048, class_num) 63 | 64 | # def forward(self, x): 65 | # x = self.model.conv1(x) 66 | # x = self.model.bn1(x) 67 | # x = self.model.relu(x) 68 | # x = self.model.maxpool(x) 69 | # x = self.model.layer1(x) 70 | # x = self.model.layer2(x) 71 | # x = self.model.layer3(x) 72 | # x = self.model.layer4(x) 73 | # x = self.model.avgpool(x) 74 | # x = torch.squeeze(x) 75 | # x = self.classifier(x) 76 | # return x 77 | 78 | def l2norm(self,x): 79 | norm = x.pow(2).sum(dim=1, keepdim=True).sqrt() 80 | out = torch.div(x,norm) 81 | return out 82 | 83 | def forward(self, x): 84 | x = self.model.conv1(x) 85 | x = self.model.bn1(x) 86 | x = self.model.relu(x) 87 | x = self.model.maxpool(x) 88 | x = self.model.layer1(x) 89 | x = self.model.layer2(x) 90 | x = self.model.layer3(x) 91 | x = self.model.layer4(x) 92 | x = self.model.avgpool(x) 93 | y = torch.squeeze(x) 94 | x = self.classifier(y) 95 | if self.training: 96 | return x 97 | else: 
98 | return x, self.l2norm(y) 99 | 100 | # Define the DenseNet121-based Model 101 | class ft_net_dense(nn.Module): 102 | 103 | def __init__(self, class_num ): 104 | super().__init__() 105 | model_ft = models.densenet121(pretrained=True) 106 | model_ft.features.avgpool = nn.AdaptiveAvgPool2d((1,1)) 107 | model_ft.fc = nn.Sequential() 108 | self.model = model_ft 109 | # For DenseNet, the feature dim is 1024 110 | self.classifier = ClassBlock(1024, class_num) 111 | 112 | def forward(self, x): 113 | x = self.model.features(x) 114 | x = torch.squeeze(x) 115 | x = self.classifier(x) 116 | return x 117 | 118 | # Define the ResNet50-based Model (Middle-Concat) 119 | # In the spirit of "The Devil is in the Middle: Exploiting Mid-level Representations for Cross-Domain Instance Matching." Yu, Qian, et al. arXiv:1711.08106 (2017). 120 | class ft_net_middle(nn.Module): 121 | 122 | def __init__(self, class_num ): 123 | super(ft_net_middle, self).__init__() 124 | model_ft = models.resnet50(pretrained=True) 125 | # avg pooling to global pooling 126 | model_ft.avgpool = nn.AdaptiveAvgPool2d((1,1)) 127 | self.model = model_ft 128 | self.classifier = ClassBlock(2048+1024, class_num) 129 | 130 | def forward(self, x): 131 | x = self.model.conv1(x) 132 | x = self.model.bn1(x) 133 | x = self.model.relu(x) 134 | x = self.model.maxpool(x) 135 | x = self.model.layer1(x) 136 | x = self.model.layer2(x) 137 | x = self.model.layer3(x) 138 | # x0 n*1024*1*1 139 | x0 = self.model.avgpool(x) 140 | x = self.model.layer4(x) 141 | # x1 n*2048*1*1 142 | x1 = self.model.avgpool(x) 143 | x = torch.cat((x0,x1),1) 144 | x = torch.squeeze(x) 145 | x = self.classifier(x) 146 | return x 147 | 148 | # Part Model proposed in Yifan Sun et al. (2018) 149 | class PCB(nn.Module): 150 | def __init__(self, class_num ): 151 | super(PCB, self).__init__() 152 | 153 | self.part = 6 # We cut the pool5 to 6 parts 154 | model_ft = models.resnet50(pretrained=True) 155 | self.model = model_ft 156 | self.avgpool = nn.AdaptiveAvgPool2d((self.part,1)) 157 | self.dropout = nn.Dropout(p=0.5) 158 | # remove the final downsample 159 | self.model.layer4[0].downsample[0].stride = (1,1) 160 | self.model.layer4[0].conv2.stride = (1,1) 161 | # define 6 classifiers 162 | for i in range(self.part): 163 | name = 'classifier'+str(i) 164 | setattr(self, name, ClassBlock(2048, class_num, True, False, 256)) 165 | 166 | def forward(self, x): 167 | x = self.model.conv1(x) 168 | x = self.model.bn1(x) 169 | x = self.model.relu(x) 170 | x = self.model.maxpool(x) 171 | 172 | x = self.model.layer1(x) 173 | x = self.model.layer2(x) 174 | x = self.model.layer3(x) 175 | x = self.model.layer4(x) 176 | x = self.avgpool(x) 177 | x = self.dropout(x) 178 | part = {} 179 | predict = {} 180 | # get six part features: batchsize*2048*6 181 | for i in range(self.part): 182 | part[i] = torch.squeeze(x[:,:,i]) 183 | name = 'classifier'+str(i) 184 | c = getattr(self,name) 185 | predict[i] = c(part[i]) 186 | 187 | # sum prediction 188 | #y = predict[0] 189 | #for i in range(self.part-1): 190 | # y += predict[i+1] 191 | y = [] 192 | for i in range(self.part): 193 | y.append(predict[i]) 194 | return y 195 | 196 | class PCB_test(nn.Module): 197 | def __init__(self,model): 198 | super(PCB_test,self).__init__() 199 | self.part = 6 200 | self.model = model.model 201 | self.avgpool = nn.AdaptiveAvgPool2d((self.part,1)) 202 | # remove the final downsample 203 | self.model.layer4[0].downsample[0].stride = (1,1) 204 | self.model.layer4[0].conv2.stride = (1,1) 205 | 206 | def forward(self, x): 207 | x = self.model.conv1(x)
208 | x = self.model.bn1(x) 209 | x = self.model.relu(x) 210 | x = self.model.maxpool(x) 211 | 212 | x = self.model.layer1(x) 213 | x = self.model.layer2(x) 214 | x = self.model.layer3(x) 215 | x = self.model.layer4(x) 216 | x = self.avgpool(x) 217 | y = x.view(x.size(0),x.size(1),x.size(2)) 218 | return y 219 | 220 | # # debug model structure 221 | # #net = ft_net(751) 222 | # net = ft_net_dense(751) 223 | # #print(net) 224 | # input = Variable(torch.FloatTensor(8, 3, 224, 224)) 225 | # output = net(input) 226 | # print('net output size:') 227 | # print(output.shape) 228 | -------------------------------------------------------------------------------- /train_core.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | from __future__ import print_function, division 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torch.optim as optim 8 | from torch.optim import lr_scheduler 9 | from torch.autograd import Variable 10 | from torchvision import datasets, models, transforms 11 | 12 | from tensorboardX import SummaryWriter 13 | 14 | import sys 15 | import json 16 | import scipy 17 | import os, time 18 | import argparse 19 | import numpy as np 20 | import torchvision 21 | import matplotlib 22 | 23 | matplotlib.use('agg') 24 | import matplotlib.pyplot as plt 25 | from PIL import Image 26 | from shutil import copyfile 27 | from model import ft_net 28 | from eval_utils import get_test_acc, extract_train_second_label 29 | from utils import * 30 | import loader, loss 31 | 32 | version = torch.__version__ 33 | # ##################################################################### 34 | # options 35 | # -------- 36 | parser = argparse.ArgumentParser(description='Training') 37 | parser.add_argument('--gpu', default='0', type=str, help='gpu ids: e.g. 0 0,1,2 0,2')
38 | parser.add_argument('--seed', default=1, type=int, help='rng seed') 39 | parser.add_argument('--model_dir', default='.checkpoint/', type=str, help='output model name') 40 | parser.add_argument('--data_dir', default='/home/comp/mangye/dataset/', type=str, help='data dir') 41 | parser.add_argument('--dataset', default='market', type=str, help='training data: Market1501, DukeMTMCreID, CUHK03') 42 | parser.add_argument('--resume', default='', type=str, help='path of pretrained model: ./model/baseline/net_8.pth') 43 | parser.add_argument('--batchsize', default=32, type=int, help='batchsize') 44 | parser.add_argument('--noise_ratio', default=0.2, type=float, help='percentage of noise data in the training') 45 | parser.add_argument('--lr', default=0.01, type=float, help='initial learning rate') 46 | parser.add_argument('--stage2', action='store_true', help='whether to use stage 2') 47 | parser.add_argument('--pattern', action='store_true', help='whether to use patterned noise') 48 | 49 | args = parser.parse_args() 50 | 51 | torch.manual_seed(args.seed) 52 | 53 | # initialization 54 | start_epoch = 0 55 | weight_r = [0, 0] 56 | best_acc, best_epoch = 0, 0 57 | test_epoch = 2 58 | data_dir = args.data_dir + args.dataset 59 | 60 | # suffix 61 | if not args.pattern: 62 | suffix = args.dataset + '_core2_noise_{}_lr_{:1.1e}'.format(args.noise_ratio, args.lr) 63 | else: 64 | suffix = args.dataset + '_core_Pnoise_{}_lr_{:1.1e}'.format(args.noise_ratio, args.lr) 65 | 66 | print('model: ' + suffix) 67 | 68 | # define the log path 69 | log_dir = './res/' + args.dataset + '_log/' 70 | checkpoint_path = './res/checkpoint_' + args.dataset + '/' 71 | vis_log_dir = log_dir + suffix + '/' 72 | if not os.path.isdir(log_dir): 73 | os.makedirs(log_dir) 74 | if not os.path.isdir(vis_log_dir): 75 | os.makedirs(vis_log_dir) 76 | if not os.path.isdir(checkpoint_path): 77 | os.makedirs(checkpoint_path) 78 | writer = SummaryWriter(vis_log_dir) 79 | sys.stdout = Logger(log_dir + suffix + '_os.txt') 80 | 81 | # define the gpu id 82 | str_ids = args.gpu.split(',') 83 | gpu_ids = [] 84 | for str_id in str_ids: 85 | gid = int(str_id) 86 | if gid >= 0: 87 | gpu_ids.append(gid) 88 | 89 | # set gpu ids 90 | if len(gpu_ids) > 0: 91 | torch.cuda.set_device(gpu_ids[0]) 92 | print('using gpu: {}'.format(gpu_ids)) 93 | 94 | # ##################################################################### 95 | # Load Data 96 | train_transform = transforms.Compose([ 97 | transforms.Resize((288, 144), interpolation=3), 98 | transforms.RandomCrop((256, 128)), 99 | transforms.RandomHorizontalFlip(), 100 | transforms.ToTensor(), 101 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 102 | ]) 103 | test_transform = transforms.Compose([ 104 | transforms.Resize((256, 128), interpolation=3), 105 | transforms.ToTensor(), 106 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 107 | ]) 108 | 109 | # load training data with DatasetFolder 110 | print('Loading training data: ', args.dataset) 111 | train_dataset = loader.DatasetFolder(os.path.join(data_dir, 'train'), transform=train_transform) 112 | class_names = train_dataset.classes 113 | dataset_sizes_train = len(train_dataset) 114 | 115 | use_gpu = torch.cuda.is_available() 116 | 117 | # Define a model 118 | model1 = ft_net(len(class_names)) 119 | model2 = ft_net(len(class_names)) 120 | if args.pattern: 121 | model_tmp = ft_net(len(class_names)) 122 | if use_gpu: 123 | model1 = model1.cuda() 124 | model2 = model2.cuda() 125 | if args.pattern: 126 | model_tmp = model_tmp.cuda()
127 | 128 | # Load a pretrained model 129 | if args.resume: 130 | # two pretrained SELF models (ignore when training from scratch) 131 | model_name1 = 'market_noise_0.2_batch_32_refine_lr_1.0e-02_model1.t' 132 | print('Initializing weights with ', args.resume) 133 | model_path1 = checkpoint_path + model_name1 134 | model1.load_state_dict(torch.load(model_path1)) 135 | 136 | model_name2 = 'market_noise_0.2_batch_32_refine_lr_1.0e-02_model2.t' 137 | model_path2 = checkpoint_path + model_name2 138 | model2.load_state_dict(torch.load(model_path2)) 139 | else: 140 | print('Initializing weights with ImageNet') 141 | 142 | # generate noisy label 143 | if args.noise_ratio >= 0: 144 | if not args.pattern: 145 | print('adding random noisy label') 146 | trainLabels = torch.LongTensor([y for (p, y, w) in train_dataset.imgs]) 147 | trainLabels_nsy, if_truelbl = gen_nosiy_lbl(trainLabels, args.noise_ratio, len(class_names)) 148 | else: 149 | print('adding patterned noisy label') 150 | trainLabels = torch.LongTensor([y for (p, y, w) in train_dataset.imgs]) 151 | 152 | model_path = './res/checkpoint/' + args.dataset + '_base_noise_0.0_lr_1.0e-02_epoch_60.t' 153 | model_tmp.load_state_dict(torch.load(model_path)) 154 | 155 | transform_bak = train_transform 156 | train_dataset.transform = test_transform 157 | temploader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batchsize, shuffle=False, num_workers=8) 158 | 159 | model_tmp.eval() # Set model to evaluate mode 160 | print('extract second label...') 161 | _, second_label = extract_train_second_label(model_tmp, temploader) 162 | del model_tmp 163 | trainLabels_nsy, if_truelbl = gen_pattern_nosiy_lbl(trainLabels, args.noise_ratio, len(class_names), second_label) 164 | train_dataset.transform = transform_bak  # restore the training transform 165 | 166 | # generate instance weight 167 | if args.stage2: 168 | print('Generating self-generated weights...') 169 | # TO DO 170 | else: 171 | print('Setting same weights for all the instances...') 172 | for i in range(len(trainLabels_nsy)): 173 | train_dataset.imgs[i] = (train_dataset.imgs[i][0], trainLabels_nsy[i], 1) 174 | 175 | # load training data with DatasetFolder 176 | dataloaders_train = torch.utils.data.DataLoader(train_dataset, batch_size=args.batchsize, shuffle=True, 177 | num_workers=8) # 8 workers may work faster 178 | 179 | # load testing data with DatasetFolder 180 | test_dataset = {x: datasets.ImageFolder(os.path.join(data_dir, x), test_transform) for x in ['gallery', 'query']} 181 | dataloaders_test = { 182 | x: torch.utils.data.DataLoader(test_dataset[x], batch_size=args.batchsize, shuffle=False, num_workers=8) for x in 183 | ['gallery', 'query']} 184 | 185 | # self label refining loss 186 | criterion1 = loss.LabelRefineLoss() 187 | # label co-refining loss 188 | criterion2 = loss.CoRefineLoss() 189 | 190 | # optimizers for the two networks (10x LR on the newly initialized fc/classifier layers) 191 | ignored_params1 = list(map(id, model1.model.fc.parameters())) + list(map(id, model1.classifier.parameters())) 192 | base_params1 = filter(lambda p: id(p) not in ignored_params1, model1.parameters()) 193 | optimizer_1 = optim.SGD([ 194 | {'params': base_params1, 'lr': args.lr}, 195 | {'params': model1.model.fc.parameters(), 'lr': args.lr * 10}, 196 | {'params': model1.classifier.parameters(), 'lr': args.lr * 10} 197 | ], weight_decay=5e-4, momentum=0.9, nesterov=True) 198 | ignored_params2 = list(map(id, model2.model.fc.parameters())) + list(map(id, model2.classifier.parameters())) 199 | base_params2 = filter(lambda p: id(p) not in ignored_params2, model2.parameters()) 200 | optimizer_2 = optim.SGD([ 201 | {'params': base_params2, 'lr': args.lr},
202 | {'params': model2.model.fc.parameters(), 'lr': args.lr * 10}, 203 | {'params': model2.classifier.parameters(), 'lr': args.lr * 10} 204 | ], weight_decay=5e-4, momentum=0.9, nesterov=True) 205 | 206 | 207 | # Decay LR by a factor of 0.1 at epochs 20 and 50 208 | def adjust_learning_rate(optimizer_1, optimizer_2, epoch): 209 | """Sets the learning rate to the initial LR decayed by 10 at epochs 20 and 50""" 210 | if epoch < 20: 211 | lr = args.lr 212 | elif 20 <= epoch < 50: 213 | lr = args.lr * 0.1 214 | elif epoch >= 50: 215 | lr = args.lr * 0.01 216 | 217 | optimizer_1.param_groups[0]['lr'] = lr 218 | for i in range(len(optimizer_1.param_groups) - 1): 219 | optimizer_1.param_groups[i + 1]['lr'] = lr * 10 220 | 221 | optimizer_2.param_groups[0]['lr'] = lr 222 | for i in range(len(optimizer_2.param_groups) - 1): 223 | optimizer_2.param_groups[i + 1]['lr'] = lr * 10 224 | 225 | 226 | def save_network(network1, network2, epoch_label, is_best=False): 227 | if is_best: 228 | save_path1 = checkpoint_path + suffix + 'model1_epoch_best.t' 229 | save_path2 = checkpoint_path + suffix + 'model2_epoch_best.t' 230 | else: 231 | save_path1 = checkpoint_path + suffix + 'model1_epoch_{}.t'.format(epoch_label) 232 | save_path2 = checkpoint_path + suffix + 'model2_epoch_{}.t'.format(epoch_label) 233 | 234 | torch.save(network1.state_dict(), save_path1) 235 | torch.save(network2.state_dict(), save_path2) 236 | 237 | 238 | def train_model(model1, model2, criterion1, criterion2, optimizer_1, optimizer_2, epoch, weight_r): 239 | adjust_learning_rate(optimizer_1, optimizer_2, epoch) 240 | train_loss1 = AverageMeter() 241 | train_loss2 = AverageMeter() 242 | data_time = AverageMeter() 243 | batch_time = AverageMeter() 244 | 245 | model1.train() 246 | model2.train() 247 | 248 | correct1 = 0 249 | correct2 = 0 250 | total = 0 251 | end = time.time() 252 | for batch_idx, (inputs, targets, weights) in enumerate(dataloaders_train): 253 | if use_gpu: 254 | inputs = Variable(inputs.cuda()) 255 | targets = Variable(targets.cuda()) 256 | weights = Variable(weights.cuda()) 257 | data_time.update(time.time() - end) 258 | 259 | # model inputs 260 | outputs1 = model1(inputs) 261 | outputs2 = model2(inputs) 262 | 263 | # optimization 264 | if epoch <= 20: 265 | # self refining in first stage for the first network 266 | loss1 = criterion1(outputs1, targets, weight_r[0]) 267 | optimizer_1.zero_grad() 268 | loss1.backward() 269 | optimizer_1.step() 270 | 271 | # self refining in first stage for the second network 272 | loss2 = criterion1(outputs2, targets, weight_r[1]) 273 | optimizer_2.zero_grad() 274 | loss2.backward() 275 | optimizer_2.step() 276 | else: 277 | # co-refining in second stage for the first network 278 | loss1 = criterion2(outputs1, outputs2.detach(), targets, 1) 279 | optimizer_1.zero_grad() 280 | loss1.backward() 281 | optimizer_1.step() 282 | 283 | # co-refining in second stage for the second network 284 | loss2 = criterion2(outputs2, outputs1.detach(), targets, 1) 285 | optimizer_2.zero_grad() 286 | loss2.backward() 287 | optimizer_2.step() 288 | 289 | # log loss 290 | train_loss1.update(loss1.item(), inputs.size(0)) 291 | train_loss2.update(loss2.item(), inputs.size(0)) 292 | 293 | # measure elapsed time 294 | batch_time.update(time.time() - end) 295 | end = time.time() 296 | 297 | # measure accuracy 298 | _, predicted1 = outputs1.max(1) 299 | correct1 += predicted1.eq(targets).sum().item() 300 | 301 | _, predicted2 = outputs2.max(1) 302 | correct2 += predicted2.eq(targets).sum().item()
303 | 304 | total += inputs.size(0) 305 | 306 | if batch_idx % 10 == 0: 307 | print('Epoch:[{}][{}/{}] ' 308 | 'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) ' 309 | 'Loss1: {train_loss1.val:.4f} ({train_loss1.avg:.4f}) ' 310 | 'Loss2: {train_loss2.val:.4f} ({train_loss2.avg:.4f}) ' 311 | 'Acc1: {:.2f} Acc2: {:.2f}'.format( 312 | epoch, batch_idx, len(dataloaders_train), 100. * correct1 / total, 100. * correct2 / total, 313 | batch_time=batch_time, train_loss1=train_loss1, train_loss2=train_loss2)) 314 | 315 | writer.add_scalar('trainAcc', 100. * correct1 / total, epoch) 316 | writer.add_scalar('trainAcc2', 100. * correct2 / total, epoch) 317 | writer.add_scalar('loss1', train_loss1.avg, epoch) 318 | writer.add_scalar('loss2', train_loss2.avg, epoch) 319 | # self-refining weight: a higher average loss yields a lower weight next epoch 320 | weight_r = [1. / (1. + train_loss1.avg), 1. / (1. + train_loss2.avg)] 321 | return weight_r 322 | 323 | 324 | for epoch in range(start_epoch, start_epoch + 61): 325 | 326 | # training 327 | print('Start Training..........') 328 | weight_r = train_model(model1, model2, criterion1, criterion2, optimizer_1, optimizer_2, epoch, weight_r) 329 | 330 | if epoch % test_epoch == 0: 331 | model1.eval() # Set model to evaluate mode 332 | start = time.time() 333 | cmc, mAP = get_test_acc(model1, test_dataset, dataloaders_test, use_gpu, max_rank=10) 334 | writer.add_scalar('rank1', cmc[0], epoch) 335 | writer.add_scalar('mAP', mAP, epoch) 336 | if cmc[0] > best_acc: 337 | best_epoch = epoch 338 | best_acc = cmc[0] 339 | save_network(model1, model2, epoch, is_best=True) 340 | print('Epoch {}: R1:{:.4%} R5:{:.4%} R10:{:.4%} mAP:{:.4%} (Best Epoch[{}])'.format( 341 | epoch, cmc[0], cmc[4], cmc[9], mAP, best_epoch)) 342 | print('Evaluation time: {}'.format(time.time() - start)) 343 | 344 | # periodically save intermediate checkpoints 345 | if epoch > 20 and epoch % 20 == 0: 346 | save_network(model1, model2, epoch, is_best=False) 347 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.init as init 4 | 5 | import os 6 | import os.path as osp 7 | import sys 8 | import time 9 | import math 10 | import random 11 | import numpy as np 12 | import errno 13 | from scipy import stats 14 | from scipy.spatial import distance 15 | import pdb 16 | # randomly flip a noise_ratio fraction of the labels within each class 17 | def gen_nosiy_lbl(orig_lable, noise_ratio, class_number): 18 | random.seed(1) 19 | label = orig_lable.numpy() 20 | label_noisy = label.copy() 21 | for idx in range(class_number): 22 | idx_sub = np.argwhere(label==idx)[:,0] 23 | idx_nsy = np.array(random.sample(idx_sub.tolist(), int(np.round(len(idx_sub)*noise_ratio)))) 24 | for i in idx_nsy: 25 | tar_lbl = random.sample((list(range(idx))+list(range(idx+1,class_number))),1) 26 | label_noisy[i] = tar_lbl[0] 27 | if_true = np.array((label==label_noisy)*1) 28 | return label_noisy, if_true 29 | # patterned noise: flip labels to the model's second-choice (second_label) class 30 | def gen_pattern_nosiy_lbl(orig_lable, noise_ratio, class_number, second_label): 31 | random.seed(1) 32 | label = orig_lable.numpy() 33 | label_noisy = label.copy() 34 | for idx in range(class_number): 35 | idx_sub = np.argwhere(label==idx)[:,0] 36 | idx_nsy = np.array(random.sample(idx_sub.tolist(), int(np.round(len(idx_sub)*noise_ratio)))) 37 | for i in idx_nsy: 38 | label_noisy[i] = int(second_label[i]) 39 | if_true = np.array((label==label_noisy)*1) 40 | return label_noisy, if_true 41 | 42 | 43 | 44 | def mixup_data(x, y, alpha=1.0, use_cuda=True): 45 | '''Compute the mixup data. Return mixed inputs, pairs of targets, and lambda''' 46 | if alpha > 0.: 47 | lam = np.random.beta(alpha, alpha) 48 | else: 49 | lam = 1. 50 | batch_size = x.size()[0] 51 | if use_cuda: 52 | index = torch.randperm(batch_size).cuda() 53 | else: 54 | index = torch.randperm(batch_size) 55 | mixed_x = lam * x + (1 - lam) * x[index,:] 56 | y_a, y_b = y, y[index] 57 | return mixed_x, y_a, y_b, lam 58 | 59 | def mixup_criterion(y_a, y_b, weights, lam): 60 | return lambda criterion, pred: lam * criterion(pred, y_a, weights) + (1 - lam) * criterion(pred, y_b, weights) 61 | 
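# A minimal usage sketch for the two mixup helpers above (`inputs`, `targets`,
# `weights`, `model`, and `criterion` are placeholder names; any loss with the
# signature criterion(pred, target, weights) fits, e.g. LabelRefineLoss in loss.py):
# mixed_x, y_a, y_b, lam = mixup_data(inputs, targets, alpha=1.0)
# loss_func = mixup_criterion(y_a, y_b, weights, lam)
# loss = loss_func(criterion, model(mixed_x))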
62 | def get_mean_and_std(dataset): 63 | '''Compute the mean and std value of dataset.''' 64 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=2) 65 | mean = torch.zeros(3) 66 | std = torch.zeros(3) 67 | print('==> Computing mean and std..') 68 | for inputs, targets in dataloader: 69 | for i in range(3): 70 | mean[i] += inputs[:,i,:,:].mean() 71 | std[i] += inputs[:,i,:,:].std() 72 | mean.div_(len(dataset)) 73 | std.div_(len(dataset)) 74 | return mean, std 75 | # beta-pdf weight from each sample's normalized L2 distance to its class mean feature 76 | def get_mfea_thres(features, alpha, beta): 77 | l2dsts = [] 78 | mean_fea = np.mean(features, 0) 79 | for i in range(features.shape[0]): 80 | l2dst = distance.euclidean(features[i,:], mean_fea) 81 | l2dsts.append(l2dst) 82 | l2dsts = np.array(l2dsts) 83 | l2dst_max = np.max(l2dsts) 84 | l2dst_min = np.min(l2dsts) 85 | if l2dst_max-l2dst_min == 0: 86 | l2dsts_norm = (l2dsts-l2dst_min) 87 | else: 88 | l2dsts_norm = (l2dsts-l2dst_min)/(l2dst_max-l2dst_min) 89 | # l2dsts_norm_sort = np.sort(l2dsts_norm) 90 | # print(l2dsts_norm_sort) 91 | weights = stats.beta.pdf(l2dsts_norm, alpha, beta) 92 | return weights 93 | 94 | def gen_weights_dist(features, trainLabels_nsy, class_names, alpha, beta): 95 | features = features.data.numpy() 96 | all_weights = np.array([]) 97 | indexs = np.array([]) 98 | for i in range(len(class_names)): 99 | f_idxs = np.where(trainLabels_nsy == i)[0] 100 | sele_feas = features[f_idxs, :] 101 | weights = get_mfea_thres(sele_feas, alpha, beta) 102 | all_weights = np.concatenate([all_weights, weights]) 103 | indexs = np.concatenate([indexs, f_idxs]) 104 | return indexs, all_weights 105 | 106 | class Logger(object): 107 | """ 108 | Write console output to external text file. 109 | Code imported from https://github.com/Cysu/open-reid/blob/master/reid/utils/logging.py.
110 | """ 111 | def __init__(self, fpath=None): 112 | self.console = sys.stdout 113 | self.file = None 114 | if fpath is not None: 115 | mkdir_if_missing(osp.dirname(fpath)) 116 | self.file = open(fpath, 'w') 117 | 118 | def __del__(self): 119 | self.close() 120 | 121 | def __enter__(self): 122 | pass 123 | 124 | def __exit__(self, *args): 125 | self.close() 126 | 127 | def write(self, msg): 128 | self.console.write(msg) 129 | if self.file is not None: 130 | self.file.write(msg) 131 | 132 | def flush(self): 133 | self.console.flush() 134 | if self.file is not None: 135 | self.file.flush() 136 | os.fsync(self.file.fileno()) 137 | 138 | def close(self): 139 | self.console.close() 140 | if self.file is not None: 141 | self.file.close() 142 | 143 | def mkdir_if_missing(directory): 144 | if not osp.exists(directory): 145 | try: 146 | os.makedirs(directory) 147 | except OSError as e: 148 | if e.errno != errno.EEXIST: 149 | raise 150 | class AverageMeter(object): 151 | """Computes and stores the average and current value""" 152 | def __init__(self): 153 | self.reset() 154 | 155 | def reset(self): 156 | self.val = 0 157 | self.avg = 0 158 | self.sum = 0 159 | self.count = 0 160 | 161 | def update(self, val, n=1): 162 | self.val = val 163 | self.sum += val * n 164 | self.count += n 165 | self.avg = self.sum / self.count --------------------------------------------------------------------------------