├── 0一些基本工具.py
├── 1数据集管理.py
├── 2从数据集中导入视频图片.py
├── 3随机采样,形成pk批次.py
├── 4随机图像裁剪.py
├── 5损失函数.py
├── 6评估模型的方法.py
├── README.md
├── info
│   ├── query_IDX.mat
│   ├── test_name.txt
│   ├── tracks_test_info.mat
│   ├── tracks_train_info.mat
│   └── train_name.txt
├── main_video_person_reid.py
├── models
│   ├── ResNet.py
│   ├── __init__.py
│   └── resnet3d.py
└── test.py

/0一些基本工具.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import os
3 | import sys
4 | import errno
5 | import shutil
6 | import json
7 | import os.path as osp
8 | 
9 | import torch
10 | 
11 | 
12 | def mkdir_if_missing(directory):  # create the directory if it does not already exist
13 |     if not osp.exists(directory):  # the directory is missing
14 |         try:
15 |             os.makedirs(directory)  # create it, including any missing parent directories
16 |         except OSError as e:
17 |             if e.errno != errno.EEXIST:  # ignore the race where another process created it first
18 |                 raise
19 | 
20 | 
21 | class AverageMeter(object):
22 |     """Computes and stores the average and current value.
23 | 
24 |     Code imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262
25 |     """
26 |     def __init__(self, val=0, avg=0, sum=0, count=0):
27 |         self.val = val
28 |         self.avg = avg
29 |         self.sum = sum
30 |         self.count = count
31 |         # self.reset()
32 | 
33 |     def reset(self):  # reset all statistics to zero
34 |         self.val = 0
35 |         self.avg = 0
36 |         self.sum = 0
37 |         self.count = 0
38 | 
39 |     def update(self, val, n=1):  # record a value observed n times and refresh the running average
40 |         self.val = val
41 |         self.sum += val * n
42 |         self.count += n
43 |         self.avg = self.sum / self.count
44 | 
45 | 
46 | def save_checkpoint(state, is_best, fpath='checkpoint.pth.tar'):  # save a training checkpoint
47 |     mkdir_if_missing(osp.dirname(fpath))  # create the target directory if it does not exist yet
48 |     torch.save(state, fpath)  # serialize the model state to fpath
49 |     if is_best:  # if this is the best model so far, keep an extra copy
50 |         shutil.copy(fpath, osp.join(osp.dirname(fpath), 'best_model.pth.tar'))
51 |         # shutil.copy(src, dst): copies the file contents from src to dst, here as best_model.pth.tar
52 | 
53 | 
54 | class Logger(object):
55 |     """
56 |     Write console output to both stdout and an external text file (a tee-style logger).
57 |     Code imported from https://github.com/Cysu/open-reid/blob/master/reid/utils/logging.py.
58 |     """
59 |     def __init__(self, fpath=None):
60 |         self.console = sys.stdout  # keep a handle on the real console stream
61 |         self.file = None
62 |         if fpath is not None:  # if a log path is given, create and open the log file
63 |             mkdir_if_missing(os.path.dirname(fpath))
64 |             self.file = open(fpath, 'w')  # open the log file in write mode
65 | 
66 |     def __del__(self):  # close the streams when the logger is garbage-collected
67 |         self.close()
68 | 
69 |     def __enter__(self):
70 |         pass
71 | 
72 |     def __exit__(self, *args):  # close the streams when leaving a `with` block
73 |         self.close()
74 | 
75 |     def write(self, msg):  # write a message to the console and, if open, to the log file
76 |         self.console.write(msg)  # same as sys.stdout.write(msg)
77 |         if self.file is not None:
78 |             self.file.write(msg)  # mirror the message into the log file
79 | 
80 |     def flush(self):  # flush stdout so output appears on screen in real time
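        # Aside: AverageMeter above is used in main_video_person_reid.py's train()
        # roughly as follows (a condensed sketch, not the verbatim code):
        #     losses = AverageMeter()
        #     for batch_idx, (imgs, pids, _) in enumerate(trainloader):
        #         loss = ...                                 # xent + htri, see train()
        #         losses.update(loss.data[0], pids.size(0))  # running sum / count
        #     # losses.avg then holds the average loss per sample so far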
81 | self.console.flush() 82 | if self.file is not None: 83 | self.file.flush() # 刷新缓冲区,即将缓冲区中的数据立刻写入文件,同时清空缓冲区,不需要是被动的等待输出缓冲区写入。 84 | os.fsync(self.file.fileno()) # fileno()返回一个整型的文件描述符(file descriptor FD 整型),可用于底层操作系统的I/O 操作 85 | # fsync()强制将文件描述符为fd的文件写入硬盘 86 | 87 | def close(self): 88 | self.console.close() # 关闭控制台输出 89 | if self.file is not None: 90 | self.file.close() # 关闭文件 91 | 92 | 93 | def read_json(fpath): 94 | with open(fpath, 'r') as f: # 打开fpath文件,以只读模式,作为f 95 | obj = json.load(f) # 读取json信息,取得指定文件中内容,参数为要读取的文件对象 96 | return obj 97 | 98 | 99 | def write_json(obj, fpath): 100 | mkdir_if_missing(osp.dirname(fpath)) # 先建立文件 101 | with open(fpath, 'w') as f: # 打开fpath文件,以写入模式,作为f 102 | json.dump(obj, f, indent=4, separators=(',', ': ')) # 存入到指定文件,第一个参数为要存入的内容,第二个为文件的对象 103 | # indent=4,换行且按照indent的数值显示前面的空白分行显示 104 | # separators:分隔符,这表示dictionary内keys之间用“,”隔开,而KEY和value之间用“:”隔开。 105 | -------------------------------------------------------------------------------- /1数据集管理.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | import os 3 | import glob 4 | import re 5 | import sys 6 | import urllib 7 | import tarfile 8 | import zipfile 9 | import os.path as osp 10 | from scipy.io import loadmat 11 | import numpy as np 12 | 13 | from utils import mkdir_if_missing, write_json, read_json 14 | 15 | """Dataset classes 数据集的种类""" 16 | 17 | 18 | class Mars(object): 19 | """ 20 | MARS 21 | 22 | Reference: 23 | Zheng et al. MARS: A Video Benchmark for Large-Scale Person Re-identification. ECCV 2016. 24 | 25 | Dataset statistics: 26 | # identities: 1261 27 | # tracklets: 8298 (train) + 1980 (query) + 9330 (gallery) 28 | # cameras: 6 29 | 30 | Args: 31 | min_seq_len (int): tracklet with length shorter than this value will be discarded (default: 0). 32 | 长度小于此值的tracklet将被丢弃 33 | """ 34 | root = '/media/ying/0BDD17830BDD1783/ReIdDataset/Mars' # 数据集的相对路径,下面是数据集的拆分信息 35 | train_name_path = osp.join(root, 'info/train_name.txt') # 包含训练集图片名称的路径 36 | test_name_path = osp.join(root, 'info/test_name.txt') # 包含测试集图片名称的路径 37 | track_train_info_path = osp.join(root, 'info/tracks_train_info.mat') # .mat是matlab格式的数据文件 38 | track_test_info_path = osp.join(root, 'info/tracks_test_info.mat') # 跟踪测试信息的路径 39 | query_IDX_path = osp.join(root, 'info/query_IDX.mat') # 查询身份的路径 40 | 41 | def __init__(self, min_seq_len=0): 42 | self._check_before_run() # 在运行前检查 43 | 44 | # prepare meta data 准备元数据 45 | train_names = self._get_names(self.train_name_path) # 获得训练集图片的所有名称 46 | test_names = self._get_names(self.test_name_path) # 获得测试集所有图片的名称 47 | track_train = loadmat(self.track_train_info_path) # 从scipy.io.loadmat读取.mat文件. 
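        # For reference: loadmat returns a dict keyed by the MATLAB variable name.
        # Each row of 'track_train_info' is [start_index, end_index, pid, camid]
        # with 1-based frame indices (see _process_data below), e.g. a hypothetical
        # row [1, 24, 5, 2] means frames 1..24 show person 5 as seen by camera 2.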
48 | track_train = track_train['track_train_info'] # 所有的元素都封装在ndarray 中。 49 | # 数据结构为 numpy.ndarray (8298, 4) 50 | track_test = loadmat(self.track_test_info_path)['track_test_info'] 51 | # numpy.ndarray (12180, 4) 52 | query_IDX = loadmat(self.query_IDX_path)['query_IDX'].squeeze() # squeeze()去除size为1的维度 53 | # numpy.ndarray (1980,) 54 | query_IDX -= 1 55 | # index from 0 56 | track_query = track_test[query_IDX, :] # 测试的视频段,从0开始 57 | gallery_IDX = [i for i in range(track_test.shape[0]) if i not in query_IDX] # 如果测试的索引不在查询中,那么它在图库中 58 | track_gallery = track_test[gallery_IDX, :] # 图库的视频段 59 | 60 | train, num_train_tracklets, num_train_pids, num_train_imgs = \ 61 | self._process_data(train_names, track_train, home_dir='bbox_train', relabel=True, min_seq_len=min_seq_len) 62 | 63 | query, num_query_tracklets, num_query_pids, num_query_imgs = \ 64 | self._process_data(test_names, track_query, home_dir='bbox_test', relabel=False, min_seq_len=min_seq_len) 65 | 66 | gallery, num_gallery_tracklets, num_gallery_pids, num_gallery_imgs = \ 67 | self._process_data(test_names, track_gallery, home_dir='bbox_test', relabel=False, min_seq_len=min_seq_len) 68 | 69 | num_imgs_per_tracklet = num_train_imgs + num_query_imgs + num_gallery_imgs # 每个视频段中包含的图片数 70 | min_num = np.min(num_imgs_per_tracklet) # 最小图片数 71 | max_num = np.max(num_imgs_per_tracklet) # 最大图片数 72 | avg_num = np.mean(num_imgs_per_tracklet) # 平均图片数 73 | 74 | num_total_pids = num_train_pids + num_query_pids # 行人身份的总数=训练+测试 75 | num_total_tracklets = num_train_tracklets + num_query_tracklets + num_gallery_tracklets # 视频段的总数 76 | 77 | print("=> MARS loaded") 78 | print("Dataset statistics:") 79 | print(" ------------------------------") 80 | print(" subset | # ids | # tracklets") 81 | print(" ------------------------------") 82 | print(" train | {:5d} | {:8d}".format(num_train_pids, num_train_tracklets)) 83 | print(" query | {:5d} | {:8d}".format(num_query_pids, num_query_tracklets)) 84 | print(" gallery | {:5d} | {:8d}".format(num_gallery_pids, num_gallery_tracklets)) 85 | print(" ------------------------------") 86 | print(" total | {:5d} | {:8d}".format(num_total_pids, num_total_tracklets)) 87 | print(" number of images per tracklet: {} ~ {}, average {:.1f}".format(min_num, max_num, avg_num)) 88 | print(" ------------------------------") 89 | 90 | self.train = train 91 | self.query = query 92 | self.gallery = gallery 93 | 94 | self.num_train_pids = num_train_pids 95 | self.num_query_pids = num_query_pids 96 | self.num_gallery_pids = num_gallery_pids 97 | 98 | def _check_before_run(self): 99 | """Check if all files are available before going deeper检查所有的文件是否在文件夹(路径)中""" 100 | if not osp.exists(self.root): 101 | raise RuntimeError("'{}' is not available".format(self.root)) 102 | if not osp.exists(self.train_name_path): 103 | raise RuntimeError("'{}' is not available".format(self.train_name_path)) 104 | if not osp.exists(self.test_name_path): 105 | raise RuntimeError("'{}' is not available".format(self.test_name_path)) 106 | if not osp.exists(self.track_train_info_path): 107 | raise RuntimeError("'{}' is not available".format(self.track_train_info_path)) 108 | if not osp.exists(self.track_test_info_path): 109 | raise RuntimeError("'{}' is not available".format(self.track_test_info_path)) 110 | if not osp.exists(self.query_IDX_path): 111 | raise RuntimeError("'{}' is not available".format(self.query_IDX_path)) 112 | 113 | def _get_names(self, fpath): 114 | names = [] 115 | with open(fpath, 'r') as f: # 以只读的方式打开路径中的文件,作为f 116 | for line in f: # 
遍历文件中的每一行 117 | new_line = line.rstrip() # rstrip()删除 string 字符串末尾的指定字符(默认为空格). 118 | names.append(new_line) # 将字符串添加到names列表的末尾 119 | return names 120 | 121 | def _process_data(self, names, meta_data, home_dir=None, relabel=False, min_seq_len=0): # 处理数据 122 | assert home_dir in ['bbox_train', 'bbox_test'] # home_dir为'bbox_train', 'bbox_test'两者之一 123 | num_tracklets = meta_data.shape[0] # 小视频段的数目 = 元数据的行数 124 | pid_list = list(set(meta_data[:, 2].tolist())) # meta_data[:, 2]读取第二列(列数从0开始)的数组元素 125 | # tolist()将数组转换成列表 set() 函数创建一个无序不重复元素集 126 | num_pids = len(pid_list) 127 | 128 | if relabel: # 重新标记 129 | pid2label = {pid: label for label, pid in enumerate(pid_list)} # pid: label 在pid_list列表中 130 | tracklets = [] # 小段视频 131 | num_imgs_per_tracklet = [] # 每个小段视频里的图片数 132 | 133 | for tracklet_idx in range(num_tracklets): # 遍历小段视频 134 | data = meta_data[tracklet_idx, ...] # 从元数据中获得数据 135 | start_index, end_index, pid, camid = data # 这些数据是图片起始索引,结束索引,身份,摄像头的id 136 | if pid == -1: # 如果不存在身份id 137 | continue # junk images are just ignored 忽略这些无用图片 138 | assert 1 <= camid <= 6 # 摄像头id 1~6之间 139 | if relabel: # 如果重新标记 140 | pid = pid2label[pid] # 获得原来pid对应的relabel后的label 141 | camid -= 1 # index starts from 0 摄像头的索引从0开始 142 | img_names = names[start_index-1:end_index] # 根据索引,获得对应图片的名字 143 | 144 | # make sure image names correspond to the same person 确保图片名字对应于同一个人 145 | pnames = [img_name[:4] for img_name in img_names] 146 | assert len(set(pnames)) == 1, "Error: a single tracklet contains different person images" 147 | 148 | # make sure all images are captured under the same camera # 确保所有的图片由同一个摄像头捕获 149 | camnames = [img_name[5] for img_name in img_names] 150 | assert len(set(camnames)) == 1, "Error: images are captured under different cameras!" 151 | 152 | # append image names with directory information 附加包含目录信息的图像名称 153 | img_paths = [osp.join(self.root, home_dir, img_name[:4], img_name) for img_name in img_names] 154 | if len(img_paths) >= min_seq_len: 155 | img_paths = tuple(img_paths) # 图像路径,以元组形成 156 | tracklets.append((img_paths, pid, camid)) # 将(图像路径,行人id,摄像头id)以列表形式,添加到tracklets列表后面 157 | num_imgs_per_tracklet.append(len(img_paths)) # 小段视频中包含的图像数 158 | 159 | num_tracklets = len(tracklets) 160 | 161 | return tracklets, num_tracklets, num_pids, num_imgs_per_tracklet 162 | # 返回小段视频,小段视频的数目,行人id的个数,每段视频中的图像数 163 | 164 | 165 | class iLIDSVID(object): 166 | """ 167 | iLIDS-VID 168 | 169 | Reference: 170 | Wang et al. Person Re-Identification by Video Ranking. ECCV 2014. 171 | 172 | Dataset statistics: 173 | # identities: 300 174 | # tracklets: 600 175 | # cameras: 2 176 | 177 | Args: 178 | split_id (int): indicates which split to use. There are totally 10 splits. 
179 | """ 180 | root = './data/ilids-vid' 181 | dataset_url = 'http://www.eecs.qmul.ac.uk/~xiatian/iLIDS-VID/iLIDS-VID.tar' 182 | data_dir = osp.join(root, 'i-LIDS-VID') 183 | split_dir = osp.join(root, 'train-test people splits') 184 | split_mat_path = osp.join(split_dir, 'train_test_splits_ilidsvid.mat') 185 | split_path = osp.join(root, 'splits.json') 186 | cam_1_path = osp.join(root, 'i-LIDS-VID/sequences/cam1') 187 | cam_2_path = osp.join(root, 'i-LIDS-VID/sequences/cam2') 188 | 189 | def __init__(self, split_id=0): 190 | self._download_data() 191 | self._check_before_run() 192 | 193 | self._prepare_split() 194 | splits = read_json(self.split_path) 195 | if split_id >= len(splits): 196 | raise ValueError("split_id exceeds range, received {}, but expected between 0 and {}".format(split_id, 197 | len(splits)-1)) 198 | split = splits[split_id] 199 | train_dirs, test_dirs = split['train'], split['test'] 200 | print("# train identites: {}, # test identites {}".format(len(train_dirs), len(test_dirs))) 201 | 202 | train, num_train_tracklets, num_train_pids, num_imgs_train = \ 203 | self._process_data(train_dirs, cam1=True, cam2=True) 204 | query, num_query_tracklets, num_query_pids, num_imgs_query = \ 205 | self._process_data(test_dirs, cam1=True, cam2=False) 206 | gallery, num_gallery_tracklets, num_gallery_pids, num_imgs_gallery = \ 207 | self._process_data(test_dirs, cam1=False, cam2=True) 208 | 209 | num_imgs_per_tracklet = num_imgs_train + num_imgs_query + num_imgs_gallery 210 | min_num = np.min(num_imgs_per_tracklet) 211 | max_num = np.max(num_imgs_per_tracklet) 212 | avg_num = np.mean(num_imgs_per_tracklet) 213 | 214 | num_total_pids = num_train_pids + num_query_pids 215 | num_total_tracklets = num_train_tracklets + num_query_tracklets + num_gallery_tracklets 216 | 217 | print("=> iLIDS-VID loaded") 218 | print("Dataset statistics:") 219 | print(" ------------------------------") 220 | print(" subset | # ids | # tracklets") 221 | print(" ------------------------------") 222 | print(" train | {:5d} | {:8d}".format(num_train_pids, num_train_tracklets)) 223 | print(" query | {:5d} | {:8d}".format(num_query_pids, num_query_tracklets)) 224 | print(" gallery | {:5d} | {:8d}".format(num_gallery_pids, num_gallery_tracklets)) 225 | print(" ------------------------------") 226 | print(" total | {:5d} | {:8d}".format(num_total_pids, num_total_tracklets)) 227 | print(" number of images per tracklet: {} ~ {}, average {:.1f}".format(min_num, max_num, avg_num)) 228 | print(" ------------------------------") 229 | 230 | self.train = train 231 | self.query = query 232 | self.gallery = gallery 233 | 234 | self.num_train_pids = num_train_pids 235 | self.num_query_pids = num_query_pids 236 | self.num_gallery_pids = num_gallery_pids 237 | 238 | def _download_data(self): 239 | if osp.exists(self.root): 240 | print("This dataset has been downloaded.") 241 | return 242 | 243 | mkdir_if_missing(self.root) 244 | fpath = osp.join(self.root, osp.basename(self.dataset_url)) 245 | 246 | print("Downloading iLIDS-VID dataset") 247 | url_opener = urllib.URLopener() 248 | url_opener.retrieve(self.dataset_url, fpath) 249 | 250 | print("Extracting files") 251 | tar = tarfile.open(fpath) 252 | tar.extractall(path=self.root) 253 | tar.close() 254 | 255 | def _check_before_run(self): 256 | """Check if all files are available before going deeper""" 257 | if not osp.exists(self.root): 258 | raise RuntimeError("'{}' is not available".format(self.root)) 259 | if not osp.exists(self.data_dir): 260 | raise RuntimeError("'{}' is 
not available".format(self.data_dir)) 261 | if not osp.exists(self.split_dir): 262 | raise RuntimeError("'{}' is not available".format(self.split_dir)) 263 | 264 | def _prepare_split(self): 265 | if not osp.exists(self.split_path): 266 | print("Creating splits") 267 | mat_split_data = loadmat(self.split_mat_path)['ls_set'] 268 | 269 | num_splits = mat_split_data.shape[0] 270 | num_total_ids = mat_split_data.shape[1] 271 | assert num_splits == 10 272 | assert num_total_ids == 300 273 | num_ids_each = num_total_ids/2 274 | 275 | # pids in mat_split_data are indices, so we need to transform them 276 | # to real pids 277 | person_cam1_dirs = os.listdir(self.cam_1_path) 278 | person_cam2_dirs = os.listdir(self.cam_2_path) 279 | 280 | # make sure persons in one camera view can be found in the other camera view 281 | assert set(person_cam1_dirs) == set(person_cam2_dirs) 282 | 283 | splits = [] 284 | for i_split in range(num_splits): 285 | # first 50% for testing and the remaining for training, following Wang et al. ECCV'14. 286 | train_idxs = sorted(list(mat_split_data[i_split, num_ids_each:])) 287 | test_idxs = sorted(list(mat_split_data[i_split, :num_ids_each])) 288 | 289 | train_idxs = [int(i)-1 for i in train_idxs] 290 | test_idxs = [int(i)-1 for i in test_idxs] 291 | 292 | # transform pids to person dir names 293 | train_dirs = [person_cam1_dirs[i] for i in train_idxs] 294 | test_dirs = [person_cam1_dirs[i] for i in test_idxs] 295 | 296 | split = {'train': train_dirs, 'test': test_dirs} 297 | splits.append(split) 298 | 299 | print("Totally {} splits are created, following Wang et al. ECCV'14".format(len(splits))) 300 | print("Split file is saved to {}".format(self.split_path)) 301 | write_json(splits, self.split_path) 302 | 303 | print("Splits created") 304 | 305 | def _process_data(self, dirnames, cam1=True, cam2=True): 306 | tracklets = [] 307 | num_imgs_per_tracklet = [] 308 | dirname2pid = {dirname: i for i, dirname in enumerate(dirnames)} 309 | 310 | for dirname in dirnames: 311 | if cam1: 312 | person_dir = osp.join(self.cam_1_path, dirname) 313 | img_names = glob.glob(osp.join(person_dir, '*.png')) 314 | assert len(img_names) > 0 315 | img_names = tuple(img_names) 316 | pid = dirname2pid[dirname] 317 | tracklets.append((img_names, pid, 0)) 318 | num_imgs_per_tracklet.append(len(img_names)) 319 | 320 | if cam2: 321 | person_dir = osp.join(self.cam_2_path, dirname) 322 | img_names = glob.glob(osp.join(person_dir, '*.png')) 323 | assert len(img_names) > 0 324 | img_names = tuple(img_names) 325 | pid = dirname2pid[dirname] 326 | tracklets.append((img_names, pid, 1)) 327 | num_imgs_per_tracklet.append(len(img_names)) 328 | 329 | num_tracklets = len(tracklets) 330 | num_pids = len(dirnames) 331 | 332 | return tracklets, num_tracklets, num_pids, num_imgs_per_tracklet 333 | 334 | 335 | class PRID(object): 336 | """ 337 | PRID 338 | 339 | Reference: 340 | Hirzer et al. Person Re-Identification by Descriptive and Discriminative Classification. SCIA 2011. 341 | 342 | Dataset statistics: 343 | # identities: 200 344 | # tracklets: 400 345 | # cameras: 2 346 | 347 | Args: 348 | split_id (int): indicates which split to use. There are totally 10 splits. 349 | min_seq_len (int): tracklet with length shorter than this value will be discarded (default: 0). 
350 | """ 351 | root = './data/prid2011' 352 | dataset_url = 'https://files.icg.tugraz.at/f/6ab7e8ce8f/?raw=1' 353 | split_path = osp.join(root, 'splits_prid2011.json') 354 | cam_a_path = osp.join(root, 'prid_2011', 'multi_shot', 'cam_a') 355 | cam_b_path = osp.join(root, 'prid_2011', 'multi_shot', 'cam_b') 356 | 357 | def __init__(self, split_id=0, min_seq_len=0): 358 | self._check_before_run() 359 | splits = read_json(self.split_path) 360 | if split_id >= len(splits): 361 | raise ValueError("split_id exceeds range, received {}, but expected between 0 and {}".format(split_id, 362 | len(splits)-1)) 363 | split = splits[split_id] 364 | train_dirs, test_dirs = split['train'], split['test'] 365 | print("# train identites: {}, # test identites {}".format(len(train_dirs), len(test_dirs))) 366 | 367 | train, num_train_tracklets, num_train_pids, num_imgs_train = \ 368 | self._process_data(train_dirs, cam1=True, cam2=True) 369 | query, num_query_tracklets, num_query_pids, num_imgs_query = \ 370 | self._process_data(test_dirs, cam1=True, cam2=False) 371 | gallery, num_gallery_tracklets, num_gallery_pids, num_imgs_gallery = \ 372 | self._process_data(test_dirs, cam1=False, cam2=True) 373 | 374 | num_imgs_per_tracklet = num_imgs_train + num_imgs_query + num_imgs_gallery 375 | min_num = np.min(num_imgs_per_tracklet) 376 | max_num = np.max(num_imgs_per_tracklet) 377 | avg_num = np.mean(num_imgs_per_tracklet) 378 | 379 | num_total_pids = num_train_pids + num_query_pids 380 | num_total_tracklets = num_train_tracklets + num_query_tracklets + num_gallery_tracklets 381 | 382 | print("=> PRID-2011 loaded") 383 | print("Dataset statistics:") 384 | print(" ------------------------------") 385 | print(" subset | # ids | # tracklets") 386 | print(" ------------------------------") 387 | print(" train | {:5d} | {:8d}".format(num_train_pids, num_train_tracklets)) 388 | print(" query | {:5d} | {:8d}".format(num_query_pids, num_query_tracklets)) 389 | print(" gallery | {:5d} | {:8d}".format(num_gallery_pids, num_gallery_tracklets)) 390 | print(" ------------------------------") 391 | print(" total | {:5d} | {:8d}".format(num_total_pids, num_total_tracklets)) 392 | print(" number of images per tracklet: {} ~ {}, average {:.1f}".format(min_num, max_num, avg_num)) 393 | print(" ------------------------------") 394 | 395 | self.train = train 396 | self.query = query 397 | self.gallery = gallery 398 | 399 | self.num_train_pids = num_train_pids 400 | self.num_query_pids = num_query_pids 401 | self.num_gallery_pids = num_gallery_pids 402 | 403 | def _check_before_run(self): 404 | """Check if all files are available before going deeper""" 405 | if not osp.exists(self.root): 406 | raise RuntimeError("'{}' is not available".format(self.root)) 407 | 408 | def _process_data(self, dirnames, cam1=True, cam2=True): 409 | tracklets = [] 410 | num_imgs_per_tracklet = [] 411 | dirname2pid = {dirname: i for i, dirname in enumerate(dirnames)} 412 | 413 | for dirname in dirnames: 414 | if cam1: 415 | person_dir = osp.join(self.cam_a_path, dirname) 416 | img_names = glob.glob(osp.join(person_dir, '*.png')) 417 | assert len(img_names) > 0 418 | img_names = tuple(img_names) 419 | pid = dirname2pid[dirname] 420 | tracklets.append((img_names, pid, 0)) 421 | num_imgs_per_tracklet.append(len(img_names)) 422 | 423 | if cam2: 424 | person_dir = osp.join(self.cam_b_path, dirname) 425 | img_names = glob.glob(osp.join(person_dir, '*.png')) 426 | assert len(img_names) > 0 427 | img_names = tuple(img_names) 428 | pid = dirname2pid[dirname] 429 | 
tracklets.append((img_names, pid, 1)) 430 | num_imgs_per_tracklet.append(len(img_names)) 431 | 432 | num_tracklets = len(tracklets) 433 | num_pids = len(dirnames) 434 | 435 | return tracklets, num_tracklets, num_pids, num_imgs_per_tracklet 436 | 437 | 438 | """Create dataset""" 439 | 440 | __factory = { 441 | 'mars': Mars, 442 | 'ilidsvid': iLIDSVID, 443 | 'prid': PRID, 444 | } 445 | 446 | 447 | def get_names(): 448 | return __factory.keys() 449 | 450 | 451 | def init_dataset(name, *args, **kwargs): 452 | if name not in __factory.keys(): 453 | raise KeyError("Unknown dataset: {}".format(name)) 454 | return __factory[name](*args, **kwargs) 455 | 456 | 457 | if __name__ == '__main__': 458 | # test 459 | # dataset = Market1501() 460 | dataset = Mars() 461 | # dataset = iLIDSVID() 462 | # dataset = PRID() 463 | -------------------------------------------------------------------------------- /2从数据集中导入视频图片.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | import os 3 | from PIL import Image 4 | import numpy as np 5 | 6 | import torch 7 | from torch.utils.data import Dataset 8 | import random 9 | 10 | 11 | def read_image(img_path): 12 | """Keep reading image until succeed.一直进行读入图片操作,直到成功。 13 | This can avoid IOError incurred by heavy IO process.""" 14 | got_img = False # 读取图像的标志,初始为False 15 | if not os.path.exists(img_path): 16 | raise IOError('{} does not exist'.format(img_path)) 17 | while not got_img: 18 | try: 19 | # 从图像路径读取图片,并将图片转换为RGB格式 20 | img = Image.open(img_path).convert('RGB') 21 | got_img = True # 成功读取图片,则读取图像的标志置为TRUE 22 | except IOError: # 读取图片出错,报错 23 | print("IOError incurred when reading '{}'. Will redo. Don't worry. Just chill.".format(img_path)) 24 | pass 25 | return img 26 | 27 | 28 | class VideoDataset(Dataset): 29 | """Video Person ReID Dataset.基于视频reid的数据集。 30 | Note batch data has shape (batch, seq_len, channel, height, width). 31 | 注意,一个批次的数据的形状为(batch,序列长度,通道数,图片的高度,图片的宽度) 32 | """ 33 | # 三种采样方法,平等的,随机,所有 34 | sample_methods = ['evenly', 'random', 'all'] 35 | 36 | # 初始化数据集 37 | def __init__(self, dataset, seq_len=15, sample='evenly', transform=None): 38 | self.dataset = dataset 39 | self.seq_len = seq_len 40 | self.sample = sample 41 | self.transform = transform 42 | 43 | def __len__(self): 44 | return len(self.dataset) # 数据集长度 45 | 46 | def __getitem__(self, index): 47 | img_paths, pid, camid = self.dataset[index] # 从数据集中图片的索引(名称)获得图片路径,行人的身份,摄像头的id 48 | num = len(img_paths) 49 | 50 | # 采样方式:1.随机采用 51 | if self.sample == 'random': 52 | """ 53 | Randomly sample seq_len consecutive frames from num frames, 54 | if num is smaller than seq_len, then replicate items. 55 | This sampling strategy is used in training phase. 
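            Example: with num=10 and seq_len=15, the loop below selects indices
            [0..9] and then pads by re-appending from the front, giving the
            15 indices [0..9, 0, 1, 2, 3, 4] in temporal order.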
56 | 随机从num个视频帧中采样seq_len长度的连续帧,如果num小于seq_len,则复制它们,使之等于seq_len。在训练阶段采用这种采样策略。 57 | """ 58 | frame_indices = list(range(num)) # 视频帧的索引0-num 59 | rand_end = max(0, len(frame_indices) - self.seq_len - 1) # 随机范围的终止 60 | begin_index = random.randint(0, rand_end) # 开始的索引值 61 | end_index = min(begin_index + self.seq_len, len(frame_indices)) # 结束的索引值 62 | 63 | indices = frame_indices[begin_index:end_index] # 最终的索引范围 64 | 65 | for index in indices: # 遍历索引 66 | if len(indices) >= self.seq_len: # 如果索引的长度大于等于序列长度 67 | break 68 | indices.append(index) # 向列表末尾增加索引 69 | indices = np.array(indices) # 将列表转换成数组 70 | 71 | imgs = [] # 图像列表 72 | for index in indices: # 遍历新的索引 73 | index = int(index) # 首先保证索引是整数 74 | img_path = img_paths[index] # 从数据集中获得图像的路径,并结合索引,确定图像的具体路径 75 | img = read_image(img_path) # 根据路径读取图片 76 | if self.transform is not None: # 执行随机裁剪 77 | img = self.transform(img) 78 | img = img.unsqueeze(0) # unsqueeze(0)函数,增加维度,在第0维增加1个维度 79 | imgs.append(img) # 向图像列表末尾增加图像索引.存储这些索引,保证时间顺序 80 | imgs = torch.cat(imgs, dim=0) # cat([a,b],dim),若dim=0,则将a,b按行放在一起. 若dim=1,则a,b按列放在一起 81 | # imgs = imgs.permute(1,0,2,3) 82 | return imgs, pid, camid 83 | 84 | elif self.sample == 'dense': 85 | """ 86 | Sample all frames in a video into a list of clips, each clip contains seq_len frames, batch_size needs to 87 | be set to 1. 88 | 将视频中的所有帧采样到一系列的clips,每个clip包含seq_len个帧,批次大小需要设置为1 89 | This sampling strategy is used in test phase. 在测试阶段采用密集采样策略. 90 | """ 91 | cur_index = 0 # 初始索引为0 92 | frame_indices = list(range(num)) # 视频帧的索引范围 93 | indices_list = [] 94 | while num - cur_index > self.seq_len: # 如果视频帧的数目大于seq_len 95 | indices_list.append(frame_indices[cur_index:cur_index + self.seq_len]) 96 | # 将帧的索引[0,seq_len],添加到原来帧的索引列表的末尾 97 | cur_index += self.seq_len # 当前索引值=0+seq_len 98 | 99 | last_seq = frame_indices[cur_index:] # 最后留下的序列为当前索引值后面的列表值 100 | for index in last_seq: # 遍历剩下的索引值 101 | if len(last_seq) >= self.seq_len: # 如果剩下的索引值长度大于每个身份要采样的序列长度 102 | break 103 | last_seq.append(index) # 将索引加到列表后面 104 | indices_list.append(last_seq) # 存储这些索引,保证时间顺序 105 | 106 | imgs_list = [] 107 | for indices in indices_list: # 遍历所有索引中的视频段的索引,即有几个视频段 108 | imgs = [] 109 | for index in indices: # 在每个视频段中,遍历每一帧的索引 110 | index = int(index) 111 | img_path = img_paths[index] # 确定图像的路径 112 | img = read_image(img_path) # 读图 113 | if self.transform is not None: 114 | img = self.transform(img) 115 | img = img.unsqueeze(0) 116 | imgs.append(img) 117 | imgs = torch.cat(imgs, dim=0) 118 | # imgs = imgs.permute(1,0,2,3) 119 | imgs_list.append(imgs) # 将每个视频段的图像,存在一个总的list中 120 | imgs_array = torch.stack(imgs_list) # 沿着一个新维度对输入张量序列进行连接。 121 | return imgs_array, pid, camid 122 | 123 | else: 124 | raise KeyError("Unknown sample method: {}. Expected one of {}".format(self.sample, self.sample_methods)) 125 | -------------------------------------------------------------------------------- /3随机采样,形成pk批次.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from collections import defaultdict 3 | import numpy as np 4 | 5 | import torch 6 | 7 | 8 | class RandomIdentitySampler(object): # 随机为每个id选择一定数目的样本,形成一个批次 9 | """ 10 | Randomly sample N identities, then for each identity, 首先随机采样N个身份,然后对每个身份,随机选择K个例子,根据这种策略 11 | randomly sample K instances, therefore batch size is N*K. 形成一个N*K批次. 12 | 13 | Code imported from https://github.com/Cysu/open-reid/blob/master/reid/utils/data/sampler.py. 14 | 15 | Args: 16 | data_source (Dataset): dataset to sample from. 
一个包含样本的参数 17 | num_instances (int): number of instances per identity. 每个身份的样本数 18 | """ 19 | def __init__(self, data_source, num_instances=4): 20 | self.data_source = data_source # 数据集 21 | self.num_instances = num_instances # 默认给每个身份选择4帧图片 22 | self.index_dic = defaultdict(list) # 索引字典的初始化,采用defaultdict()方法 23 | # defaultdict的作用是在于,当字典里的key不存在但被查找时,返回的不是keyError而是一个默认值,[] 24 | for index, (_, pid, _) in enumerate(data_source): # 将数据集中的行人id和索引放进字典中 25 | self.index_dic[pid].append(index) 26 | self.pids = list(self.index_dic.keys()) # 行人id是字典的key值 27 | self.num_identities = len(self.pids) # 行人的个数是行人id的长度 28 | 29 | def __iter__(self): # 迭代器 30 | indices = torch.randperm(self.num_identities) # torch.randperm(n),给定参数n,返回一个从0到n-1的随机整数排列. 31 | ret = [] # 初始化个列表 32 | for i in indices: # 遍历数据集中的行人身份 33 | pid = self.pids[i] # 行人的身份 34 | t = self.index_dic[pid] # 获得字典中,行人身份对应的样本数 35 | replace = False if len(t) >= self.num_instances else True 36 | # 如果t>=4,则replace = False 如果t < 4,replace = True ,即样本数小于4,则复制前面的样本 37 | t = np.random.choice(t, size=self.num_instances, replace=replace) # np.random.choice(a,size,replace,p) 38 | # 从t中选择size个样本,replace = true的话有可能会出现重复的样本,就是将前面抽出来的样本重新放回去. 39 | ret.extend(t) # 在list的末尾一次性追加另一个序列中的多个值,即用新列表扩展原来的列表. 40 | return iter(ret) # iter()函数,迭代器. 41 | 42 | def __len__(self): 43 | return self.num_identities * self.num_instances # 一个批次的长度 44 | -------------------------------------------------------------------------------- /4随机图像裁剪.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from torchvision.transforms import * 4 | from PIL import Image 5 | import random 6 | import numpy as np 7 | 8 | 9 | class Random2DTranslation(object): 10 | """ 11 | With a probability, first increase image size to (1 + 1/8), and then perform random crop. 12 | 设定一个概率p,首先将图像尺寸扩大到(1 + 1/8),然后执行随机裁剪 13 | Args: 14 | height (int): target height. 目标图像的高度 15 | width (int): target width. 目标图像的宽度 16 | p (float): probability of performing this transformation. Default: 0.5. 执行改变图像大小,进行裁剪的概率 17 | """ 18 | def __init__(self, height, width, p=0.5, interpolation=Image.BILINEAR): 19 | self.height = height 20 | self.width = width 21 | self.p = p 22 | self.interpolation = interpolation # 插值方式:双线性插值 23 | 24 | def __call__(self, img): 25 | """ 26 | Args: 27 | img (PIL Image): Image to be cropped. 28 | 29 | Returns: 30 | PIL Image: Cropped image. 31 | """ 32 | if random.random() < self.p: # random.random()返回随机生成的一个实数,它在[0,1)范围内. 33 | return img.resize((self.width, self.height), self.interpolation) # 不改变图像的大小,((图像的宽, 高), 双线性插值) 34 | 35 | new_width, new_height = int(round(self.width * 1.125)), int(round(self.height * 1.125)) 36 | # 先将图像尺寸扩大到原来的1.125倍, round函数:返回浮点数的四舍五入值 37 | resized_img = img.resize((new_width, new_height), self.interpolation) # 改变图像的大小,采用双线性插值方法 38 | x_maxrange = new_width - self.width # 宽度的余量 39 | y_maxrange = new_height - self.height # 高度的余量 40 | # random.uniform(x,y) 随机生成一个实数,它在[x,y)范围内. 
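        # Worked example (height=224, width=112, the defaults in main): the image is
        # first resized to 126x252, so x_maxrange=14 and y_maxrange=28; (x1, y1) is
        # then drawn uniformly from that slack so the crop stays inside the image.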
41 |         x1 = int(round(random.uniform(0, x_maxrange)))
42 |         y1 = int(round(random.uniform(0, y_maxrange)))  # randomly choose the top-left corner of the crop
43 |         box = (x1, y1, x1 + self.width, y1 + self.height)  # crop region as (left, upper, right, lower)
44 |         # PIL places the origin (0, 0) at the top-left corner, so the last two coordinates must be the larger ones
45 |         cropped_img = resized_img.crop(box)  # crop the enlarged image back to the target size
46 | 
47 |         return cropped_img
48 | 
49 | 
50 | if __name__ == '__main__':
51 |     pass
52 | 
--------------------------------------------------------------------------------
/5损失函数.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | 
3 | import torch
4 | from torch import nn
5 | from torch.autograd import Variable
6 | 
7 | """
8 | Shorthands for the losses:
9 | - CrossEntropyLabelSmooth: xent  (cross entropy with label-smoothing regularization)
10 | - TripletLoss: htri  (triplet loss with hard example mining)
11 | - CenterLoss: cent  (center loss)
12 | """
13 | __all__ = ['CrossEntropyLabelSmooth', 'TripletLoss', 'CenterLoss']
14 | 
15 | 
16 | class CrossEntropyLabelSmooth(nn.Module):
17 |     """Cross entropy loss with label smoothing regularizer.
18 | 
19 |     Reference:
20 |     Szegedy et al. Rethinking the Inception Architecture for Computer Vision. CVPR 2016.
21 |     Equation: y = (1 - epsilon) * y + epsilon / K.
22 | 
23 |     Args:
24 |         num_classes (int): number of classes.
25 |         epsilon (float): smoothing weight.
26 |     """
27 |     def __init__(self, num_classes, epsilon=0.1, use_gpu=True):
28 |         super(CrossEntropyLabelSmooth, self).__init__()  # super() invokes the parent (nn.Module) initializer
29 |         self.num_classes = num_classes
30 |         self.epsilon = epsilon  # smoothing weight, 0.1 by default
31 |         self.use_gpu = use_gpu
32 |         self.logsoftmax = nn.LogSoftmax(dim=1)  # Log(Softmax(x)) along the class dimension
33 | 
34 |     def forward(self, inputs, targets):
35 |         """
36 |         Args:
37 |             inputs: prediction matrix (before softmax) with shape (batch_size, num_classes)
38 |             targets: ground truth labels with shape (batch_size)
39 |         """
40 |         log_probs = self.logsoftmax(inputs)  # log-probabilities, shape (batch_size, num_classes)
41 |         # Build the one-hot targets in a single expression: the index passed to
42 |         # scatter_ must be the original integer labels, so `targets` must not be
43 |         # overwritten with zeros before it is used as the index.
44 |         targets = torch.zeros(log_probs.size()).scatter_(1, targets.unsqueeze(1).data.cpu(), 1)
45 |         # scatter_(dim, index, src) writes src into the tensor at the positions given by index along dim
46 |         if self.use_gpu:
47 |             targets = targets.cuda()
48 |         targets = Variable(targets, requires_grad=False)
49 |         targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes  # smooth the one-hot labels
50 |         loss = (- targets * log_probs).mean(0).sum()
51 |         return loss
52 | 
53 | 
54 | class TripletLoss(nn.Module):
55 |     """Triplet loss with hard positive/negative mining.
56 | 
57 |     Reference:
58 |     Hermans et al. In Defense of the Triplet Loss for Person Re-Identification. arXiv:1703.07737.
59 | 
60 |     Code imported from https://github.com/Cysu/open-reid/blob/master/reid/loss/triplet.py.
61 | 
62 |     Args:
63 |         margin (float): margin for triplet.
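    For each anchor a, the batch-hard form implemented below is
        loss = max(0, max_p d(a, p) - min_n d(a, n) + margin),
    with p over same-identity samples and n over other identities in the
    batch, averaged over anchors by nn.MarginRankingLoss.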
64 | """ 65 | def __init__(self, margin=0.3): 66 | super(TripletLoss, self).__init__() 67 | self.margin = margin 68 | self.ranking_loss = nn.MarginRankingLoss(margin=margin) 69 | 70 | def forward(self, inputs, targets): 71 | """ 72 | Args: 73 | inputs: feature matrix with shape (batch_size, feat_dim) 74 | targets: ground truth labels with shape (num_classes) 75 | """ 76 | n = inputs.size(0) 77 | # Compute pairwise distance, replace by the official when merged 78 | dist = torch.pow(inputs, 2).sum(dim=1, keepdim=True).expand(n, n) 79 | dist = dist + dist.t() 80 | dist.addmm_(1, -2, inputs, inputs.t()) 81 | dist = dist.clamp(min=1e-12).sqrt() # for numerical stability 82 | # For each anchor, find the hardest positive and negative 83 | mask = targets.expand(n, n).eq(targets.expand(n, n).t()) 84 | dist_ap, dist_an = [], [] 85 | for i in range(n): 86 | dist_ap.append(dist[i][mask[i]].max()) 87 | dist_an.append(dist[i][mask[i] == 0].min()) 88 | dist_ap = torch.cat(dist_ap) 89 | dist_an = torch.cat(dist_an) 90 | # Compute ranking hinge loss 91 | y = dist_an.data.new() 92 | y.resize_as_(dist_an.data) 93 | y.fill_(1) 94 | y = Variable(y) 95 | loss = self.ranking_loss(dist_an, dist_ap, y) 96 | return loss 97 | 98 | 99 | class CenterLoss(nn.Module): 100 | """Center loss. 101 | 102 | Reference: 103 | Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016. 104 | 105 | Args: 106 | num_classes (int): number of classes. 107 | feat_dim (int): feature dimension. 108 | """ 109 | def __init__(self, num_classes=10, feat_dim=2, use_gpu=True): 110 | super(CenterLoss, self).__init__() 111 | self.num_classes = num_classes 112 | self.feat_dim = feat_dim 113 | self.use_gpu = use_gpu 114 | 115 | if self.use_gpu: 116 | self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim).cuda()) 117 | else: 118 | self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim)) 119 | 120 | def forward(self, x, labels): 121 | """ 122 | Args: 123 | x: feature matrix with shape (batch_size, feat_dim). 124 | labels: ground truth labels with shape (num_classes). 
125 | """ 126 | batch_size = x.size(0) 127 | distmat = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(batch_size, self.num_classes) + \ 128 | torch.pow(self.centers, 2).sum(dim=1, keepdim=True).expand(self.num_classes, batch_size).t() 129 | distmat.addmm_(1, -2, x, self.centers.t()) 130 | 131 | classes = torch.arange(self.num_classes).long() 132 | if self.use_gpu: 133 | classes = classes.cuda() 134 | classes = Variable(classes) 135 | labels = labels.unsqueeze(1).expand(batch_size, self.num_classes) 136 | mask = labels.eq(classes.expand(batch_size, self.num_classes)) 137 | 138 | dist = [] 139 | for i in range(batch_size): 140 | value = distmat[i][mask[i]] 141 | value = value.clamp(min=1e-12, max=1e+12) # for numerical stability 142 | dist.append(value) 143 | dist = torch.cat(dist) 144 | loss = dist.mean() 145 | 146 | return loss 147 | 148 | 149 | if __name__ == '__main__': 150 | pass 151 | -------------------------------------------------------------------------------- /6评估模型的方法.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | import numpy as np 3 | import copy 4 | 5 | 6 | def evaluate(distmat, q_pids, g_pids, q_camids, g_camids, max_rank=50): 7 | # 评估函数,参数有(距离矩阵,查询行人的id,图库行人的id,查询的摄像头,图库的摄像头,最大秩) 8 | num_q, num_g = distmat.shape # num_q查询的行人数=距离矩阵的行, num_g图库的行人数=距离矩阵的列 9 | if num_g < max_rank: # 图库的行人数 < 最大秩 10 | max_rank = num_g # 更改最大秩 = 图库的行人数 ,说明图库的样本数太小了 11 | print("Note: number of gallery samples is quite small, got {}".format(num_g)) 12 | indices = np.argsort(distmat, axis=1) # top_k 13 | # 将矩阵distmat按照axis排序,并返回排序后的下标 14 | matches = (g_pids[indices] == q_pids[:, np.newaxis]) # q_pids[:, np.newaxis] 在原有维度后面加一个维度 (n,1) 15 | # 匹配,相同返回true,不同返回false, 16 | matches = matches.astype(np.int32) # 将bool类型的true false转换成int类型的1, 0 17 | 18 | # compute cmc curve for each query 对于每个查询计算它的cmc曲线 19 | all_cmc = [] 20 | all_AP = [] 21 | num_valid_q = 0. 22 | for q_idx in range(num_q): # 遍历每个查询的行人 23 | # get query pid and camid 24 | q_pid = q_pids[q_idx] # 获得每个查询的行人身份 25 | q_camid = q_camids[q_idx] # 获得对应的摄像头id 26 | 27 | # remove gallery samples that have the same pid and camid with query 从图库样本中删除与查询集中有相同身份和摄像头id 28 | order = indices[q_idx] # 查询集索引 29 | remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid) # 相等为1,不等为0 30 | keep = np.invert(remove) # np.invert() 位非,对每一位取反 ,则删去了相等的,变成去除相等的为0 ,保留不等的为1 31 | 32 | # compute cmc curve 累积匹配曲线 33 | orig_cmc = matches[q_idx][keep] # binary vector, positions with value 1 are correct matches 34 | # 二进制向量,值为1的位置是正确的匹配 35 | if not np.any(orig_cmc): # np.any 相当于或运算.如果可迭代对象orig_cmc中任意存在每一个元素为True则返回True, 36 | # this condition is true when query identity does not appear in gallery 37 | # 当查询的身份未出现在图库中时,此条件为真 38 | continue 39 | 40 | cmc = orig_cmc.cumsum() # 返回累加和,不改变数据形状 41 | cmc[cmc > 1] = 1 # cmc最大为1,超过1的置为1 42 | 43 | all_cmc.append(cmc[:max_rank]) # 将前max_rank的cmc添加到all_cmc列表的后面 rank1到rank50 44 | num_valid_q += 1. # 有效的查询身份+1 45 | 46 | # compute average precision 计算平均精度 47 | # reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision 48 | num_rel = orig_cmc.sum() # 所有元素求和 49 | tmp_cmc = orig_cmc.cumsum() # 累加和,不改变数据形状 50 | tmp_cmc = [x / (i+1.) for i, x in enumerate(tmp_cmc)] # enumerate() 函数,返回数据下标和数据(i,x) 51 | # 计算top_i的cmc ..x / (i+1.) 
52 | tmp_cmc = np.asarray(tmp_cmc) * orig_cmc # np.asarray(tmp_cmc),数据类型转换为数组, 只保留正确的匹配 , 错误的值为0 53 | AP = tmp_cmc.sum() / num_rel # 平均精度 = 正确匹配的元素求和 除以原来所有元素的总和 54 | all_AP.append(AP) 55 | 56 | assert num_valid_q > 0, "Error: all query identities do not appear in gallery" 57 | 58 | all_cmc = np.asarray(all_cmc).astype(np.float32) 59 | all_cmc = all_cmc.sum(0) / num_valid_q 60 | mAP = np.mean(all_AP) 61 | 62 | return all_cmc, mAP 63 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # something you should know for video_reid 2 | resnet50的结构图 https://blog.csdn.net/Seven_year_Promise/article/details/69358681 3 | 4 | resnet50的详细介绍 https://blog.csdn.net/Seven_year_Promise/article/details/69360488 5 | 6 | code forked from https://github.com/jiyanggao/Video-Person-ReID 7 | 8 | ###############video_reid 数据集介绍############### 9 | 1. MARS ----> https://blog.csdn.net/qq_34132310/article/details/83869605 10 | -------------------------------------------------------------------------------- /info/query_IDX.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AsuradaYuci/understand_videobased_reid/cef368960212808351dfcd1e65487b8683806257/info/query_IDX.mat -------------------------------------------------------------------------------- /info/tracks_test_info.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AsuradaYuci/understand_videobased_reid/cef368960212808351dfcd1e65487b8683806257/info/tracks_test_info.mat -------------------------------------------------------------------------------- /info/tracks_train_info.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AsuradaYuci/understand_videobased_reid/cef368960212808351dfcd1e65487b8683806257/info/tracks_train_info.mat -------------------------------------------------------------------------------- /main_video_person_reid.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | import os 3 | import sys 4 | import time 5 | import datetime 6 | import argparse 7 | import os.path as osp 8 | import numpy as np 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.backends.cudnn as cudnn 13 | from torch.utils.data import DataLoader 14 | from torch.autograd import Variable 15 | from torch.optim import lr_scheduler 16 | 17 | import data_manager 18 | from video_loader import VideoDataset 19 | import transforms as T 20 | import models 21 | from models import resnet3d 22 | from losses import CrossEntropyLabelSmooth, TripletLoss 23 | from utils import AverageMeter, Logger, save_checkpoint 24 | from eval_metrics import evaluate 25 | from samplers import RandomIdentitySampler 26 | 27 | # 命令行参数 28 | parser = argparse.ArgumentParser(description='Train video model with cross entropy loss') 29 | # Datasets 数据集 30 | parser.add_argument('-d', '--dataset', type=str, default='mars', 31 | choices=data_manager.get_names()) 32 | parser.add_argument('-j', '--workers', default=4, type=int, 33 | help="number of data loading workers (default: 4)") 34 | parser.add_argument('--height', type=int, default=224, 35 | help="height of an image (default: 224)") 36 | parser.add_argument('--width', type=int, default=112, 37 | help="width of an image (default: 
112)") 38 | parser.add_argument('--seq-len', type=int, default=4, help="number of images to sample in a tracklet") 39 | # Optimization options 优化选择 40 | parser.add_argument('--max-epoch', default=800, type=int, 41 | help="maximum epochs to run") 42 | parser.add_argument('--start-epoch', default=0, type=int, 43 | help="manual epoch number (useful on restarts)") 44 | parser.add_argument('--train-batch', default=32, type=int, 45 | help="train batch size") 46 | parser.add_argument('--test-batch', default=1, type=int, help="has to be 1") 47 | parser.add_argument('--lr', '--learning-rate', default=0.0003, type=float, 48 | help="initial learning rate, use 0.0001 for rnn, use 0.0003 for pooling and attention") 49 | parser.add_argument('--stepsize', default=200, type=int, 50 | help="stepsize to decay learning rate (>0 means this is enabled)") 51 | parser.add_argument('--gamma', default=0.1, type=float, 52 | help="learning rate decay") 53 | parser.add_argument('--weight-decay', default=5e-04, type=float, 54 | help="weight decay (default: 5e-04)") 55 | parser.add_argument('--margin', type=float, default=0.3, help="margin for triplet loss") 56 | parser.add_argument('--num-instances', type=int, default=4, 57 | help="number of instances per identity") 58 | parser.add_argument('--htri-only', action='store_true', default=False, 59 | help="if this is True, only htri loss is used in training") 60 | # Architecture 网络结构 61 | parser.add_argument('-a', '--arch', type=str, default='resnet50tp', help="resnet503d, resnet50tp, resnet50ta, " 62 | "resnetrnn") 63 | parser.add_argument('--pool', type=str, default='avg', choices=['avg', 'max']) 64 | 65 | # Miscs 66 | parser.add_argument('--print-freq', type=int, default=80, help="print frequency") 67 | parser.add_argument('--seed', type=int, default=1, help="manual seed") 68 | parser.add_argument('--pretrained-model', type=str, 69 | default='/home/ying/Desktop/Video-Person-ReID-master/resnet-50-kinetics.pth', 70 | help='need to be set for resnet3d models') 71 | parser.add_argument('--evaluate', action='store_true', help="evaluation only") 72 | parser.add_argument('--eval-step', type=int, default=50, 73 | help="run evaluation for every N epochs (set to -1 to test after training)") 74 | parser.add_argument('--save-dir', type=str, default='log') 75 | parser.add_argument('--use-cpu', action='store_true', help="use cpu") 76 | parser.add_argument('--gpu-devices', default='0', type=str, help='gpu device ids for CUDA_VISIBLE_DEVICES') 77 | 78 | # 解析命令行参数 79 | args = parser.parse_args() 80 | 81 | 82 | def main(): 83 | torch.manual_seed(args.seed) # 为CPU设置种子用于生成随机数,以使得结果是确定的 84 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices # 在代码中指定需要使用的GPU 85 | use_gpu = torch.cuda.is_available() # 查看当前环境是否支持CUDA,支持返回true,不支持返回false 86 | if args.use_cpu: 87 | use_gpu = False 88 | 89 | if not args.evaluate: # 如果不是评估,那就是训练,输出训练日志;否则输出测试日志。 90 | sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt')) 91 | else: 92 | sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt')) 93 | print("==========\nArgs:{}\n==========".format(args)) # 打印所有参数 94 | 95 | if use_gpu: # 如果使用gpu,输出选定的gpu, 96 | print("Currently using GPU {}".format(args.gpu_devices)) 97 | cudnn.benchmark = True # 在程序刚开始加这条语句可以提升一点训练速度,没什么额外开销 98 | torch.cuda.manual_seed_all(args.seed) # 为GPU设置种子用于生成随机数,以使得结果是确定的 99 | else: 100 | print("Currently using CPU (GPU is highly recommended)") 101 | 102 | print("Initializing dataset {}".format(args.dataset)) 103 | dataset = data_manager.init_dataset(name=args.dataset) # 
初始化数据集,从data_manager.py文件中加载。 104 | 105 | # import transforms as T. 106 | # T.Compose=一起组合几个变换。 107 | transform_train = T.Compose([ 108 | T.Random2DTranslation(args.height, args.width), # 以一个概率进行,首先将图像大小增加到(1 + 1/8),然后执行随机裁剪。 109 | T.RandomHorizontalFlip(), # 以给定的概率(0.5)随机水平翻转给定的PIL图像。 110 | T.ToTensor(), # 将``PIL Image``或``numpy.ndarray``转换为张量。 111 | T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), # 用平均值和标准偏差归一化张量图像。 112 | # input[channel] = (input[channel] - mean[channel]) / std[channel] 113 | ]) 114 | 115 | transform_test = T.Compose([ 116 | T.Resize((args.height, args.width)), # 将输入PIL图像的大小调整为给定大小。 117 | T.ToTensor(), 118 | T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 119 | ]) 120 | 121 | # 设置pin_memory=True,则意味着生成的Tensor数据最开始是属于内存中的锁页内存,这样将内存的Tensor转义到GPU的显存就会更快一些。 122 | pin_memory = True if use_gpu else False 123 | 124 | # DataLoader数据加载器。 组合数据集和采样器,并在数据集上提供单进程或多进程迭代器。 125 | trainloader = DataLoader( 126 | # VideoDataset:基于视频的person reid的数据集.(训练的数据集,视频序列长度,采样方法:随机,进行数据增强) 127 | VideoDataset(dataset.train, seq_len=args.seq_len, sample='random', transform=transform_train), 128 | # 随机抽样N个身份,然后对于每个身份,随机抽样K个实例,因此批量大小为N * K. 129 | sampler=RandomIdentitySampler(dataset.train, num_instances=args.num_instances), 130 | batch_size=args.train_batch, # 训练的批次大小 131 | num_workers=args.workers, # 多进程的数目 132 | pin_memory=pin_memory, 133 | drop_last=True, 134 | ) # 如果数据集大小不能被批量大小整除,则设置为“True”以删除最后一个不完整的批次。 135 | 136 | queryloader = DataLoader( 137 | VideoDataset(dataset.query, seq_len=args.seq_len, sample='dense', transform=transform_test), 138 | batch_size=args.test_batch, 139 | shuffle=False, # 设置为“True”以使数据在每个时期重新洗牌(默认值:False)。 140 | num_workers=args.workers, 141 | pin_memory=pin_memory, 142 | drop_last=False, # 如果“False”和数据集的大小不能被批量大小整除,那么最后一批将更小。 143 | ) 144 | 145 | galleryloader = DataLoader( 146 | VideoDataset(dataset.gallery, seq_len=args.seq_len, sample='dense', transform=transform_test), 147 | batch_size=args.test_batch, shuffle=False, num_workers=args.workers, 148 | pin_memory=pin_memory, drop_last=False, 149 | ) 150 | 151 | print("Initializing model: {}".format(args.arch)) # 模型的初始化 152 | 153 | if args.arch == 'resnet503d': 154 | model = resnet3d.resnet50(num_classes=dataset.num_train_pids, sample_width=args.width, 155 | sample_height=args.height, sample_duration=args.seq_len) 156 | # 如果不存在预训练模型,则报错 157 | if not os.path.exists(args.pretrained_model): 158 | raise IOError("Can't find pretrained model: {}".format(args.pretrained_model)) 159 | # 导入预训练的模型 160 | print("Loading checkpoint from '{}'".format(args.pretrained_model)) 161 | checkpoint = torch.load(args.pretrained_model) 162 | state_dict = {} # 状态字典,从checkpoint文件中加载参数 163 | for key in checkpoint['state_dict']: 164 | if 'fc' in key: 165 | continue 166 | state_dict[key.partition("module.")[2]] = checkpoint['state_dict'][key] 167 | model.load_state_dict(state_dict, strict=False) 168 | else: 169 | model = models.init_model(name=args.arch, num_classes=dataset.num_train_pids, loss={'xent', 'htri'}) 170 | print("Model size: {:.5f}M".format(sum(p.numel() for p in model.parameters())/1000000.0)) 171 | 172 | # 损失函数:xent:softmax交叉熵损失函数。htri:三元组损失函数。 173 | criterion_xent = CrossEntropyLabelSmooth(num_classes=dataset.num_train_pids, use_gpu=use_gpu) 174 | criterion_htri = TripletLoss(margin=args.margin) 175 | # 优化器:adam 176 | optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) 177 | # stepsize,逐步减少学习率(> 0表示已启用) 178 | if args.stepsize > 0: 179 | scheduler = 
lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma) 180 | # lr_scheduler学习率计划,StepLR,将每个参数组的学习速率设置为每个步长时期由gamma衰减的初始lr. 181 | start_epoch = args.start_epoch # 手动时期编号(重启时有用) 182 | 183 | if use_gpu: 184 | model = nn.DataParallel(model).cuda() # 多GPU训练 185 | # DataParallel是torch.nn下的一个类,需要制定的参数是module(可以多gpu运行的类函数)和input(数据集) 186 | 187 | if args.evaluate: # 这里的evaluate没有意义,应该添加代码导入保存的checkpoint,再test 188 | print("Evaluate only") # 进行评估 189 | test(model, queryloader, galleryloader, args.pool, use_gpu) 190 | return 191 | 192 | start_time = time.time() # 开始的时间 193 | best_rank1 = -np.inf # 初始化,负无穷 194 | if args.arch == 'resnet503d': # 如果模型为resnet503d, 195 | torch.backends.cudnn.benchmark = False 196 | 197 | for epoch in range(start_epoch, args.max_epoch): # epoch,从开始到最大,进行训练。 198 | print("==> Epoch {}/{}".format(epoch+1, args.max_epoch)) 199 | 200 | train(model, criterion_xent, criterion_htri, optimizer, trainloader, use_gpu) 201 | 202 | if args.stepsize > 0: 203 | scheduler.step() 204 | 205 | # 如果运行一次评估的需要的epoch数大于0,并且当前epoch+1能整除这个epoch数,或者等于最大epoch数。那么就进行一次评估。 206 | if args.eval_step > 0 and (epoch+1) % args.eval_step == 0 or (epoch+1) == args.max_epoch: 207 | print("==> Test") 208 | rank1 = test(model, queryloader, galleryloader, args.pool, use_gpu) 209 | is_best = rank1 > best_rank1 # 比较,大于则返回true,否则返回false。 210 | if is_best: 211 | best_rank1 = rank1 212 | 213 | if use_gpu: 214 | state_dict = model.module.state_dict() 215 | # 函数static_dict()用于返回包含模块所有状态的字典,包括参数和缓存。 216 | else: 217 | state_dict = model.state_dict() 218 | # 保存checkpoint文件 219 | save_checkpoint({ 220 | 'state_dict': state_dict, 221 | 'rank1': rank1, 222 | 'epoch': epoch, 223 | }, is_best, osp.join(args.save_dir, 'checkpoint_ep' + str(epoch+1) + '.pth.tar')) 224 | # 经过的时间 225 | elapsed = round(time.time() - start_time) # round() 方法返回浮点数x的四舍五入值 226 | elapsed = str(datetime.timedelta(seconds=elapsed)) # 对象代表两个时间之间的时间差, 227 | print("Finished. 
Total elapsed time (h:m:s): {}".format(elapsed)) 228 | 229 | 230 | def train(model, criterion_xent, criterion_htri, optimizer, trainloader, use_gpu): 231 | 232 | model.train() # 选择训练的数据集 233 | losses = AverageMeter() # 计算和保存当前值和平均值。 234 | 235 | for batch_idx, (imgs, pids, _) in enumerate(trainloader): # trainloader,121行, 236 | # 从trainloader中获得批次的索引,图像和行人的id。 237 | if use_gpu: 238 | imgs, pids = imgs.cuda(), pids.cuda() # 将数据转到gpu上 239 | imgs, pids = Variable(imgs), Variable(pids) # 将imgs, pids装进Variable中 240 | outputs, features = model(imgs) # 喂给模型图片 241 | if args.htri_only: 242 | # only use hard triplet loss to train the network,只使用三元组损失训练网络 243 | loss = criterion_htri(features, pids) 244 | else: 245 | # combine hard triplet loss with cross entropy loss 三元组损失加交叉熵损失函数 246 | xent_loss = criterion_xent(outputs, pids) 247 | htri_loss = criterion_htri(features, pids) 248 | loss = xent_loss + htri_loss 249 | optimizer.zero_grad() # 将所有参数的梯度置为0 250 | loss.backward() # 梯度反向传播 251 | optimizer.step() # 进行adam优化 252 | losses.update(loss.data[0], pids.size(0)) # 参数更新 253 | 254 | if (batch_idx+1) % args.print_freq == 0: # 输出的频率,多少批次输出一次 255 | print("Batch {}/{}\t Loss {:.6f} ({:.6f})".format(batch_idx+1, len(trainloader), losses.val, losses.avg)) 256 | 257 | 258 | def test(model, queryloader, galleryloader, pool, use_gpu, ranks=[1, 5, 10, 20]): 259 | 260 | model.eval() # 模型的评估 261 | 262 | qf, q_pids, q_camids = [], [], [] 263 | for batch_idx, (imgs, pids, camids) in enumerate(queryloader): 264 | if use_gpu: 265 | imgs = imgs.cuda() 266 | imgs = Variable(imgs, volatile=True) # 将imgs装进Variable中, 267 | # volatile=True的节点不会求导,即使requires_grad=True,也不会进行反向传播,对于不需要反向传播的情景(inference,测试推断), 268 | # 该参数可以实现一定速度的提升,并节省一半的显存,因为其不需要保存梯度 269 | 270 | b, n, s, c, h, w = imgs.size() # b=1, n=batchs, s=图片的长度 271 | assert(b == 1) # 断言函数 272 | imgs = imgs.view(b*n, s, c, h, w) 273 | features = model(imgs) # 喂给模型图片,获得特征 274 | features = features.view(n, -1) # view()函数作用是将一个多行的Tensor,拼接成一行。 275 | features = torch.mean(features, 0) # 取平均值 276 | features = features.data.cpu() 277 | qf.append(features) # 向列表尾部追加一个新元素,序列特征 278 | q_pids.extend(pids) # 向列表尾部追加一个列表,人的身份person id 279 | q_camids.extend(camids) # 摄像机的id 280 | qf = torch.stack(qf) # 堆叠 281 | q_pids = np.asarray(q_pids) # 将列表转换为数组 282 | q_camids = np.asarray(q_camids) 283 | 284 | print("Extracted features for query set, obtained {}-by-{} matrix".format(qf.size(0), qf.size(1))) 285 | 286 | gf, g_pids, g_camids = [], [], [] # 图库,gallery 287 | for batch_idx, (imgs, pids, camids) in enumerate(galleryloader): 288 | if use_gpu: 289 | imgs = imgs.cuda() 290 | imgs = Variable(imgs, volatile=True) 291 | b, n, s, c, h, w = imgs.size() 292 | imgs = imgs.view(b*n, s, c, h, w) 293 | assert(b == 1) 294 | features = model(imgs) 295 | features = features.view(n, -1) 296 | if pool == 'avg': # 采用平均池化还是最大池化 297 | features = torch.mean(features, 0) 298 | else: 299 | features, _ = torch.max(features, 0) 300 | features = features.data.cpu() 301 | gf.append(features) 302 | g_pids.extend(pids) 303 | g_camids.extend(camids) 304 | gf = torch.stack(gf) 305 | g_pids = np.asarray(g_pids) 306 | g_camids = np.asarray(g_camids) 307 | 308 | print("Extracted features for gallery set, obtained {}-by-{} matrix".format(gf.size(0), gf.size(1))) 309 | print("Computing distance matrix") 310 | # 计算距离矩阵 311 | m, n = [gf.size(0), qf.size(0)] # 矩阵的行 312 | distmat = torch.pow(qf, 2).sum(dim=1, keepdim=True).expand(m, n) + \ 313 | torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, m).t() 314 | distmat.addmm_(1, 
-2, qf, gf.t()) 315 | distmat = distmat.numpy() 316 | 317 | print("Computing CMC and mAP") 318 | cmc, mAP = evaluate(distmat, q_pids, g_pids, q_camids, g_camids) 319 | 320 | print("Results ----------") 321 | print("mAP: {:.1%}".format(mAP)) 322 | print("CMC curve") 323 | for r in ranks: 324 | print("Rank-{:<3}: {:.1%}".format(r, cmc[r-1])) 325 | print("------------------") 326 | 327 | return cmc[0] 328 | 329 | 330 | if __name__ == '__main__': 331 | main() 332 | -------------------------------------------------------------------------------- /models/ResNet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import torch 4 | from torch import nn 5 | from torch.nn import functional as F 6 | from torch.autograd import Variable 7 | import torchvision 8 | 9 | __all__ = ['ResNet50TP', 'ResNet50TA', 'ResNet50RNN'] 10 | # 用 __all__ 在模块级别暴露接口,1.提供了哪些是公开接口的约定 11 | # 2.控制 from xxx import * 的行为,import * 就只会导入 __all__ 列出的成员。 12 | # 3.list类型,以字面量的形式显式写出来.同时应该写在所有 import 语句下面 13 | 14 | 15 | class ResNet50TP(nn.Module): # 定义时间池化网络模型 16 | def __init__(self, num_classes, loss={'xent'}, **kwargs): # **kwargs表示的就是形参中按照关键字传值把多余的传值以字典的方式呈现 17 | super(ResNet50TP, self).__init__() 18 | # 首先找到ResNet50TP的父类(nn.Module),然后把类ResNet50TP的对象转换为父类的对象 19 | self.loss = loss # 损失函数为标签平滑正则化交叉熵损失函数 20 | resnet50 = torchvision.models.resnet50(pretrained=True) # 调用封装好的resnet50模型, 21 | # pretrained=True 调用一个在imagenet上预训练过的模型 22 | self.base = nn.Sequential(*list(resnet50.children())[:-2]) # 快速构建基本网络,选择resnet50的子网络,去掉原模型的最后两层 23 | # nn.Sequential是一个Sequential容器,模块将按照构造函数中传递的顺序添加到模块中 24 | self.feat_dim = 2048 # 输出的特征维度为2048 25 | self.classifier = nn.Linear(self.feat_dim, num_classes) # 分类器 26 | # nn.Linear(x,y)一种线性变换y = Ax + b 27 | 28 | def forward(self, x): # 前向传播函数定义,输入为x,5维的tensor (batch_size,seq_len,channels,width,height,) 29 | b = x.size(0) # batch_size 30 | t = x.size(1) # seq_len 31 | x = x.view(b*t, x.size(2), x.size(3), x.size(4)) # b*t= 32 × 4 =128 图片总数 32 | # view()会将原有数据重新分配为一个新的张量 33 | x = self.base(x) # x输入到基本网络中 34 | x = F.avg_pool2d(x, x.size()[2:]) # avg_pool2d(x, x.size(2), x.size(3), x.size(4)) 35 | # 2d平均池化操作,input tensor (minibatch x in_channels x iH x iW) 36 | x = x.view(b, t, -1) # -1 表示维度从其他维度推断出来的 37 | # 将数据重新分配 38 | x = x.permute(0, 2, 1) 39 | # 调整数据的维度,将1,2维数据互换 40 | f = F.avg_pool1d(x, t) # 1d的平均池化 41 | f = f.view(b, self.feat_dim) # b为行人的身份数,也是矩阵的行数,矩阵有2048列 42 | if not self.training: # 如果是测试,返回特征向量 43 | return f 44 | y = self.classifier(f) # 返回分类器输出结果 45 | 46 | if self.loss == {'xent'}: # 如果损失函数是标签平滑正则化softmax交叉熵损失函数 47 | return y 48 | elif self.loss == {'xent', 'htri'}: # 损失函数是交叉熵损失函数+难样本挖掘三元组损失 49 | return y, f 50 | elif self.loss == {'cent'}: # 中心损失 51 | return y, f 52 | else: 53 | raise KeyError("Unsupported loss: {}".format(self.loss)) 54 | 55 | 56 | class ResNet50TA(nn.Module): # 时间注意力模型 57 | def __init__(self, num_classes, loss={'xent'}, **kwargs): 58 | super(ResNet50TA, self).__init__() 59 | self.loss = loss 60 | resnet50 = torchvision.models.resnet50(pretrained=True) 61 | self.base = nn.Sequential(*list(resnet50.children())[:-2]) 62 | self.att_gen = 'softmax' # method for attention generation: softmax or sigmoid 63 | # 注意力的生成,通过softmax or sigmoid 64 | self.feat_dim = 2048 # feature dimension 特征维度为2048 65 | self.middle_dim = 256 # middle layer dimension 中间层的维度为256 66 | self.classifier = nn.Linear(self.feat_dim, num_classes) # 线性输出 67 | self.attention_conv = nn.Conv2d(self.feat_dim, self.middle_dim, (7, 4)) # 
/models/ResNet.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | 
3 | import torch
4 | from torch import nn
5 | from torch.nn import functional as F
6 | from torch.autograd import Variable
7 | import torchvision
8 | 
9 | __all__ = ['ResNet50TP', 'ResNet50TA', 'ResNet50RNN']
10 | # __all__ declares the module's public interface: 1. it documents which names are meant to be public;
11 | # 2. it controls `from xxx import *`, which then imports only the members listed in __all__;
12 | # 3. it is a list, written out explicitly as a literal, and conventionally placed below all import statements
13 | 
14 | 
15 | class ResNet50TP(nn.Module):  # the temporal-pooling model
16 |     def __init__(self, num_classes, loss={'xent'}, **kwargs):  # **kwargs collects any extra keyword arguments into a dict
17 |         super(ResNet50TP, self).__init__()
18 |         # initialize the parent class (nn.Module) before registering any submodules
19 |         self.loss = loss  # the loss configuration (default: label-smoothing cross entropy)
20 |         resnet50 = torchvision.models.resnet50(pretrained=True)  # the stock torchvision ResNet-50;
21 |         # pretrained=True loads weights pretrained on ImageNet
22 |         self.base = nn.Sequential(*list(resnet50.children())[:-2])  # the backbone: ResNet-50 minus its last two layers (global pooling and fc)
23 |         # nn.Sequential is a container that chains modules in the order passed to its constructor
24 |         self.feat_dim = 2048  # output feature dimension
25 |         self.classifier = nn.Linear(self.feat_dim, num_classes)  # the classifier
26 |         # nn.Linear(x, y) is the affine map y = Ax + b
27 | 
28 |     def forward(self, x):  # forward pass; x is a 5-d tensor (batch_size, seq_len, channels, height, width)
29 |         b = x.size(0)  # batch_size
30 |         t = x.size(1)  # seq_len
31 |         x = x.view(b*t, x.size(2), x.size(3), x.size(4))  # b*t = 32 * 4 = 128 images in total
32 |         # view() returns a new tensor over the same data with the requested shape
33 |         x = self.base(x)  # run x through the backbone
34 |         x = F.avg_pool2d(x, x.size()[2:])  # global pooling: the kernel covers the whole (H, W) feature map
35 |         # 2-d average pooling; the input tensor is (minibatch, in_channels, iH, iW)
36 |         x = x.view(b, t, -1)  # -1 lets this dimension be inferred from the others
37 |         # regroup the frame features per tracklet
38 |         x = x.permute(0, 2, 1)
39 |         # swap dims 1 and 2, giving (b, feat_dim, t)
40 |         f = F.avg_pool1d(x, t)  # 1-d average pooling over the t frames (the temporal pooling)
41 |         f = f.view(b, self.feat_dim)  # one row per tracklet in the batch, 2048 columns
42 |         if not self.training:  # at test time, return the feature vector
43 |             return f
44 |         y = self.classifier(f)  # classification logits
45 | 
46 |         if self.loss == {'xent'}:  # label-smoothing softmax cross entropy only
47 |             return y
48 |         elif self.loss == {'xent', 'htri'}:  # cross entropy + hard-mining triplet loss
49 |             return y, f
50 |         elif self.loss == {'cent'}:  # center loss
51 |             return y, f
52 |         else:
53 |             raise KeyError("Unsupported loss: {}".format(self.loss))
54 | 
55 | 
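# Shape walk-through of ResNet50TP.forward with the default training batch
# (b=32 tracklets, t=4 frames, 224x112 RGB inputs) -- illustrative numbers only,
# assuming the default command-line settings:
#
#     x: (32, 4, 3, 224, 112)   -> view               -> (128, 3, 224, 112)
#        -> self.base           -> (128, 2048, 7, 4)
#        -> avg_pool2d + view   -> (32, 4, 2048)
#        -> permute             -> (32, 2048, 4)
#        -> avg_pool1d(t=4)     -> (32, 2048, 1)      -> f: (32, 2048)
#
# i.e. temporal pooling is simply an unweighted mean of the t frame features.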
56 | class ResNet50TA(nn.Module):  # the temporal-attention model
57 |     def __init__(self, num_classes, loss={'xent'}, **kwargs):
58 |         super(ResNet50TA, self).__init__()
59 |         self.loss = loss
60 |         resnet50 = torchvision.models.resnet50(pretrained=True)
61 |         self.base = nn.Sequential(*list(resnet50.children())[:-2])
62 |         self.att_gen = 'softmax'  # method for attention generation: softmax or sigmoid
63 |         # how the raw attention scores are turned into weights
64 |         self.feat_dim = 2048  # feature dimension
65 |         self.middle_dim = 256  # middle layer dimension
66 |         self.classifier = nn.Linear(self.feat_dim, num_classes)  # linear classifier
67 |         self.attention_conv = nn.Conv2d(self.feat_dim, self.middle_dim, (7, 4))  # 2048 input channels, 256 filters
68 |         # the (7, 4) kernel corresponds to the 224x112 input image size: it spans the backbone's whole output map
69 |         self.attention_tconv = nn.Conv1d(self.middle_dim, 1, 3, padding=1)
70 | 
71 |     def forward(self, x):
72 |         b = x.size(0)  # b = 32
73 |         t = x.size(1)  # t = 4
74 |         x = x.view(b*t, x.size(2), x.size(3), x.size(4))  # 128x3x224x112
75 |         x = self.base(x)  # run the images through the backbone: 128x2048x7x4
76 |         a = F.relu(self.attention_conv(x))  # spatial attention conv + ReLU: 128x256x1x1
77 |         a = a.view(b, t, self.middle_dim)  # regroup per tracklet: (b, t, 256) => 32x4x256
78 |         a = a.permute(0, 2, 1)  # swap dims 1 and 2: 32x256x4
79 |         a = F.relu(self.attention_tconv(a))  # temporal conv + ReLU: 32x1x4
80 |         a = a.view(b, t)  # one score per frame: (b, t) => 32x4
81 |         x = F.avg_pool2d(x, x.size()[2:])  # global 2-d average pooling: 128x2048x1x1
82 |         # a holds the attention scores
83 |         if self.att_gen == 'softmax':
84 |             a = F.softmax(a, dim=1)  # softmax along dim 1, i.e. over the t frames
85 |         elif self.att_gen == 'sigmoid':
86 |             a = F.sigmoid(a)
87 |             a = F.normalize(a, p=1, dim=1)  # L1-normalize (p=1) along dim 1 so the frame weights sum to 1
88 |         else:
89 |             raise KeyError("Unsupported attention generation function: {}".format(self.att_gen))
90 |         x = x.view(b, t, -1)  # regroup the frame features: 32x4x2048
91 |         a = torch.unsqueeze(a, -1)  # append a trailing dim of size 1: (b, t, 1) => 32x4x1
92 |         a = a.expand(b, t, self.feat_dim)  # broadcast the weights across the feature dim: (b, t, 2048) => 32x4x2048
93 |         att_x = torch.mul(x, a)  # element-wise product of the frame features and their attention weights: 32x4x2048
94 |         att_x = torch.sum(att_x, 1)  # sum over the time dimension: 32x2048
95 | 
96 |         f = att_x.view(b, self.feat_dim)  # the attended tracklet feature: (b, 2048) => 32x2048
97 |         if not self.training:
98 |             return f
99 |         y = self.classifier(f)  # classify the feature vector f: 32x625 logits
100 | 
101 |         if self.loss == {'xent'}:
102 |             return y
103 |         elif self.loss == {'xent', 'htri'}:
104 |             return y, f
105 |         elif self.loss == {'cent'}:
106 |             return y, f
107 |         else:
108 |             raise KeyError("Unsupported loss: {}".format(self.loss))
109 | 
110 | 
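# A tiny numeric illustration of the weighting above, with hypothetical values:
# for one tracklet with t=4 frame scores [2.0, 1.0, 1.0, 0.0], softmax gives
# weights of roughly [0.53, 0.20, 0.20, 0.07], and the tracklet feature is the
# weighted sum of the 4 frame features:
#
#     import torch
#     import torch.nn.functional as F
#     a = F.softmax(torch.Tensor([[2.0, 1.0, 1.0, 0.0]]), dim=1)  # (1, 4) weights
#     x = torch.randn(1, 4, 2048)                                 # 4 dummy frame features
#     f = (x * a.unsqueeze(-1).expand(1, 4, 2048)).sum(1)         # (1, 2048)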
111 | class ResNet50RNN(nn.Module):
112 |     def __init__(self, num_classes, loss={'xent'}, **kwargs):
113 |         super(ResNet50RNN, self).__init__()
114 |         self.loss = loss
115 |         resnet50 = torchvision.models.resnet50(pretrained=True)
116 |         self.base = nn.Sequential(*list(resnet50.children())[:-2])
117 |         self.hidden_dim = 512
118 |         self.feat_dim = 2048
119 |         self.classifier = nn.Linear(self.hidden_dim, num_classes)  # output layer
120 |         self.lstm = nn.LSTM(
121 |             input_size=self.feat_dim,  # feature dimension of each frame
122 |             hidden_size=self.hidden_dim,  # number of LSTM hidden units
123 |             num_layers=1,  # number of stacked LSTM layers
124 |             batch_first=True  # input and output tensors have batch_size as their first dimension
125 |         )
126 | 
127 |     def forward(self, x):
128 |         b = x.size(0)
129 |         t = x.size(1)
130 |         x = x.view(b*t, x.size(2), x.size(3), x.size(4))
131 |         x = self.base(x)  # run x through the backbone
132 |         x = F.avg_pool2d(x, x.size()[2:])  # global 2-d average pooling
133 |         x = x.view(b, t, -1)  # reshape to the LSTM's expected input (batch, time_step, input_size)
134 |         # besides the outputs, the LSTM returns its final hidden state h_n and cell state h_c
135 |         output, (h_n, h_c) = self.lstm(x)  # output has shape (batch, time_step, output_size)
136 |         output = output.permute(0, 2, 1)
137 |         f = F.avg_pool1d(output, t)  # 1-d average pooling over the time steps
138 |         f = f.view(b, self.hidden_dim)  # reshape to (b, 512)
139 |         if not self.training:
140 |             return f
141 |         y = self.classifier(f)
142 | 
143 |         if self.loss == {'xent'}:
144 |             return y
145 |         elif self.loss == {'xent', 'htri'}:
146 |             return y, f
147 |         elif self.loss == {'cent'}:
148 |             return y, f
149 |         else:
150 |             raise KeyError("Unsupported loss: {}".format(self.loss))
151 | 
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | 
3 | from .ResNet import *  # import the ResNet module's public names so they can be used directly here
4 | # `import *` brings in everything the module exports, i.e. exactly the members listed in its __all__
5 | __factory = {
6 |     'resnet50tp': ResNet50TP,
7 |     'resnet50ta': ResNet50TA,
8 |     'resnet50rnn': ResNet50RNN,
9 | }  # a dict mapping model names to model classes
10 | 
11 | 
12 | def get_names():  # return the available model names (the keys)
13 |     return __factory.keys()
14 | 
15 | 
16 | def init_model(name, *args, **kwargs):  # initialize a model by name
17 |     if name not in __factory.keys():  # unknown names raise KeyError: the mapping has no such key
18 |         raise KeyError("Unknown model: {}".format(name))
19 |     return __factory[name](*args, **kwargs)  # look up the class registered under `name` and instantiate it with the given args
20 | 
--------------------------------------------------------------------------------
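The __factory dict above is a small registry: command-line strings map to model classes, and init_model() forwards any extra arguments to the chosen class. A minimal usage sketch (625 matches the MARS training identity count noted in the ResNet50TA comments; the loss set must be one of the combinations the models understand):

    import models

    print(models.get_names())  # ['resnet50tp', 'resnet50ta', 'resnet50rnn']
    model = models.init_model(name='resnet50ta', num_classes=625, loss={'xent', 'htri'})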
/models/resnet3d.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from torch.autograd import Variable
5 | import math
6 | from functools import partial
7 | 
8 | __all__ = [
9 |     'ResNet', 'resnet10', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
10 |     'resnet152', 'resnet200'
11 | ]
12 | 
13 | 
14 | def conv3x3x3(in_planes, out_planes, stride=1):
15 |     # 3x3x3 convolution with padding; see https://blog.csdn.net/weicao1990/article/details/80283443
16 |     # convolves d frames of h*w RGB images: for an input of size (d, h, w) with c channels and one f*f*f*c filter,
17 |     # the unpadded output would be (d-f+1)*(h-f+1)*(w-f+1)*1
18 |     return nn.Conv3d(
19 |         in_planes,  # input planes: the number of channels of the input signal
20 |         out_planes,  # output planes: the number of channels the convolution produces
21 |         kernel_size=3,  # 3x3x3 kernel
22 |         stride=stride,  # convolution stride (default 1)
23 |         padding=1,  # pad each border of the input with one layer of zeros
24 |         bias=False  # no bias term
25 |     )
26 | 
27 | 
28 | def downsample_basic_block(x, planes, stride):  # downsampling shortcut: one average pooling plus zero channel padding
29 |     out = F.avg_pool3d(x, kernel_size=1, stride=stride)  # 3-d average pooling; input (N, C, D_in, H_in, W_in)
30 |     # with kernel size 1, D_out = floor((D_in - 1) / stride) + 1, and likewise for H_out and W_out; output (N, C, D_out, H_out, W_out)
31 |     zero_pads = torch.Tensor(
32 |         out.size(0), planes - out.size(1), out.size(2), out.size(3),
33 |         out.size(4)).zero_()  # a zero-filled tensor covering the missing planes - C channels
34 |     if isinstance(out.data, torch.cuda.FloatTensor):  # isinstance(object, classinfo)
35 |         # returns True if the object's type matches classinfo, False otherwise
36 |         zero_pads = zero_pads.cuda()  # move the padding to the GPU as well
37 | 
38 |     out = Variable(torch.cat([out.data, zero_pads], dim=1))  # concatenate out.data and zero_pads along the channel dim
39 | 
40 |     return out
41 | 
42 | 
43 | class BasicBlock(nn.Module):  # the basic ResNet block: two 3x3x3 convolutions
44 |     expansion = 1
45 | 
46 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
47 |         super(BasicBlock, self).__init__()
48 |         self.conv1 = conv3x3x3(inplanes, planes, stride)  # first 3x3x3 convolution
49 |         self.bn1 = nn.BatchNorm3d(planes)  # batch normalization over a 5-d input,
50 |         # i.e. a mini-batch of 4-d samples of shape (N, C, D, H, W)
51 |         self.relu = nn.ReLU(inplace=True)  # ReLU(x) = max(0, x); inplace=True overwrites the input to save memory
52 | 
53 |         self.conv2 = conv3x3x3(planes, planes)  # second 3x3x3 convolution
54 |         self.bn2 = nn.BatchNorm3d(planes)
55 |         self.downsample = downsample  # optional downsampling on the skip connection
56 |         self.stride = stride
57 | 
58 |     def forward(self, x):  # forward pass of the plain residual block (no bottleneck)
59 |         residual = x  # the identity (residual) branch
60 | 
61 |         out = self.conv1(x)
62 |         out = self.bn1(out)
63 |         out = self.relu(out)
64 | 
65 |         out = self.conv2(out)
66 |         out = self.bn2(out)
67 | 
68 |         if self.downsample is not None:
69 |             residual = self.downsample(x)
70 | 
71 |         out += residual  # H(x) = F(x) + x
72 |         out = self.relu(out)
73 | 
74 |         return out
75 | 
76 | 
77 | class Bottleneck(nn.Module):  # the bottleneck residual block
78 |     expansion = 4
79 | 
80 |     def __init__(self, inplanes, planes, stride=1, downsample=None):  # e.g. planes = 64
81 |         super(Bottleneck, self).__init__()
82 |         self.conv1 = nn.Conv3d(inplanes, planes, kernel_size=1, bias=False)
83 |         self.bn1 = nn.BatchNorm3d(planes)
84 | 
85 |         self.conv2 = nn.Conv3d(
86 |             planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
87 |         self.bn2 = nn.BatchNorm3d(planes)
88 | 
89 |         self.conv3 = nn.Conv3d(planes, planes * 4, kernel_size=1, bias=False)  # widens to planes * 4, e.g. 256
90 |         self.bn3 = nn.BatchNorm3d(planes * 4)
91 | 
92 |         self.relu = nn.ReLU(inplace=True)
93 |         self.downsample = downsample
94 |         self.stride = stride
95 | 
96 |     def forward(self, x):
97 |         residual = x
98 | 
99 |         out = self.conv1(x)
100 |         out = self.bn1(out)
101 |         out = self.relu(out)
102 | 
103 |         out = self.conv2(out)
104 |         out = self.bn2(out)
105 |         out = self.relu(out)
106 | 
107 |         out = self.conv3(out)
108 |         out = self.bn3(out)
109 | 
110 |         if self.downsample is not None:
111 |             residual = self.downsample(x)
112 | 
113 |         out += residual
114 |         out = self.relu(out)
115 | 
116 |         return out
117 | 
118 | 
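# Shape sketch of shortcut type 'A' (downsample_basic_block above), with
# hypothetical sizes: the stride-2 pooling halves D/H/W, then zero channels are
# concatenated until the target width is reached, keeping the shortcut parameter-free:
#
#     import torch
#     import torch.nn.functional as F
#     x = torch.randn(2, 64, 8, 56, 56)                # (N, C, D, H, W)
#     out = F.avg_pool3d(x, kernel_size=1, stride=2)   # -> (2, 64, 4, 28, 28)
#     pad = torch.zeros(2, 128 - 64, 4, 28, 28)        # the missing channels, all zero
#     out = torch.cat([out, pad], dim=1)               # -> (2, 128, 4, 28, 28)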
119 | class ResNet(nn.Module):
120 | 
121 |     def __init__(self,
122 |                  block,
123 |                  layers,
124 |                  sample_height,
125 |                  sample_width,
126 |                  sample_duration,  # duration of a sample: the number of frames d
127 |                  shortcut_type='B',
128 |                  num_classes=400):
129 |         self.inplanes = 64  # the running input channel count, starting at 64
130 |         super(ResNet, self).__init__()
131 |         self.conv1 = nn.Conv3d(  # part 1: the stem convolution
132 |             3,  # 3 input channels: RGB frames of shape (d, h, w)
133 |             64,  # 64 output channels
134 |             kernel_size=7,  # 7*7*7 kernel
135 |             stride=(1, 2, 2),  # stride: no temporal downsampling, spatial stride 2
136 |             padding=(3, 3, 3),  # zero-padding of depth 3 on every side
137 |             bias=False)
138 |         self.bn1 = nn.BatchNorm3d(64)
139 |         self.relu = nn.ReLU(inplace=True)
140 |         # part 2: max pooling with a 3*3*3 kernel, stride 2, padding 1
141 |         self.maxpool = nn.MaxPool3d(kernel_size=(3, 3, 3), stride=2, padding=1)
142 | 
143 |         self.layer1 = self._make_layer(block, 64, layers[0], shortcut_type)  # a stage of bottleneck residual blocks;
144 |         # block width 64, output 256 channels; builds layers[0] blocks (3 for ResNet-50)
145 | 
146 |         self.layer2 = self._make_layer(
147 |             block, 128, layers[1], shortcut_type, stride=2)
148 |         # block width 128, output 512 channels; builds layers[1] blocks (4 for ResNet-50)
149 | 
150 |         self.layer3 = self._make_layer(
151 |             block, 256, layers[2], shortcut_type, stride=2)
152 |         # block width 256, output 1024 channels; builds layers[2] blocks (6 for ResNet-50)
153 | 
154 |         self.layer4 = self._make_layer(
155 |             block, 512, layers[3], shortcut_type, stride=2)
156 |         # block width 512, output 2048 channels; builds layers[3] blocks (3 for ResNet-50)
157 | 
158 |         last_duration = int(math.ceil(sample_duration / 16.0))  # kernel size of the final average pooling,
159 |         last_height = int(math.ceil(sample_height / 32.0))  # derived from the input size and the network's total stride
160 |         last_width = int(math.ceil(sample_width / 32.0))
161 |         self.avgpool = nn.AvgPool3d(
162 |             (last_duration, last_height, last_width), stride=1)
163 | 
164 |         self.fc = nn.Linear(512 * block.expansion, num_classes)  # fully connected layer: the classifier
165 | 
166 |         for m in self.modules():  # iterate over every module in the network
167 |             if isinstance(m, nn.Conv3d):  # He (Kaiming) initialization for conv weights
168 |                 m.weight = nn.init.kaiming_normal(m.weight, mode='fan_out')
169 |             elif isinstance(m, nn.BatchNorm3d):
170 |                 m.weight.data.fill_(1)
171 |                 m.bias.data.zero_()
172 | 
173 |     def _make_layer(self, block, planes, blocks, shortcut_type, stride=1):
174 |         # block = the residual block class (e.g. Bottleneck); planes = the block width
175 |         # blocks = how many residual blocks this stage should contain
176 |         downsample = None
177 |         if stride != 1 or self.inplanes != planes * block.expansion:  # if the stride is not 1, or the channel counts differ,
178 |             if shortcut_type == 'A':  # the skip connection must downsample; type 'A' uses average pooling
179 |                 downsample = partial(  # partial(func, *fixed_args, **fixed_kwargs)
180 |                     downsample_basic_block,  # the function to wrap is partial()'s first argument;
181 |                     planes=planes * block.expansion,  # the remaining arguments are pre-bound, here as keyword arguments,
182 |                     stride=stride)  # yielding a new function: downsample_basic_block with planes and stride fixed
183 |             else:  # shortcut type 'B': one 1x1x1 convolution plus batch normalization
184 |                 downsample = nn.Sequential(
185 |                     nn.Conv3d(  # a 1*1*1 convolution up to planes * expansion channels
186 |                         self.inplanes,
187 |                         planes * block.expansion,  # e.g. 256
188 |                         kernel_size=1,
189 |                         stride=stride,
190 |                         bias=False),
191 |                     nn.BatchNorm3d(planes * block.expansion)  # batch normalization
192 |                 )
193 | 
194 |         layers = []  # the list of blocks making up this stage
195 | 
196 |         layers.append(block(self.inplanes, planes, stride, downsample))  # the first block carries
197 |         # the (possibly downsampling) skip connection
198 |         self.inplanes = planes * block.expansion  # input channels for the next block = width * 4
199 |         for i in range(1, blocks):  # build the remaining blocks of the stage
200 |             layers.append(block(self.inplanes, planes))  # from the second block on, the skip connection needs no downsampling
201 | 
202 |         return nn.Sequential(*layers)  # return the assembled stage
203 | 
204 |     def load_matched_state_dict(self, state_dict):  # load the matching parameters from a pretrained state dict
205 | 
206 |         own_state = self.state_dict()
207 |         for name, param in state_dict.items():
208 |             if name not in own_state:
209 |                 continue
210 |             # if isinstance(param, Parameter):
211 |             # backwards compatibility for serialized parameters
212 |             param = param.data
213 |             print("loading "+name)
214 |             own_state[name].copy_(param)
215 | 
216 |     def forward(self, x):  # forward pass of the whole 3-d ResNet
217 |         # default size is (b, s, c, w, h), s for seq_len, c for channel
218 |         # convert for 3d cnn, (b, c, s, w, h)
219 |         x = x.permute(0, 2, 1, 3, 4)
220 |         x = self.conv1(x)
221 |         x = self.bn1(x)
222 |         x = self.relu(x)
223 |         x = self.maxpool(x)
224 | 
225 |         x = self.layer1(x)
226 |         x = self.layer2(x)
227 |         x = self.layer3(x)
228 |         x = self.layer4(x)
229 |         x = self.avgpool(x)
230 |         x = x.view(x.size(0), -1)
231 |         y = self.fc(x)
232 | 
233 |         return y, x
234 | 
235 | 
236 | def get_fine_tuning_parameters(model, ft_begin_index):
237 |     if ft_begin_index == 0:
238 |         return model.parameters()
239 | 
240 |     ft_module_names = []
241 |     for i in range(ft_begin_index, 5):
242 |         ft_module_names.append('layer{}'.format(i))
243 |     ft_module_names.append('fc')
244 | 
245 |     parameters = []
246 |     for k, v in model.named_parameters():
247 |         for ft_module in ft_module_names:
248 |             if ft_module in k:
249 |                 parameters.append({'params': v})
250 |                 break
251 |         else:
252 |             parameters.append({'params': v, 'lr': 0.0})
253 | 
254 |     return parameters
255 | 
256 | 
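# get_fine_tuning_parameters above freezes everything below stage ft_begin_index
# by giving those parameter groups a learning rate of 0.0. A usage sketch with
# hypothetical optimizer settings (here only layer4 and fc would be trained):
#
#     model = resnet50(sample_height=224, sample_width=112, sample_duration=4, num_classes=625)
#     param_groups = get_fine_tuning_parameters(model, ft_begin_index=4)
#     optimizer = torch.optim.SGD(param_groups, lr=0.01, momentum=0.9)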
257 | def resnet10(**kwargs):
258 |     """Constructs a ResNet-10 model.
259 |     """
260 |     model = ResNet(BasicBlock, [1, 1, 1, 1], **kwargs)
261 |     return model
262 | 
263 | 
264 | def resnet18(**kwargs):
265 |     """Constructs a ResNet-18 model.
266 |     """
267 |     model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
268 |     return model
269 | 
270 | 
271 | def resnet34(**kwargs):
272 |     """Constructs a ResNet-34 model.
273 |     """
274 |     model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
275 |     return model
276 | 
277 | 
278 | def resnet50(**kwargs):
279 |     """Constructs a ResNet-50 model.
280 |     """
281 |     model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
282 |     return model
283 | 
284 | 
285 | def resnet101(**kwargs):
286 |     """Constructs a ResNet-101 model.
287 |     """
288 |     model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
289 |     return model
290 | 
291 | 
292 | def resnet152(**kwargs):
293 |     """Constructs a ResNet-152 model.
294 |     """
295 |     model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
296 |     return model
297 | 
298 | 
299 | def resnet200(**kwargs):
300 |     """Constructs a ResNet-200 model.
301 |     """
302 |     model = ResNet(Bottleneck, [3, 24, 36, 3], **kwargs)
303 |     return model
304 | 
305 | 
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding:utf-8 -*-
3 | # @author = yuci
4 | # Date: 18-10-2 12:44 PM
5 | 
6 | 
7 | from __future__ import print_function, absolute_import
8 | import os
9 | import sys
10 | import argparse
11 | import os.path as osp
12 | import numpy as np
13 | 
14 | import torch
15 | import torch.nn as nn
16 | import torch.backends.cudnn as cudnn
17 | from torch.utils.data import DataLoader
18 | from torch.autograd import Variable
19 | 
20 | 
21 | import data_manager
22 | from video_loader import VideoDataset
23 | import transforms as T
24 | import models
25 | from models import resnet3d
26 | from utils import Logger, visualize_ranked_results
27 | from eval_metrics import evaluate
28 | 
29 | 
30 | parser = argparse.ArgumentParser(description='Train video model with cross entropy loss')
31 | # Datasets
32 | parser.add_argument('-d', '--dataset', type=str, default='mars',
33 |                     choices=data_manager.get_names())
34 | parser.add_argument('-j', '--workers', default=4, type=int,
35 |                     help="number of data loading workers (default: 4)")
36 | parser.add_argument('--height', type=int, default=224,
37 |                     help="height of an image (default: 224)")
38 | parser.add_argument('--width', type=int, default=112,
39 |                     help="width of an image (default: 112)")
40 | parser.add_argument('--seq-len', type=int, default=4, help="number of images to sample in a tracklet")
41 | # Optimization options
42 | parser.add_argument('--max-epoch', default=800, type=int,
43 |                     help="maximum epochs to run")
44 | parser.add_argument('--start-epoch', default=0, type=int,
45 |                     help="manual epoch number (useful on restarts)")
46 | parser.add_argument('--train-batch', default=32, type=int,
47 |                     help="train batch size")
48 | parser.add_argument('--test-batch', default=1, type=int, help="has to be 1")
49 | parser.add_argument('--lr', '--learning-rate', default=0.0003, type=float,
50 |                     help="initial learning rate, use 0.0001 for rnn, use 0.0003 for pooling and attention")
51 | parser.add_argument('--stepsize', default=200, type=int,
52 |                     help="stepsize to decay learning rate (>0 means this is enabled)")
53 | parser.add_argument('--gamma', default=0.1, type=float,
54 |                     help="learning rate decay")
55 | parser.add_argument('--weight-decay', default=5e-04, type=float,
56 |                     help="weight decay (default: 5e-04)")
57 | parser.add_argument('--margin', type=float, default=0.3, help="margin for triplet loss")
58 | parser.add_argument('--num-instances', type=int, default=4,
59 |                     help="number of instances per identity")
60 | parser.add_argument('--htri-only', action='store_true', default=False,
61 |                     help="if this is True, only htri loss is used in training")
62 | # Architecture
63 | parser.add_argument('-a', '--arch', type=str, default='resnet50tp',
64 |                     help="resnet503d, resnet50tp, resnet50ta, resnet50rnn")
65 | parser.add_argument('--pool', type=str, default='avg', choices=['avg', 'max'])
66 | 
67 | # Miscs
68 | parser.add_argument('--print-freq', type=int, default=78, help="print frequency")
69 | parser.add_argument('--seed', type=int, default=1, help="manual seed")
70 | parser.add_argument('--pretrained-model', type=str, default='/home/ying/Desktop/video/resnet-50-kinetics.pth',
71 |                     help='needs to be set for resnet3d models')
72 | parser.add_argument('--best-model', type=str, default='/home/ying/Desktop/video/log/best_model.pth.tar',
73 |                     help='needs to be set to evaluate the model')
74 | parser.add_argument('--evaluate', action='store_true', help="evaluation only")
75 | parser.add_argument('--eval-step', type=int, default=50,
76 |                     help="run evaluation for every N epochs (set to -1 to test after training)")
77 | parser.add_argument('--save-dir', type=str, default='log')
78 | parser.add_argument('--use-cpu', action='store_true', help="use cpu")
79 | parser.add_argument('--gpu-devices', default='0', type=str, help='gpu device ids for CUDA_VISIBLE_DEVICES')
80 | parser.add_argument('--vis-ranked-res', action='store_true',
81 |                     help="visualize ranked results, only available in evaluation mode (default: False)")
82 | 
83 | args = parser.parse_args()
84 | 
85 | 
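# With these arguments, a typical evaluation run of this script looks something like
#
#     python test.py -d mars -a resnet50tp --evaluate --pool avg \
#         --best-model log/best_model.pth.tar --gpu-devices 0
#
# where --best-model points at whatever checkpoint save_checkpoint produced during training.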
86 | def main():
87 |     torch.manual_seed(args.seed)
88 |     os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
89 |     use_gpu = torch.cuda.is_available()
90 |     if args.use_cpu:
91 |         use_gpu = False
92 | 
93 |     sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
94 |     print("==========\nArgs:{}\n==========".format(args))
95 | 
96 |     if use_gpu:
97 |         print("Currently using GPU {}".format(args.gpu_devices))
98 |         cudnn.benchmark = True
99 |         torch.cuda.manual_seed_all(args.seed)
100 |     else:
101 |         print("Currently using CPU (GPU is highly recommended)")
102 | 
103 |     print("Initializing dataset {}".format(args.dataset))
104 |     dataset = data_manager.init_dataset(name=args.dataset)
105 | 
106 |     transform_test = T.Compose([
107 |         T.Resize((args.height, args.width)),
108 |         T.ToTensor(),
109 |         T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
110 |     ])
111 | 
112 |     pin_memory = True if use_gpu else False
113 | 
114 |     queryloader = DataLoader(
115 |         VideoDataset(dataset.query, seq_len=args.seq_len, sample='dense', transform=transform_test),
116 |         batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
117 |         pin_memory=pin_memory, drop_last=False,
118 |     )
119 | 
120 |     galleryloader = DataLoader(
121 |         VideoDataset(dataset.gallery, seq_len=args.seq_len, sample='dense', transform=transform_test),
122 |         batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
123 |         pin_memory=pin_memory, drop_last=False,
124 |     )
125 |     if args.arch == 'resnet503d':
126 |         cudnn.benchmark = False
127 | 
128 |     print("Initializing model: {}".format(args.arch))
129 |     if args.arch == 'resnet503d':
130 |         model = resnet3d.resnet50(num_classes=dataset.num_train_pids, sample_width=args.width,
131 |                                   sample_height=args.height, sample_duration=args.seq_len)
132 |         if not os.path.exists(args.best_model):
133 |             raise IOError("Can't find best model: {}".format(args.best_model))
134 |         print("Loading checkpoint from '{}'".format(args.best_model))
135 |         checkpoint = torch.load(args.best_model)
136 |         state_dict = {}
137 |         for key in checkpoint['state_dict']:
138 |             state_dict[key] = checkpoint['state_dict'][key]
139 |         model.load_state_dict(state_dict, strict=False)
140 |     else:
141 |         model = models.init_model(name=args.arch, num_classes=dataset.num_train_pids, loss={'xent', 'htri'})
142 |         if not os.path.exists(args.best_model):
143 |             raise IOError("Can't find best model: {}".format(args.best_model))
144 |         print("Loading checkpoint from '{}'".format(args.best_model))
145 |         checkpoint = torch.load(args.best_model)
146 |         state_dict = {}
147 |         for key in checkpoint['state_dict']:
148 |             state_dict[key] = checkpoint['state_dict'][key]
149 |         model.load_state_dict(state_dict, strict=False)
150 |     print("Model size: {:.5f}M".format(sum(p.numel() for p in model.parameters()) / 1000000.0))
151 | 
152 |     if use_gpu:
153 |         model = nn.DataParallel(model).cuda()
154 | 
155 |     if args.evaluate:
156 |         print("Evaluate only")
157 |         test(model, queryloader, galleryloader, args.pool, use_gpu)
158 |         # distmat = test(model, queryloader, galleryloader, args.pool, use_gpu)  # don't do this with the rnn model, or it runs out of memory
159 |         # if args.vis_ranked_res:
160 |         #     visualize_ranked_results(
161 |         #         distmat, dataset,
162 |         #         save_dir=osp.join(args.save_dir, 'ranked_results'),
163 |         #         topk=20,
164 |         #     )
165 | 
166 |     return
167 | 
168 | 
169 | # def test(model, queryloader, galleryloader, pool, use_gpu, ranks=[1, 5, 10, 20], return_distmat=False):
170 | def test(model, queryloader, galleryloader, pool, use_gpu, ranks=[1, 5, 10, 20]):
171 |     model.eval()
172 | 
173 |     qf, q_pids, q_camids = [], [], []
174 |     for batch_idx, (imgs, pids, camids) in enumerate(queryloader):
175 |         if use_gpu:
176 |             imgs = imgs.cuda()
177 |         imgs = Variable(imgs, volatile=True)
178 |         # b=1, n=number of clips, s=16
179 |         b, n, s, c, h, w = imgs.size()
180 |         assert (b == 1)
181 |         imgs = imgs.view(b * n, s, c, h, w)
182 |         features = model(imgs)
183 |         features = features.view(n, -1)
184 |         features = torch.mean(features, 0)
185 |         features = features.data.cpu()
186 |         qf.append(features)
187 |         q_pids.extend(pids)
188 |         q_camids.extend(camids)
189 |     qf = torch.stack(qf)
190 |     q_pids = np.asarray(q_pids)
191 |     q_camids = np.asarray(q_camids)
192 | 
193 |     print("Extracted features for query set, obtained {}-by-{} matrix".format(qf.size(0), qf.size(1)))
194 | 
195 |     gf, g_pids, g_camids = [], [], []
196 |     for batch_idx, (imgs, pids, camids) in enumerate(galleryloader):
197 |         if use_gpu:
198 |             imgs = imgs.cuda()
199 |         imgs = Variable(imgs, volatile=True)
200 |         b, n, s, c, h, w = imgs.size()
201 |         imgs = imgs.view(b * n, s, c, h, w)
202 |         assert (b == 1)
203 |         features = model(imgs)
204 |         features = features.view(n, -1)
205 |         if pool == 'avg':
206 |             features = torch.mean(features, 0)
207 |         else:
208 |             features, _ = torch.max(features, 0)
209 |         features = features.data.cpu()
210 |         gf.append(features)
211 |         g_pids.extend(pids)
212 |         g_camids.extend(camids)
213 |     gf = torch.stack(gf)
214 |     g_pids = np.asarray(g_pids)
215 |     g_camids = np.asarray(g_camids)
216 | 
217 |     print("Extracted features for gallery set, obtained {}-by-{} matrix".format(gf.size(0), gf.size(1)))
218 |     print("Computing distance matrix")
219 | 
220 |     m, n = qf.size(0), gf.size(0)
221 |     distmat = torch.pow(qf, 2).sum(dim=1, keepdim=True).expand(m, n) + \
222 |               torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, m).t()
223 |     distmat.addmm_(1, -2, qf, gf.t())
224 |     distmat = distmat.numpy()
225 | 
226 |     print("Computing CMC and mAP")
227 |     cmc, mAP = evaluate(distmat, q_pids, g_pids, q_camids, g_camids)
228 | 
229 |     print("Results ----------")
230 |     print("mAP: {:.1%}".format(mAP))
231 |     print("CMC curve")
232 |     for r in ranks:
233 |         print("Rank-{:<3}: {:.1%}".format(r, cmc[r - 1]))
234 |     print("------------------")
235 |     # if return_distmat:
236 |     #     return distmat
237 |     return cmc[0]
238 | 
239 | 
240 | if __name__ == '__main__':
241 |     main()
242 | 
--------------------------------------------------------------------------------