├── .idea └── vcs.xml ├── CocoFolder.py ├── Mytransforms.py ├── README.md ├── coco_loader.py ├── evaluation ├── eval_caffe.py ├── eval_mechanism.py ├── eval_pytorch.py ├── ski.jpg ├── test.sh ├── test_pose.py └── test_util.py ├── experiments ├── baseline │ ├── config.yml │ └── train_pose.py └── fpn │ ├── config.yml │ └── train_pose.py ├── logger.py ├── model ├── __init__.py ├── fpn.py ├── resnet50_conv3.py └── vgg_1branch.py ├── pose_estimation.py ├── preprocessing ├── convert_model.py └── generate_json_mask.py ├── train ├── config.yml ├── log │ └── train-2018-02-19-06-37-30.log ├── retrain.sh ├── train_model.sh └── train_pose.py ├── utils.py ├── vis_scripts.py └── vis_util.py /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /CocoFolder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.utils.data as data 3 | import numpy as np 4 | import shutil 5 | import time 6 | import random 7 | import os 8 | import math 9 | import json 10 | from PIL import Image 11 | import cv2 12 | import Mytransforms 13 | 14 | def read_data_file(file_dir): 15 | 16 | lists = [] 17 | with open(file_dir, 'r') as fp: 18 | line = fp.readline() 19 | while line: 20 | path = line.strip() 21 | lists.append(path) 22 | line = fp.readline() 23 | 24 | return lists 25 | 26 | def read_json_file(file_dir): 27 | """ 28 | filename: JSON file 29 | 30 | return: two list: key_points list and centers list 31 | """ 32 | fp = open(file_dir) 33 | data = json.load(fp) 34 | kpts = [] 35 | centers = [] 36 | scales = [] 37 | 38 | for info in data: 39 | kpt = [] 40 | center = [] 41 | scale = [] 42 | lists = info['info'] 43 | for x in lists: 44 | kpt.append(x['keypoints']) 45 | center.append(x['pos']) 46 | scale.append(x['scale']) 47 | kpts.append(kpt) 48 | centers.append(center) 49 | scales.append(scale) 50 | fp.close() 51 | 52 | return kpts, centers, scales 53 | 54 | def generate_heatmap(heatmap, kpt, stride, sigma): 55 | 56 | height, width, num_point = heatmap.shape 57 | start = stride / 2.0 - 0.5 58 | 59 | num = len(kpt) 60 | length = len(kpt[0]) 61 | for i in range(num): 62 | for j in range(length): 63 | if kpt[i][j][2] > 1: # not labeled 64 | continue 65 | x = kpt[i][j][0] 66 | y = kpt[i][j][1] 67 | for h in range(height): 68 | for w in range(width): 69 | xx = start + w * stride 70 | yy = start + h * stride 71 | dis = ((xx - x) * (xx - x) + (yy - y) * (yy - y)) / 2.0 / sigma / sigma 72 | if dis > 4.6052: 73 | continue 74 | heatmap[h][w][j + 1] += math.exp(-dis) 75 | if heatmap[h][w][j + 1] > 1: 76 | heatmap[h][w][j + 1] = 1 77 | 78 | return heatmap 79 | 80 | def generate_vector(vector, cnt, kpts, vec_pair, stride, theta): 81 | 82 | height, width, channel = cnt.shape 83 | length = len(kpts) 84 | 85 | for j in range(length): 86 | for i in range(channel): 87 | a = vec_pair[0][i] 88 | b = vec_pair[1][i] 89 | if kpts[j][a][2] > 1 or kpts[j][b][2] > 1: 90 | continue 91 | ax = kpts[j][a][0] * 1.0 / stride 92 | ay = kpts[j][a][1] * 1.0 / stride 93 | bx = kpts[j][b][0] * 1.0 / stride 94 | by = kpts[j][b][1] * 1.0 / stride 95 | 96 | bax = bx - ax 97 | bay = by - ay 98 | norm_ba = math.sqrt(1.0 * bax * bax + bay * bay) + 1e-9 # to aviod two points have same position. 
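# Normalize (bax, bay) to a unit vector pointing from joint a to joint b; the
# 1e-9 epsilon prevents division by zero when the two keypoints coincide.
# Every pixel within `theta` of the limb segment stores this direction in the PAF below.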
99 | bax /= norm_ba 100 | bay /= norm_ba 101 | 102 | min_w = max(int(round(min(ax, bx) - theta)), 0) 103 | max_w = min(int(round(max(ax, bx) + theta)), width) 104 | min_h = max(int(round(min(ay, by) - theta)), 0) 105 | max_h = min(int(round(max(ay, by) + theta)), height) 106 | 107 | for h in range(min_h, max_h): 108 | for w in range(min_w, max_w): 109 | px = w - ax 110 | py = h - ay 111 | 112 | dis = abs(bay * px - bax * py) 113 | if dis <= theta: 114 | vector[h][w][2 * i] = (vector[h][w][2 * i] * cnt[h][w][i] + bax) / (cnt[h][w][i] + 1) 115 | vector[h][w][2 * i + 1] = (vector[h][w][2 * i + 1] * cnt[h][w][i] + bay) / (cnt[h][w][i] + 1) 116 | cnt[h][w][i] += 1 117 | 118 | return vector 119 | 120 | class CocoFolder(data.Dataset): 121 | 122 | def __init__(self, file_dir, stride, transformer=None): 123 | 124 | self.img_list = read_data_file(file_dir[0]) 125 | self.mask_list = read_data_file(file_dir[1]) 126 | self.kpt_list, self.center_list, self.scale_list = read_json_file(file_dir[2]) 127 | self.stride = stride 128 | self.transformer = transformer 129 | self.vec_pair = [[2,3,5,6,8,9, 11,12,0,1,1, 1,1,2, 5, 0, 0, 14,15], 130 | [3,4,6,7,9,10,12,13,1,8,11,2,5,16,17,14,15,16,17]] # different from openpose 131 | self.theta = 1.0 132 | self.sigma = 7.0 133 | 134 | def __getitem__(self, index): 135 | 136 | img_path = '/home/xiangyu/data/coco/images/val2014/'+ self.img_list[index] 137 | 138 | img = np.array(cv2.imread(img_path), dtype=np.float32) 139 | mask_path = self.mask_list[index] 140 | mask = np.load(mask_path) 141 | mask = np.array(mask, dtype=np.float32) 142 | 143 | kpt = self.kpt_list[index] 144 | center = self.center_list[index] 145 | scale = self.scale_list[index] 146 | 147 | img, mask, kpt, center = self.transformer(img, mask, kpt, center, scale) 148 | 149 | height, width, _ = img.shape 150 | 151 | mask = cv2.resize(mask, (width / self.stride, height / self.stride)).reshape((height / self.stride, width / self.stride, 1)) 152 | 153 | heatmap = np.zeros((height / self.stride, width / self.stride, len(kpt[0]) + 1), dtype=np.float32) 154 | heatmap = generate_heatmap(heatmap, kpt, self.stride, self.sigma) 155 | heatmap[:,:,0] = 1.0 - np.max(heatmap[:,:,1:], axis=2) # for background 156 | heatmap = heatmap * mask 157 | 158 | vecmap = np.zeros((height / self.stride, width / self.stride, len(self.vec_pair[0]) * 2), dtype=np.float32) 159 | cnt = np.zeros((height / self.stride, width / self.stride, len(self.vec_pair[0])), dtype=np.int32) 160 | 161 | vecmap = generate_vector(vecmap, cnt, kpt, self.vec_pair, self.stride, self.theta) 162 | vecmap = vecmap * mask 163 | 164 | img = Mytransforms.normalize(Mytransforms.to_tensor(img), [128.0, 128.0, 128.0], [256.0, 256.0, 256.0]) # mean, std 165 | mask = Mytransforms.to_tensor(mask) 166 | heatmap = Mytransforms.to_tensor(heatmap) 167 | vecmap = Mytransforms.to_tensor(vecmap) 168 | 169 | # kpts to tensor 170 | #kpt = np.array(kpt) 171 | #kpt = torch.from_numpy(kpt) 172 | 173 | return img, heatmap, vecmap, mask, kpt 174 | 175 | def __len__(self): 176 | 177 | return len(self.img_list) 178 | -------------------------------------------------------------------------------- /Mytransforms.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import torch 3 | import math 4 | import random 5 | import numpy as np 6 | import numbers 7 | import types 8 | import collections 9 | import warnings 10 | import cv2 11 | 12 | def normalize(tensor, mean, std): 13 | """Normalize a ``torch.tensor`` 14 | 15 | 
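Normalization is in-place: each channel is shifted by ``mean`` and divided by
``std``. In this repo it is called as, e.g.,
``normalize(to_tensor(img), [128.0, 128.0, 128.0], [256.0, 256.0, 256.0])``.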
Args: 16 | tensor (torch.tensor): tensor to be normalized. 17 | mean: (list): the mean of BGR 18 | std: (list): the std of BGR 19 | 20 | Returns: 21 | Tensor: Normalized tensor. 22 | """ 23 | 24 | for t, m, s in zip(tensor, mean, std): 25 | t.sub_(m).div_(s) 26 | return tensor 27 | 28 | def to_tensor(pic): 29 | """Convert a ``numpy.ndarray`` to tensor. 30 | 31 | See ``ToTensor`` for more details. 32 | 33 | Args: 34 | pic (numpy.ndarray): Image to be converted to tensor. 35 | 36 | Returns: 37 | Tensor: Converted image. 38 | """ 39 | 40 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 41 | 42 | return img.float() 43 | 44 | def resize(img, mask, kpt, center, ratio): 45 | """Resize the ``numpy.ndarray`` and points as ratio. 46 | 47 | Args: 48 | img (numpy.ndarray): Image to be resized. 49 | mask (numpy.ndarray): Mask to be resized. 50 | kpt (list): Keypoints to be resized. 51 | center (list): Center points to be resized. 52 | ratio (tuple or number): the ratio to resize. 53 | 54 | Returns: 55 | numpy.ndarray: Resized image. 56 | numpy.ndarray: Resized mask. 57 | lists: Resized keypoints. 58 | lists: Resized center points. 59 | """ 60 | 61 | if not (isinstance(ratio, numbers.Number) or (isinstance(ratio, collections.Iterable) and len(ratio) == 2)): 62 | raise TypeError('Got inappropriate ratio arg: {}'.format(ratio)) 63 | 64 | h, w, _ = img.shape 65 | if w < 64: 66 | img = cv2.copyMakeBorder(img, 0, 0, 0, 64 - w, cv2.BORDER_CONSTANT, value=(128, 128, 128)) 67 | mask = cv2.copyMakeBorder(mask, 0, 0, 0, 64 - w, cv2.BORDER_CONSTANT, value=(1, 1, 1)) 68 | w = 64 69 | 70 | if isinstance(ratio, numbers.Number): 71 | 72 | num = len(kpt) 73 | length = len(kpt[0]) 74 | for i in range(num): 75 | for j in range(length): 76 | kpt[i][j][0] *= ratio 77 | kpt[i][j][1] *= ratio 78 | center[i][0] *= ratio 79 | center[i][1] *= ratio 80 | 81 | return cv2.resize(img, (0, 0), fx=ratio, fy=ratio), cv2.resize(mask, (0, 0), fx=ratio, fy=ratio), kpt, center 82 | 83 | else: 84 | num = len(kpt) 85 | length = len(kpt[0]) 86 | for i in range(num): 87 | for j in range(length): 88 | kpt[i][j][0] *= ratio[0] 89 | kpt[i][j][1] *= ratio[1] 90 | center[i][0] *= ratio[0] 91 | center[i][1] *= ratio[1] 92 | return np.ascontiguousarray(cv2.resize(img, (0, 0), fx=ratio[0], fy=ratio[1])), np.ascontiguousarray(cv2.resize(mask, (0, 0), fx=ratio[0], fy=ratio[1])), kpt, center 93 | 94 | class RandomResized(object): 95 | """Resize the given numpy.ndarray to random size and aspect ratio. 96 | 97 | Args: 98 | scale_min: the min scale to resize. 99 | scale_max: the max scale to resize. 100 | """ 101 | 102 | def __init__(self, scale_min=0.5, scale_max=1.1): 103 | self.scale_min = scale_min 104 | self.scale_max = scale_max 105 | 106 | @staticmethod 107 | def get_params(img, scale_min, scale_max, scale): 108 | 109 | height, width, _ = img.shape 110 | 111 | ratio = random.uniform(scale_min, scale_max) 112 | ratio = ratio * 0.6 / scale 113 | 114 | return ratio 115 | 116 | def __call__(self, img, mask, kpt, center, scale): 117 | """ 118 | Args: 119 | img (numpy.ndarray): Image to be resized. 120 | mask (numpy.ndarray): Mask to be resized. 121 | kpt (list): keypoints to be resized. 122 | center: (list): center points to be resized. 123 | 124 | Returns: 125 | numpy.ndarray: Randomly resize image. 126 | numpy.ndarray: Randomly resize mask. 127 | list: Randomly resize keypoints. 128 | list: Randomly resize center points. 
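Example (illustrative; ``scale`` comes from the annotation json):
    >>> rr = RandomResized(0.5, 1.1)
    >>> img, mask, kpt, center = rr(img, mask, kpt, center, scale)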
129 | """ 130 | ratio = self.get_params(img, self.scale_min, self.scale_max, scale[0]) 131 | 132 | return resize(img, mask, kpt, center, ratio) 133 | 134 | class TestResized(object): 135 | """Resize the given numpy.ndarray to the size for test. 136 | 137 | Args: 138 | size: the size to resize. 139 | """ 140 | 141 | def __init__(self, size): 142 | assert (isinstance(size, int) or (isinstance(size, collections.Iterable) and len(size) == 2)) 143 | if isinstance(size, int): 144 | self.size = (size, size) 145 | else: 146 | self.size = size 147 | 148 | @staticmethod 149 | def get_params(img, output_size): 150 | 151 | height, width, _ = img.shape 152 | 153 | return (output_size[0] * 1.0 / width, output_size[1] * 1.0 / height) 154 | 155 | def __call__(self, img, mask, kpt, center): 156 | """ 157 | Args: 158 | img (numpy.ndarray): Image to be resized. 159 | mask (numpy.ndarray): Mask to be resized. 160 | kpt (list): keypoints to be resized. 161 | center: (list): center points to be resized. 162 | 163 | Returns: 164 | numpy.ndarray: Randomly resize image. 165 | numpy.ndarray: Randomly resize mask. 166 | list: Randomly resize keypoints. 167 | list: Randomly resize center points. 168 | """ 169 | ratio = self.get_params(img, self.size) 170 | 171 | return resize(img, mask, kpt, center, ratio) 172 | 173 | def rotate(img, mask, kpt, center, degree): 174 | """Rotate the ``numpy.ndarray`` and points as degree. 175 | 176 | Args: 177 | img (numpy.ndarray): Image to be rotated. 178 | mask (numpy.ndarray): Mask to be rotated. 179 | kpt (list): Keypoints to be rotated. 180 | center (list): Center points to be rotated. 181 | degree (number): the degree to rotate. 182 | 183 | Returns: 184 | numpy.ndarray: Resized image. 185 | numpy.ndarray: Resized mask. 186 | list: Resized keypoints. 187 | list: Resized center points. 188 | """ 189 | 190 | height, width, _ = img.shape 191 | 192 | img_center = (width / 2.0 , height / 2.0) 193 | 194 | rotateMat = cv2.getRotationMatrix2D(img_center, degree, 1.0) 195 | cos_val = np.abs(rotateMat[0, 0]) 196 | sin_val = np.abs(rotateMat[0, 1]) 197 | new_width = int(height * sin_val + width * cos_val) 198 | new_height = int(height * cos_val + width * sin_val) 199 | rotateMat[0, 2] += (new_width / 2.) - img_center[0] 200 | rotateMat[1, 2] += (new_height / 2.) - img_center[1] 201 | 202 | img = cv2.warpAffine(img, rotateMat, (new_width, new_height), borderValue=(128, 128, 128)) 203 | mask = cv2.warpAffine(mask, rotateMat, (new_width, new_height), borderValue=(1, 1, 1)) 204 | 205 | num = len(kpt) 206 | length = len(kpt[0]) 207 | for i in range(num): 208 | for j in range(length): 209 | x = kpt[i][j][0] 210 | y = kpt[i][j][1] 211 | p = np.array([x, y, 1]) 212 | p = rotateMat.dot(p) 213 | kpt[i][j][0] = p[0] 214 | kpt[i][j][1] = p[1] 215 | 216 | x = center[i][0] 217 | y = center[i][1] 218 | p = np.array([x, y, 1]) 219 | p = rotateMat.dot(p) 220 | center[i][0] = p[0] 221 | center[i][1] = p[1] 222 | 223 | return np.ascontiguousarray(img), np.ascontiguousarray(mask), kpt, center 224 | 225 | class RandomRotate(object): 226 | """Rotate the input numpy.ndarray and points to the given degree. 227 | 228 | Args: 229 | degree (number): Desired rotate degree. 230 | """ 231 | 232 | def __init__(self, max_degree): 233 | assert isinstance(max_degree, numbers.Number) 234 | self.max_degree = max_degree 235 | 236 | @staticmethod 237 | def get_params(max_degree): 238 | """Get parameters for ``rotate`` for a random rotate. 239 | 240 | Returns: 241 | number: degree to be passed to ``rotate`` for random rotate. 
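Example:
    >>> degree = RandomRotate.get_params(40)  # uniform in [-40, 40]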
242 | """ 243 | degree = random.uniform(-max_degree, max_degree) 244 | 245 | return degree 246 | 247 | def __call__(self, img, mask, kpt, center): 248 | """ 249 | Args: 250 | img (numpy.ndarray): Image to be rotated. 251 | mask (numpy.ndarray): Mask to be rotated. 252 | kpt (list): Keypoints to be rotated. 253 | center (list): Center points to be rotated. 254 | 255 | Returns: 256 | numpy.ndarray: Rotated image. 257 | list: Rotated key points. 258 | """ 259 | degree = self.get_params(self.max_degree) 260 | 261 | return rotate(img, mask, kpt, center, degree) 262 | 263 | def crop(img, mask, kpt, center, offset_left, offset_up, w, h): 264 | 265 | num = len(kpt) 266 | length = len(kpt[0]) 267 | 268 | for x in range(num): 269 | for y in range(length): 270 | kpt[x][y][0] -= offset_left 271 | kpt[x][y][1] -= offset_up 272 | center[x][0] -= offset_left 273 | center[x][1] -= offset_up 274 | 275 | height, width, _ = img.shape 276 | mask = mask.reshape((height, width)) 277 | 278 | new_img = np.empty((h, w, 3), dtype=np.float32) 279 | new_img.fill(128) 280 | 281 | new_mask = np.empty((h, w), dtype=np.float32) 282 | new_mask.fill(1) 283 | 284 | st_x = 0 285 | ed_x = w 286 | st_y = 0 287 | ed_y = h 288 | or_st_x = offset_left 289 | or_ed_x = offset_left + w 290 | or_st_y = offset_up 291 | or_ed_y = offset_up + h 292 | 293 | if offset_left < 0: 294 | st_x = -offset_left 295 | or_st_x = 0 296 | if offset_left + w > width: 297 | ed_x = width - offset_left 298 | or_ed_x = width 299 | if offset_up < 0: 300 | st_y = -offset_up 301 | or_st_y = 0 302 | if offset_up + h > height: 303 | ed_y = height - offset_up 304 | or_ed_y = height 305 | 306 | '''print new_img.shape 307 | print st_y, ed_y, st_x, ed_x 308 | print img.shape 309 | print or_st_y, or_ed_y, or_st_x, or_ed_x 310 | 311 | st_y, ed_y, st_x, ed_x = min(st_y, ed_y), max(st_y, ed_y), min(st_x, ed_x), max(st_x, ed_x) 312 | or_st_y, or_ed_y, or_st_x, or_ed_x = min(or_st_y, or_ed_y), max(or_st_y, or_ed_y), min(or_st_x, or_ed_x), max(or_st_x, or_ed_x)''' 313 | 314 | new_img[st_y: ed_y, st_x: ed_x, :] = img[or_st_y: or_ed_y, or_st_x: or_ed_x, :].copy() 315 | new_mask[st_y: ed_y, st_x: ed_x] = mask[or_st_y: or_ed_y, or_st_x: or_ed_x].copy() 316 | 317 | return np.ascontiguousarray(new_img), np.ascontiguousarray(new_mask), kpt, center 318 | 319 | class RandomCrop(object): 320 | """Crop the given numpy.ndarray and at a random location. 321 | 322 | Args: 323 | size (int): Desired output size of the crop. 324 | """ 325 | 326 | def __init__(self, size, center_perturb_max=40): 327 | assert isinstance(size, numbers.Number) 328 | self.size = (int(size), int(size)) # (w, h) 329 | self.center_perturb_max = center_perturb_max 330 | 331 | @staticmethod 332 | def get_params(img, center, output_size, center_perturb_max): 333 | """Get parameters for ``crop`` for a random crop. 334 | 335 | Args: 336 | img (numpy.ndarray): Image to be cropped. 337 | center (list): the center of main person. 338 | output_size (tuple): Expected output size of the crop. 339 | center_perturb_max (int): the max perturb size. 340 | 341 | Returns: 342 | tuple: params (i, j, h, w) to be passed to ``crop`` for random crop. 
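(Note: this implementation actually returns only the top-left offsets
``(offset_left, offset_up)``; the crop width and height are taken from
``self.size`` in ``__call__``.)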
343 | """ 344 | ratio_x = random.uniform(0, 1) 345 | ratio_y = random.uniform(0, 1) 346 | x_offset = int((ratio_x - 0.5) * 2 * center_perturb_max) 347 | y_offset = int((ratio_y - 0.5) * 2 * center_perturb_max) 348 | center_x = center[0][0] + x_offset 349 | center_y = center[0][1] + y_offset 350 | 351 | return int(round(center_x - output_size[0] / 2)), int(round(center_y - output_size[1] / 2)) 352 | 353 | def __call__(self, img, mask, kpt, center): 354 | """ 355 | Args: 356 | img (numpy.ndarray): Image to be cropped. 357 | mask (numpy.ndarray): Mask to be cropped. 358 | kpt (list): keypoints to be cropped. 359 | center (list): center points to be cropped. 360 | 361 | Returns: 362 | numpy.ndarray: Cropped image. 363 | numpy.ndarray: Cropped mask. 364 | list: Cropped keypoints. 365 | list: Cropped center points. 366 | """ 367 | 368 | offset_left, offset_up = self.get_params(img, center, self.size, self.center_perturb_max) 369 | 370 | return crop(img, mask, kpt, center, offset_left, offset_up, self.size[0], self.size[1]) 371 | 372 | def hflip(img, mask, kpt, center): 373 | 374 | height, width, _ = img.shape 375 | mask = mask.reshape((height, width, 1)) 376 | 377 | img = img[:, ::-1, :] 378 | mask = mask[:, ::-1, :] 379 | 380 | num = len(kpt) 381 | length = len(kpt[0]) 382 | for i in range(num): 383 | for j in range(length): 384 | if kpt[i][j][2] <= 1: 385 | kpt[i][j][0] = width - 1 - kpt[i][j][0] 386 | center[i][0] = width - 1 - center[i][0] 387 | 388 | swap_pair = [[3, 6], [4, 7], [5, 8], [9, 12], [10, 13], [11, 14], [15, 16], [17, 18]] 389 | for x in swap_pair: 390 | for i in range(num): 391 | temp_point = kpt[i][x[0] - 1]#.copy() 392 | kpt[i][x[0] - 1] = kpt[i][x[1] - 1]#.copy() 393 | kpt[i][x[1] - 1] = temp_point#.copy() 394 | 395 | return np.ascontiguousarray(img), np.ascontiguousarray(mask), kpt, center 396 | 397 | class RandomHorizontalFlip(object): 398 | """Random horizontal flip the image. 399 | 400 | Args: 401 | prob (number): the probability to flip. 402 | """ 403 | 404 | def __init__(self, prob=0.5): 405 | self.prob = prob 406 | 407 | def __call__(self, img, mask, kpt, center): 408 | """ 409 | Args: 410 | img (numpy.ndarray): Image to be flipped. 411 | mask (numpy.ndarray): Mask to be flipped. 412 | kpt (list): Keypoints to be flipped. 413 | center (list): Center points to be flipped. 414 | 415 | Returns: 416 | numpy.ndarray: Randomly flipped image. 417 | list: Randomly flipped points. 418 | """ 419 | if random.random() < self.prob: 420 | return hflip(img, mask, kpt, center) 421 | return img, mask, kpt, center 422 | 423 | class Compose(object): 424 | """Composes several transforms together. 425 | 426 | Args: 427 | transforms (list of ``Transform`` objects): list of transforms to compose. 
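Note: the extra ``scale`` argument of ``__call__`` is forwarded only to
``RandomResized``; every other transform receives ``(img, mask, kpt, center)``.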
428 | 
429 | Example:
430 | >>> Mytransforms.Compose([
431 | >>>     Mytransforms.RandomResized(),
432 | >>>     Mytransforms.RandomRotate(40),
433 | >>> ])
434 | """
435 | 
436 | def __init__(self, transforms):
437 |     self.transforms = transforms
438 | 
439 | def __call__(self, img, mask, kpt, center, scale=None):
440 | 
441 |     for t in self.transforms:
442 |         if isinstance(t, RandomResized):
443 |             img, mask, kpt, center = t(img, mask, kpt, center, scale)
444 |         else:
445 |             img, mask, kpt, center = t(img, mask, kpt, center)
446 | 
447 |     return img, mask, kpt, center
448 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 
1 | # Realtime_Multi-Person_Pose_Estimation.PyTorch
2 | PyTorch implementation of Realtime Multi-Person Pose Estimation
3 | 
4 | Original version forked from https://github.com/last-one/Pytorch_Realtime_Multi-Person_Pose_Estimation
5 | 
6 | 
7 | ## Train
8 | 1. Prepare the training data
9 |    - Download COCO train2014 and val2014 from the official website
10 |    - cd ./preprocessing
11 |    - configure generate_json_mask.py (ann_dir .... )
12 |    - run
13 | 
14 | 2. Start training
15 |    - cd ./experiments/baseline/
16 |    - configure coco_loader (line 198 img_path)
17 |    - configure train_pose.py (--train_dir )
18 |    - run
19 | 
20 | 
21 | 
22 | ## Test and eval
23 | 1. Test a single image
24 |    - ./evaluation/test_pose.py
25 | 2. Evaluate the caffemodel downloaded from the author
26 |    - ./evaluation/eval_caffe.py 53.8% (50 images)
27 | 3. Evaluate the PyTorch model converted from the caffemodel
28 |    - ./preprocessing/convert_model.py
29 |    - ./evaluation/eval_pytorch.py 54.4% (50 images), 54.1% (1000 images)
30 | 4. Evaluate a PyTorch model trained by yourself
31 |    - ./evaluation/eval_pytorch.py
32 | 
33 | 
34 | ## Results
35 | 1. caffemodel evaluated by the Python scripts
36 |    - 53.8% (50 images)
37 | 2. PyTorch model converted from Caffe by the Python scripts
38 |    - 54.4% (50 images), 54.1% (1000 images)
39 | 3. PyTorch model trained on train2014
40 |    - 45.9% (50 images), 60000 iters (stepsize = 50000)
41 | 
42 | 
43 | 
44 | ## Experiments
45 | <1> mechanism (eval_mechanism)
46 | 1. heatmap and vecmap generated from GT -> post-processing: 60.8%
47 | 2. add redundant connections: 60.8% -> 67.7%
48 | 3. sigma (7 -> 9): 67.7% -> 68.8%
49 | 4. replace all_peaks with keypoint GT: 68.8% -> 78% (does this mainly affect single- or multi-person cases?)
50 | 
51 | <2> VGG_1branch (./experiments/1branch)
52 | 1. merge the L1 branch and the L2 branch
53 | 2. trainset: valminusminival2014, testset: minival2014
54 | 3. 
42.9% 55 | -------------------------------------------------------------------------------- /coco_loader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.utils.data as data 3 | import numpy as np 4 | import shutil 5 | import time 6 | import random 7 | import os 8 | import math 9 | import json 10 | from PIL import Image 11 | import cv2 12 | import Mytransforms 13 | 14 | def read_data_file(file_dir): 15 | 16 | lists = [] 17 | with open(file_dir, 'r') as fp: 18 | line = fp.readline() 19 | while line: 20 | path = line.strip() 21 | lists.append(path) 22 | line = fp.readline() 23 | 24 | return lists 25 | 26 | def read_json_file(file_dir): 27 | """ 28 | filename: JSON file 29 | 30 | return: two list: key_points list and centers list 31 | """ 32 | fp = open(file_dir) 33 | data = json.load(fp) 34 | kpts = [] 35 | centers = [] 36 | scales = [] 37 | 38 | for info in data: 39 | kpt = [] 40 | center = [] 41 | scale = [] 42 | lists = info['info'] 43 | for x in lists: 44 | kpt.append(x['keypoints']) 45 | center.append(x['pos']) 46 | scale.append(x['scale']) 47 | kpts.append(kpt) 48 | centers.append(center) 49 | scales.append(scale) 50 | fp.close() 51 | 52 | return kpts, centers, scales 53 | 54 | def generate_heatmap(heatmap, kpt, stride, sigma): 55 | 56 | height, width, num_point = heatmap.shape 57 | start = stride / 2.0 - 0.5 58 | 59 | num = len(kpt) 60 | length = len(kpt[0]) 61 | for i in range(num): 62 | for j in range(length): 63 | if kpt[i][j][2] > 1: # not labeled 64 | continue 65 | x = kpt[i][j][0] 66 | y = kpt[i][j][1] 67 | for h in range(height): 68 | for w in range(width): 69 | xx = start + w * stride 70 | yy = start + h * stride 71 | dis = ((xx - x) * (xx - x) + (yy - y) * (yy - y)) / 2.0 / sigma / sigma 72 | if dis > 4.6052: 73 | continue 74 | heatmap[h][w][j] += math.exp(-dis) 75 | if heatmap[h][w][j] > 1: 76 | heatmap[h][w][j] = 1 77 | 78 | return heatmap 79 | 80 | def generate_vector(vector, cnt, kpts, vec_pair, stride, theta): 81 | 82 | height, width, channel = cnt.shape 83 | length = len(kpts) 84 | 85 | for j in range(length): 86 | for i in range(channel): 87 | a = vec_pair[0][i] - 1 88 | b = vec_pair[1][i] - 1 89 | if kpts[j][a][2] > 1 or kpts[j][b][2] > 1: 90 | continue 91 | ax = kpts[j][a][0] * 1.0 / stride 92 | ay = kpts[j][a][1] * 1.0 / stride 93 | bx = kpts[j][b][0] * 1.0 / stride 94 | by = kpts[j][b][1] * 1.0 / stride 95 | 96 | bax = bx - ax 97 | bay = by - ay 98 | norm_ba = math.sqrt(1.0 * bax * bax + bay * bay) + 1e-9 # to aviod two points have same position. 99 | bax /= norm_ba 100 | bay /= norm_ba 101 | 102 | min_w = max(int(round(min(ax, bx) - theta)), 0) 103 | max_w = min(int(round(max(ax, bx) + theta)), width) 104 | min_h = max(int(round(min(ay, by) - theta)), 0) 105 | max_h = min(int(round(max(ay, by) + theta)), height) 106 | 107 | for h in range(min_h, max_h): 108 | for w in range(min_w, max_w): 109 | px = w - ax 110 | py = h - ay 111 | 112 | dis = abs(bay * px - bax * py) 113 | if dis <= theta: 114 | vector[h][w][2 * i] = (vector[h][w][2 * i] * cnt[h][w][i] + bax) / (cnt[h][w][i] + 1) 115 | vector[h][w][2 * i + 1] = (vector[h][w][2 * i + 1] * cnt[h][w][i] + bay) / (cnt[h][w][i] + 1) 116 | cnt[h][w][i] += 1 117 | 118 | return vector 119 | 120 | 121 | def transform_joints(kpts): 122 | ''' 123 | OURS 124 | param.model(id).part_str = {'nose', 'neck', 'Rsho', 'Relb', 'Rwri', ... 125 | 'Lsho', 'Lelb', 'Lwri', ... 126 | 'Rhip', 'Rkne', 'Rank', ... 127 | 'Lhip', 'Lkne', 'Lank', ... 
128 | 'Reye', 'Leye', 'Rear', 'Lear', 'pt19'}; 129 | 130 | 131 | ''' 132 | 133 | COCO_to_ours_1 = [1, 6, 7, 9, 11, 6, 8, 10, 13, 15, 17, 12, 14, 16, 3, 2, 5, 4] 134 | COCO_to_ours_2 = [1, 7, 7, 9, 11, 6, 8, 10, 13, 15, 17, 12, 14, 16, 3, 2, 5, 4] 135 | 136 | if len(kpts) == 0: 137 | return kpts 138 | 139 | new_kpts = np.zeros( 140 | (len(kpts), len(COCO_to_ours_1), 3), 141 | dtype=np.float32 142 | ) 143 | num = len(new_kpts) 144 | length = len(new_kpts[0]) 145 | for i in range(num): 146 | for j in range(length): 147 | new_kpts[i][j][0] = (kpts[i][COCO_to_ours_1[j] - 1][0] + kpts[i][COCO_to_ours_2[j] - 1][0]) * 0.5 148 | new_kpts[i][j][1] = (kpts[i][COCO_to_ours_1[j] - 1][1] + kpts[i][COCO_to_ours_2[j] - 1][1]) * 0.5 149 | 150 | if kpts[i][COCO_to_ours_1[j] - 1][2] == 2 or kpts[i][COCO_to_ours_2[j] - 1][2] == 2: 151 | new_kpts[i][j][2] = 2 152 | else: 153 | new_kpts[i][j][2] = kpts[i][COCO_to_ours_1[j] - 1][2] and kpts[i][COCO_to_ours_2[j] - 1][2] 154 | 155 | return new_kpts 156 | 157 | 158 | class coco_loader(data.Dataset): 159 | 160 | def __init__(self, file_dir, stride, transformer=None): 161 | 162 | self.img_list = read_data_file(file_dir[0]) 163 | self.mask_list = read_data_file(file_dir[1]) 164 | self.kpt_list, self.center_list, self.scale_list = read_json_file(file_dir[2]) 165 | self.stride = stride 166 | self.transformer = transformer 167 | 168 | self.vec_pair = [[2, 9, 10, 2, 12, 13, 2, 3, 4, 3, 2, 6, 7, 6, 2, 1, 1, 15, 16], 169 | [9, 10, 11, 12, 13, 14, 3, 4, 5, 17, 6, 7, 8, 18, 1, 15, 16, 17, 18]] 170 | self.theta = 1.0 171 | self.sigma = 7.0 172 | ''' 173 | 1 'nose', 174 | 2' neck', 175 | 3 'Rsho', 176 | 4 'Relb', 177 | 5 'Rwri', ... 178 | 6 'Lsho', 179 | 7 'Lelb', 180 | 8 'Lwri', ... 181 | 9 'Rhip', 182 | 10'Rkne', 183 | 11'Rank', ... 184 | 12'Lhip', 185 | 13'Lkne', 186 | 14'Lank', ... 
187 | 15'Reye', 188 | 16'Leye', 189 | 17'Rear', 190 | 18'Lear', 'pt19'}; 191 | 192 | self.vec_pair = [[2, 9, 10, 2, 12, 13, 2, 3, 4, 3, 2, 6, 7, 6, 2, 1, 1, 15, 16], 193 | [9, 10, 11, 12, 13, 14, 3, 4, 5, 17, 6, 7, 8, 18, 1, 15, 16, 17, 18]] 194 | ''' 195 | 196 | def __getitem__(self, index): 197 | 198 | img_path = '/home/xiangyu/data/coco/images/train2014/'+ self.img_list[index] 199 | 200 | img = np.array(cv2.imread(img_path), dtype=np.float32) 201 | mask_path = self.mask_list[index] 202 | mask = np.load(mask_path) 203 | mask = np.array(mask, dtype=np.float32) 204 | 205 | kpt = self.kpt_list[index] 206 | center = self.center_list[index] 207 | scale = self.scale_list[index] 208 | 209 | #kpt = transform_joints(kpt) 210 | 211 | img, mask, kpt, center = self.transformer(img, mask, kpt, center, scale) 212 | 213 | height, width, _ = img.shape 214 | 215 | mask = cv2.resize(mask, (width / self.stride, height / self.stride)).reshape((height / self.stride, width / self.stride, 1)) 216 | 217 | heatmap = np.zeros((height / self.stride, width / self.stride, len(kpt[0]) + 1), dtype=np.float32) 218 | heatmap = generate_heatmap(heatmap, kpt, self.stride, self.sigma) 219 | heatmap[:,:,-1] = 1.0 - np.max(heatmap[:,:,:-1], axis=2) # for background 220 | heatmap = heatmap * mask 221 | 222 | vecmap = np.zeros((height / self.stride, width / self.stride, len(self.vec_pair[0]) * 2), dtype=np.float32) 223 | cnt = np.zeros((height / self.stride, width / self.stride, len(self.vec_pair[0])), dtype=np.int32) 224 | 225 | vecmap = generate_vector(vecmap, cnt, kpt, self.vec_pair, self.stride, self.theta) 226 | vecmap = vecmap * mask 227 | 228 | img = Mytransforms.normalize(Mytransforms.to_tensor(img), [128.0, 128.0, 128.0], [256.0, 256.0, 256.0]) # mean, std 229 | mask = Mytransforms.to_tensor(mask) 230 | heatmap = Mytransforms.to_tensor(heatmap) 231 | vecmap = Mytransforms.to_tensor(vecmap) 232 | 233 | # kpts to tensor 234 | #kpt = np.array(kpt) 235 | #kpt = torch.from_numpy(kpt) 236 | 237 | return img, heatmap, vecmap, mask, kpt 238 | 239 | def __len__(self): 240 | 241 | return len(self.img_list) 242 | -------------------------------------------------------------------------------- /evaluation/eval_caffe.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append('/root/deep_learning/caffe/caffe-1.0/python/') 4 | import caffe 5 | import numpy as np 6 | import cv2 7 | from pycocotools.coco import COCO 8 | from pycocotools.cocoeval import COCOeval 9 | from scipy.ndimage.filters import gaussian_filter 10 | import math, time 11 | 12 | ''' 13 | coco_annotations 14 | u'keypoints': [ 15 | 0.u'nose', 16 | 1.u'left_eye', 17 | 2.u'right_eye', 18 | 3.u'left_ear', 19 | 4.u'right_ear', 20 | 5.u'left_shoulder', 21 | 6.u'right_shoulder', 22 | 7.u'left_elbow', 23 | 8.u'right_elbow', 24 | 9.u'left_wrist', 25 | 10.u'right_wrist', 26 | 11.u'left_hip', 27 | 12.u'right_hip', 28 | 13.u'left_knee', 29 | 14.u'right_knee', 30 | 15.u'left_ankle', 31 | 16.u'right_ankle'], 32 | 33 | OURS 34 | param.model(id).part_str = {'nose', 'neck', 'Rsho', 'Relb', 'Rwri', ... 35 | 'Lsho', 'Lelb', 'Lwri', ... 36 | 'Rhip', 'Rkne', 'Rank', ... 37 | 'Lhip', 'Lkne', 'Lank', ... 
38 | 'Leye', 'Reye', 'Lear', 'Rear', 'pt19'}; 39 | 40 | COCO_Oder = [1, 0, 7, 9, 11, 6, 8, 10, 13, 15, 17, 12, 14, 16, 3, 2, 5, 4] 41 | ''' 42 | limbSeq = [[2,3], [2,6], [3,4], [4,5], [6,7], [7,8], [2,9], [9,10], \ 43 | [10,11], [2,12], [12,13], [13,14], [2,1], [1,15], [15,17], \ 44 | [1,16], [16,18], [3,17], [6,18]] 45 | # the middle joints heatmap correpondence 46 | mapIdx = [[31,32], [39,40], [33,34], [35,36], [41,42], [43,44], [19,20], [21,22], \ 47 | [23,24], [25,26], [27,28], [29,30], [47,48], [49,50], [53,54], [51,52], \ 48 | [55,56], [37,38], [45,46]] 49 | 50 | 51 | colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \ 52 | [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \ 53 | [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]] 54 | 55 | 56 | boxsize = 368 57 | scale_search = [0.5, 1.0, 1.5, 2.0] 58 | stride = 8 59 | padValue = 0. 60 | thre1 = 0.1 61 | thre2 = 0.05 62 | stickwidth = 4 63 | 64 | 65 | 66 | def padRightDownCorner(img, stride, padValue): 67 | h = img.shape[0] 68 | w = img.shape[1] 69 | 70 | pad = 4 * [None] 71 | pad[0] = 0 # up 72 | pad[1] = 0 # left 73 | pad[2] = 0 if (h%stride==0) else stride - (h % stride) # down 74 | pad[3] = 0 if (w%stride==0) else stride - (w % stride) # right 75 | 76 | img_padded = img 77 | pad_up = np.tile(img_padded[0:1,:,:]*0 + padValue, (pad[0], 1, 1)) 78 | img_padded = np.concatenate((pad_up, img_padded), axis=0) 79 | pad_left = np.tile(img_padded[:,0:1,:]*0 + padValue, (1, pad[1], 1)) 80 | img_padded = np.concatenate((pad_left, img_padded), axis=1) 81 | pad_down = np.tile(img_padded[-2:-1,:,:]*0 + padValue, (pad[2], 1, 1)) 82 | img_padded = np.concatenate((img_padded, pad_down), axis=0) 83 | pad_right = np.tile(img_padded[:,-2:-1,:]*0 + padValue, (1, pad[3], 1)) 84 | img_padded = np.concatenate((img_padded, pad_right), axis=1) 85 | 86 | return img_padded, pad 87 | 88 | def normalize(origin_img): 89 | 90 | 91 | origin_img = np.array(origin_img, dtype=np.float32) 92 | origin_img -= 128.0 93 | origin_img /= 256.0 94 | 95 | return origin_img 96 | 97 | 98 | def apply_model(oriImg, net, multiplier): 99 | height, width, _ = oriImg.shape 100 | #normed_img = normalize(oriImg) 101 | normed_img = np.array(oriImg, dtype=np.float32) 102 | heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 19)) 103 | paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38)) 104 | stride = 8 105 | 106 | for m in range(len(multiplier)): 107 | scale = multiplier[m] 108 | imageToTest = cv2.resize(normed_img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) 109 | imgToTest_padded, pad = padRightDownCorner(imageToTest, stride, 128) 110 | 111 | imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) 112 | imageToTest_padded, pad = padRightDownCorner(imageToTest, stride, 128) 113 | print imageToTest_padded.shape 114 | 115 | 116 | net.blobs['data'].reshape(*(1, 3, imageToTest_padded.shape[0], imageToTest_padded.shape[1])) 117 | # net.forward() # dry run 118 | net.blobs['data'].data[...] = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), 119 | (3, 2, 0, 1)) / 256 - 0.5; 120 | start_time = time.time() 121 | output_blobs = net.forward() 122 | print('At scale %d, The CNN took %.2f ms.' 
% (m, 1000 * (time.time() - start_time))) 123 | 124 | # extract outputs, resize, and remove padding 125 | heatmap = np.transpose(np.squeeze(net.blobs[output_blobs.keys()[1]].data), (1, 2, 0)) # output 1 is heatmaps 126 | heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) 127 | heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :] 128 | heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC) 129 | 130 | paf = np.transpose(np.squeeze(net.blobs[output_blobs.keys()[0]].data), (1, 2, 0)) # output 0 is PAFs 131 | paf = cv2.resize(paf, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) 132 | paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :] 133 | paf = cv2.resize(paf, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC) 134 | 135 | heatmap_avg = heatmap_avg + heatmap / len(multiplier) 136 | paf_avg = paf_avg + paf / len(multiplier) 137 | 138 | 139 | all_peaks = [] # all of the possible points by classes. 140 | peak_counter = 0 141 | 142 | for part in range(19 - 1): 143 | x_list = [] 144 | y_list = [] 145 | map_ori = heatmap_avg[:, :, part] 146 | map = gaussian_filter(map_ori, sigma=3) 147 | 148 | map_left = np.zeros(map.shape) 149 | map_left[1:, :] = map[:-1, :] 150 | map_right = np.zeros(map.shape) 151 | map_right[:-1, :] = map[1:, :] 152 | map_up = np.zeros(map.shape) 153 | map_up[:, 1:] = map[:, :-1] 154 | map_down = np.zeros(map.shape) 155 | map_down[:, :-1] = map[:, 1:] 156 | 157 | peaks_binary = np.logical_and.reduce( 158 | (map >= map_left, map >= map_right, map >= map_up, map >= map_down, map > thre1)) 159 | peaks = zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]) # note reverse 160 | peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks] 161 | id = range(peak_counter, peak_counter + len(peaks)) 162 | peaks_with_score_and_id = [peaks_with_score[i] + (id[i],) for i in range(len(id))] 163 | 164 | all_peaks.append(peaks_with_score_and_id) 165 | peak_counter += len(peaks) 166 | 167 | connection_all = [] 168 | special_k = [] 169 | mid_num = 10 170 | 171 | for k in range(len(mapIdx)): 172 | score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]] 173 | candA = all_peaks[limbSeq[k][0] - 1] 174 | candB = all_peaks[limbSeq[k][1] - 1] 175 | nA = len(candA) 176 | nB = len(candB) 177 | indexA, indexB = limbSeq[k] 178 | if (nA != 0 and nB != 0): 179 | connection_candidate = [] 180 | for i in range(nA): 181 | for j in range(nB): 182 | vec = np.subtract(candB[j][:2], candA[i][:2]) 183 | norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1]) 184 | vec = np.divide(vec, norm) 185 | 186 | startend = zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), \ 187 | np.linspace(candA[i][1], candB[j][1], num=mid_num)) 188 | 189 | vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] \ 190 | for I in range(len(startend))]) 191 | vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] \ 192 | for I in range(len(startend))]) 193 | 194 | score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1]) 195 | score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min( 196 | 0.5 * oriImg.shape[0] / norm - 1, 0) 197 | criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts) 198 | criterion2 = score_with_dist_prior > 0 199 | if criterion1 and criterion2: 200 | connection_candidate.append( 201 | [i, j, 
score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]]) 202 | 203 | connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True) 204 | connection = np.zeros((0, 5)) 205 | for c in range(len(connection_candidate)): 206 | i, j, s = connection_candidate[c][0:3] 207 | if (i not in connection[:, 3] and j not in connection[:, 4]): 208 | connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]]) 209 | if (len(connection) >= min(nA, nB)): 210 | break 211 | 212 | connection_all.append(connection) 213 | else: 214 | special_k.append(k) 215 | connection_all.append([]) 216 | 217 | subset = -1 * np.ones((0, 20)) 218 | candidate = np.array([item for sublist in all_peaks for item in sublist]) 219 | 220 | for k in range(len(mapIdx)): 221 | if k not in special_k: 222 | partAs = connection_all[k][:, 0] 223 | partBs = connection_all[k][:, 1] 224 | indexA, indexB = np.array(limbSeq[k]) - 1 225 | 226 | for i in range(len(connection_all[k])): # = 1:size(temp,1) 227 | found = 0 228 | subset_idx = [-1, -1] 229 | for j in range(len(subset)): # 1:size(subset,1): 230 | if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]: 231 | subset_idx[found] = j 232 | found += 1 233 | 234 | if found == 1: 235 | j = subset_idx[0] 236 | if (subset[j][indexB] != partBs[i]): 237 | subset[j][indexB] = partBs[i] 238 | subset[j][-1] += 1 239 | subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2] 240 | elif found == 2: # if found 2 and disjoint, merge them 241 | j1, j2 = subset_idx 242 | print "found = 2" 243 | membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2] 244 | if len(np.nonzero(membership == 2)[0]) == 0: # merge 245 | subset[j1][:-2] += (subset[j2][:-2] + 1) 246 | subset[j1][-2:] += subset[j2][-2:] 247 | subset[j1][-2] += connection_all[k][i][2] 248 | subset = np.delete(subset, j2, 0) 249 | else: # as like found == 1 250 | subset[j1][indexB] = partBs[i] 251 | subset[j1][-1] += 1 252 | subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2] 253 | 254 | # if find no partA in the subset, create a new subset 255 | elif not found and k < 17: 256 | row = -1 * np.ones(20) 257 | row[indexA] = partAs[i] 258 | row[indexB] = partBs[i] 259 | row[-1] = 2 260 | row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2] 261 | subset = np.vstack([subset, row]) 262 | 263 | deleteIdx = []; 264 | for i in range(len(subset)): 265 | if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4: 266 | deleteIdx.append(i) 267 | subset = np.delete(subset, deleteIdx, axis=0) 268 | 269 | canvas = oriImg.copy() 270 | # draw points 271 | for i in range(18): 272 | for j in range(len(all_peaks[i])): 273 | cv2.circle(canvas, all_peaks[i][j][0:2], 4, colors[i], thickness=-1) 274 | 275 | # draw lines 276 | for i in range(17): 277 | for n in range(len(subset)): 278 | index = subset[n][np.array(limbSeq[i]) - 1] 279 | if -1 in index: 280 | continue 281 | cur_canvas = canvas.copy() 282 | Y = candidate[index.astype(int), 0] 283 | X = candidate[index.astype(int), 1] 284 | mX = np.mean(X) 285 | mY = np.mean(Y) 286 | length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5 287 | angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1])) 288 | polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1) 289 | cv2.fillConvexPoly(cur_canvas, polygon, colors[i]) 290 | canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0) 291 | 292 | return 
candidate, subset, canvas 293 | 294 | 295 | def main(): 296 | deployFile = '/home/xiangyu/data/pretrain/COCO/pose_deploy.prototxt' 297 | caffemodel = '/home/xiangyu/data/pretrain/COCO/pose_iter_440000.caffemodel' 298 | 299 | img_dir = '/home/xiangyu/data/coco/images/val2014/' 300 | annFile = '/home/xiangyu/data/coco/annotations/person_keypoints_minival2014.json' 301 | num_imgs = 50 # COCO 38% 302 | orderCOCO = [0, -1, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3] #[1, 0, 7, 9, 11, 6, 8, 10, 13, 15, 17, 12, 14, 16, 3, 2, 5, 4] 303 | myjsonValidate = list(dict()) 304 | 305 | cocoGt = COCO(annFile) 306 | img_names = cocoGt.imgs 307 | # filter only person 308 | cats = cocoGt.loadCats(cocoGt.getCatIds()) 309 | catIds = cocoGt.getCatIds(catNms=['person']) 310 | imgIds = cocoGt.getImgIds(catIds=catIds) 311 | 312 | #-------------------------- caffe model------------------ 313 | caffe.set_mode_gpu() 314 | caffe.set_device(0) 315 | net = caffe.Net(deployFile, caffemodel, caffe.TEST) 316 | #-------------------------------------------------------- 317 | # 318 | for i in range(num_imgs): 319 | print('{}/{}'.format(i,num_imgs)) 320 | img_info = cocoGt.loadImgs(imgIds[i])[0] 321 | image_id = img_info['id'] 322 | oriImg = cv2.imread(os.path.join(img_dir, img_info['file_name'])) 323 | multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search] 324 | # apply model 325 | candidate, subset,canvas = apply_model(oriImg, net, multiplier) 326 | cv2.imwrite(os.path.join('./result', img_info['file_name']), canvas) 327 | for j in range(len(subset)): 328 | category_id = 1 329 | keypoints = np.zeros(51) 330 | score = 0 331 | for part in range(18): 332 | if part == 1: 333 | continue 334 | index = int(subset[j][part]) 335 | if index > 0: 336 | #realpart = orderCOCO[part] - 1 337 | realpart = orderCOCO[part] 338 | if realpart == -1: 339 | continue 340 | # if part == 0: 341 | # keypoints[realpart * 3] = candidate[index][0] -0.5 342 | # keypoints[realpart * 3 + 1] = candidate[index][1] -0.5 343 | # keypoints[realpart * 3 + 2] = 1 344 | # # score = score + candidate[index][2] 345 | else: 346 | keypoints[(realpart) * 3] = candidate[index][0] 347 | keypoints[(realpart) * 3 + 1] = candidate[index][1] 348 | keypoints[(realpart) * 3 + 2] = 1 349 | # score = score + candidate[index][2] 350 | 351 | keypoints_list = keypoints.tolist() 352 | current_dict = {'image_id': image_id, 353 | 'category_id': category_id, 354 | 'keypoints': keypoints_list, 355 | 'score': subset[j][-2]} 356 | myjsonValidate.append(current_dict) 357 | #count = count + 1 358 | import json 359 | with open('evaluationResult.json', 'w') as outfile: 360 | json.dump(myjsonValidate, outfile) 361 | resJsonFile = 'evaluationResult.json' 362 | cocoDt2 = cocoGt.loadRes(resJsonFile) 363 | 364 | image_ids = [] 365 | for i in range(num_imgs): 366 | img = cocoGt.loadImgs(imgIds[i])[0] 367 | image_ids.append(img['id']) 368 | # running evaluation 369 | cocoEval = COCOeval(cocoGt, cocoDt2, 'keypoints') 370 | cocoEval.params.imgIds = image_ids 371 | cocoEval.evaluate() 372 | cocoEval.accumulate() 373 | k = cocoEval.summarize() 374 | 375 | if __name__ == '__main__': 376 | main() -------------------------------------------------------------------------------- /evaluation/eval_mechanism.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import cv2 5 | from pycocotools.coco import COCO 6 | from pycocotools.cocoeval import COCOeval 7 | from scipy.ndimage.filters import 
gaussian_filter 8 | import math, time 9 | import torch 10 | sys.path.append('../') 11 | 12 | 13 | limbSeq = [[2,3], [2,6], [3,4], [4,5], [6,7], [7,8], [2,9], [9,10], \ 14 | [10,11], [2,12], [12,13], [13,14], [2,1], [1,15], [15,17], \ 15 | [1,16], [16,18], [3,17], [6,18],\ 16 | [9,12],[3,9],[6,12],[1, 3],[1,6],[9,11], [12,14],[3,5],[6,8]]# new connection from 60.8% to 66.6% 17 | # the middle joints heatmap correpondence 18 | mapIdx = [[31,32], [39,40], [33,34], [35,36], [41,42], [43,44], [19,20], [21,22], \ 19 | [23,24], [25,26], [27,28], [29,30], [47,48], [49,50], [53,54], [51,52], \ 20 | [55,56], [37,38], [45,46],\ 21 | [57,58],[59,60],[61,62],[63,64], [65,66],[67,68],[69,70],[71,12],[73,74]] # new connection 22 | 23 | 24 | colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \ 25 | [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \ 26 | [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85], \ 27 | [0, 0,0],[0, 0,0],[0, 0,0],[0, 0,0],[0, 0,0],[0, 0,0],[0, 0,0],[0, 0,0],[0, 0,0],[0, 0,0],[0, 0,0],[0, 0,0]] 28 | 29 | 30 | boxsize = 368 31 | scale_search = [0.5, 1.0, 1.5, 2.0] 32 | padValue = 0. 33 | thre1 = 0.1 34 | thre2 = 0.05 35 | stickwidth = 4 36 | 37 | 38 | 39 | def padRightDownCorner(img, stride, padValue): 40 | h = img.shape[0] 41 | w = img.shape[1] 42 | 43 | pad = 4 * [None] 44 | pad[0] = 0 # up 45 | pad[1] = 0 # left 46 | pad[2] = 0 if (h%stride==0) else stride - (h % stride) # down 47 | pad[3] = 0 if (w%stride==0) else stride - (w % stride) # right 48 | 49 | img_padded = img 50 | pad_up = np.tile(img_padded[0:1,:,:]*0 + padValue, (pad[0], 1, 1)) 51 | img_padded = np.concatenate((pad_up, img_padded), axis=0) 52 | pad_left = np.tile(img_padded[:,0:1,:]*0 + padValue, (1, pad[1], 1)) 53 | img_padded = np.concatenate((pad_left, img_padded), axis=1) 54 | pad_down = np.tile(img_padded[-2:-1,:,:]*0 + padValue, (pad[2], 1, 1)) 55 | img_padded = np.concatenate((img_padded, pad_down), axis=0) 56 | pad_right = np.tile(img_padded[:,-2:-1,:]*0 + padValue, (1, pad[3], 1)) 57 | img_padded = np.concatenate((img_padded, pad_right), axis=1) 58 | 59 | return img_padded, pad 60 | 61 | def normalize(origin_img): 62 | 63 | 64 | origin_img = np.array(origin_img, dtype=np.float32) 65 | origin_img -= 128.0 66 | origin_img /= 256.0 67 | 68 | return origin_img 69 | 70 | 71 | 72 | def mechanism(img, img_anns): 73 | # -----------------------generate GT------------------------------------- 74 | COCO_TO_OURS = [0, 15, 14, 17, 16, 5, 2, 6, 3, 7, 4, 11, 8, 12, 9, 13, 10] 75 | #vec_pair = [[2, 9, 10, 2, 12, 13, 2, 3, 4, 3, 2, 6, 7, 6, 2, 1, 1, 15, 16, 9, 12, 3, 2, 9], 76 | # [9, 10, 11, 12, 13, 14, 3, 4, 5, 17, 6, 7, 8, 18, 1, 15, 16, 17, 18, 11,14, 5, 7,12]] 77 | vec_pair = [[2, 9, 10, 2, 12, 13, 2, 3, 4, 3, 2, 6, 7, 6, 2, 1, 1, 15, 16, 9, 3, 6, 1, 1, 9, 12,3,6], 78 | [9, 10, 11, 12, 13, 14, 3, 4, 5, 17, 6, 7, 8, 18, 1, 15, 16, 17, 18,12, 9,12, 3, 6,11,14, 5,8]] 79 | 80 | 81 | lists = [] 82 | 83 | numPeople = len(img_anns) 84 | persons = [] 85 | person_centers = [] 86 | for p in range(numPeople): 87 | 88 | if img_anns[p]['num_keypoints'] < 5 or img_anns[p]['area'] < 32 * 32: 89 | continue 90 | kpt = img_anns[p]['keypoints'] 91 | dic = dict() 92 | 93 | # person center 94 | person_center = [img_anns[p]['bbox'][0] + img_anns[p]['bbox'][2] / 2.0, 95 | img_anns[p]['bbox'][1] + img_anns[p]['bbox'][3] / 2.0] 96 | scale = img_anns[p]['bbox'][3] / 368.0 97 | 98 | # skip this person if the distance to exiting 
person is too small 99 | flag = 0 100 | for pc in person_centers: 101 | dis = math.sqrt((person_center[0] - pc[0]) * (person_center[0] - pc[0]) + (person_center[1] - pc[1]) * ( 102 | person_center[1] - pc[1])) 103 | if dis < pc[2] * 0.3: 104 | flag = 1; 105 | break 106 | if flag == 1: 107 | continue 108 | dic['objpos'] = person_center 109 | dic['keypoints'] = np.zeros((17, 3)).tolist() 110 | dic['scale'] = scale 111 | for part in range(17): 112 | dic['keypoints'][part][0] = kpt[part * 3] 113 | dic['keypoints'][part][1] = kpt[part * 3 + 1] 114 | # visiable is 1, unvisiable is 0 and not labeled is 2 115 | if kpt[part * 3 + 2] == 2: 116 | dic['keypoints'][part][2] = 1 117 | elif kpt[part * 3 + 2] == 1: 118 | dic['keypoints'][part][2] = 0 119 | else: 120 | dic['keypoints'][part][2] = 2 121 | 122 | transform_dict = dict() 123 | transform_dict['keypoints'] = np.zeros((18, 3)).tolist() 124 | for i in range(17): 125 | transform_dict['keypoints'][COCO_TO_OURS[i]][0] = dic['keypoints'][i][0] 126 | transform_dict['keypoints'][COCO_TO_OURS[i]][1] = dic['keypoints'][i][1] 127 | transform_dict['keypoints'][COCO_TO_OURS[i]][2] = dic['keypoints'][i][2] 128 | transform_dict['keypoints'][1][0] = (dic['keypoints'][5][0] + dic['keypoints'][6][0]) * 0.5 129 | transform_dict['keypoints'][1][1] = (dic['keypoints'][5][1] + dic['keypoints'][6][1]) * 0.5 130 | 131 | if dic['keypoints'][5][2] == dic['keypoints'][6][2]: 132 | transform_dict['keypoints'][1][2] = dic['keypoints'][5][2] 133 | elif dic['keypoints'][5][2] == 2 or dic['keypoints'][6][2] == 2: 134 | transform_dict['keypoints'][1][2] = 2 135 | else: 136 | transform_dict['keypoints'][1][2] = 0 137 | 138 | persons.append(transform_dict) 139 | 140 | kpt = [] 141 | for person in persons: 142 | kpt.append(person['keypoints']) 143 | 144 | if len(kpt) == 0: 145 | return [],[],img.copy() 146 | 147 | import coco_loader 148 | stride = 8 149 | theta = 1 150 | sigma = 9#7.0 151 | height, width, _ = img.shape 152 | heatmap = np.zeros((height / stride, width / stride, len(kpt[0]) + 1), dtype=np.float32) 153 | heatmap = coco_loader.generate_heatmap(heatmap, kpt, stride, sigma) 154 | heatmap[:, :, -1] = 1.0 - np.max(heatmap[:, :, :-1], axis=2) # for background 155 | 156 | vecmap = np.zeros((height / stride, width / stride, len(vec_pair[0]) * 2), dtype=np.float32) 157 | cnt = np.zeros((height / stride, width / stride, len(vec_pair[0])), dtype=np.int32) 158 | vecmap = coco_loader.generate_vector(vecmap, cnt, kpt, vec_pair, stride, theta) 159 | 160 | heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC) 161 | vecmap = cv2.resize(vecmap, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC) 162 | # ---------------post processing---------------------------------- 163 | all_peaks = [] # all of the possible points by classes. 
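# Peak finding below is non-maximum suppression on the Gaussian-smoothed heatmap:
# a pixel is kept as a joint candidate if it is >= its four neighbors and exceeds thre1.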
164 | peak_counter = 0 165 | 166 | for part in range(19 - 1): 167 | x_list = [] 168 | y_list = [] 169 | map_ori = heatmap[:, :, part] 170 | map = gaussian_filter(map_ori, sigma=3) 171 | 172 | map_left = np.zeros(map.shape) 173 | map_left[1:, :] = map[:-1, :] 174 | map_right = np.zeros(map.shape) 175 | map_right[:-1, :] = map[1:, :] 176 | map_up = np.zeros(map.shape) 177 | map_up[:, 1:] = map[:, :-1] 178 | map_down = np.zeros(map.shape) 179 | map_down[:, :-1] = map[:, 1:] 180 | 181 | peaks_binary = np.logical_and.reduce( 182 | (map >= map_left, map >= map_right, map >= map_up, map >= map_down, map > thre1)) 183 | peaks = zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]) # note reverse 184 | peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks] 185 | id = range(peak_counter, peak_counter + len(peaks)) 186 | peaks_with_score_and_id = [peaks_with_score[i] + (id[i],) for i in range(len(id))] 187 | 188 | all_peaks.append(peaks_with_score_and_id) 189 | peak_counter += len(peaks) 190 | 191 | connection_all = [] 192 | special_k = [] 193 | mid_num = 10 194 | 195 | ####------------- replace heatmap with keypoints GT 68% -> 78.7%--------------- 196 | 197 | all_peaks = [] 198 | peak_counter = 0 199 | for part in range(19 - 1): 200 | peaks = [] 201 | for person in kpt: 202 | if person[part][2] is not 2: 203 | peaks.append(tuple([int(person[part][0]),int(person[part][1]),1,peak_counter])) 204 | peak_counter = peak_counter + 1 205 | #peak_counter += len(peaks) 206 | all_peaks.append(peaks) 207 | 208 | ####--------------------------------------------------------------------------- 209 | 210 | 211 | for k in range(len(mapIdx)): 212 | score_mid = vecmap[:, :, [x - 19 for x in mapIdx[k]]] 213 | candA = all_peaks[limbSeq[k][0] - 1] 214 | candB = all_peaks[limbSeq[k][1] - 1] 215 | nA = len(candA) 216 | nB = len(candB) 217 | indexA, indexB = limbSeq[k] 218 | if (nA != 0 and nB != 0): 219 | connection_candidate = [] 220 | for i in range(nA): 221 | for j in range(nB): 222 | vec = np.subtract(candB[j][:2], candA[i][:2]) 223 | norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1]) 224 | vec = np.divide(vec, norm) 225 | 226 | startend = zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), \ 227 | np.linspace(candA[i][1], candB[j][1], num=mid_num)) 228 | 229 | vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] \ 230 | for I in range(len(startend))]) 231 | vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] \ 232 | for I in range(len(startend))]) 233 | 234 | score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1]) 235 | #score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min( 236 | # 0.5 * oriImg.shape[0] / norm - 1, 0) 237 | score_with_dist_prior = sum(score_midpts) / len(score_midpts) 238 | 239 | criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts) 240 | criterion2 = score_with_dist_prior > 0 241 | if criterion1 and criterion2: 242 | connection_candidate.append( 243 | [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]]) 244 | 245 | connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True) 246 | connection = np.zeros((0, 5)) 247 | for c in range(len(connection_candidate)): 248 | i, j, s = connection_candidate[c][0:3] 249 | if (i not in connection[:, 3] and j not in connection[:, 4]): 250 | connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]]) 251 | if (len(connection) >= min(nA, nB)): 252 | break 
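# Greedy bipartite matching just completed: candidates were sorted by descending
# PAF score and each peak joins at most one connection per limb type, so at most
# min(nA, nB) connections survive.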
253 | 254 | connection_all.append(connection) 255 | else: 256 | special_k.append(k) 257 | connection_all.append([]) 258 | 259 | subset = -1 * np.ones((0, 20)) 260 | candidate = np.array([item for sublist in all_peaks for item in sublist]) 261 | 262 | for k in range(len(mapIdx)): 263 | if k not in special_k: 264 | partAs = connection_all[k][:, 0] 265 | partBs = connection_all[k][:, 1] 266 | indexA, indexB = np.array(limbSeq[k]) - 1 267 | 268 | for i in range(len(connection_all[k])): # = 1:size(temp,1) 269 | found = 0 270 | subset_idx = [-1, -1] 271 | for j in range(len(subset)): # 1:size(subset,1): 272 | if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]: 273 | if found<2: # add 274 | subset_idx[found] = j 275 | found += 1 276 | 277 | if found == 1: 278 | j = subset_idx[0] 279 | if (subset[j][indexB] != partBs[i]): 280 | subset[j][indexB] = partBs[i] 281 | subset[j][-1] += 1 282 | subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2] 283 | elif found == 2: # if found 2 and disjoint, merge them 284 | j1, j2 = subset_idx 285 | print "found = 2" 286 | membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2] 287 | if len(np.nonzero(membership == 2)[0]) == 0: # merge 288 | subset[j1][:-2] += (subset[j2][:-2] + 1) 289 | subset[j1][-2:] += subset[j2][-2:] 290 | subset[j1][-2] += connection_all[k][i][2] 291 | subset = np.delete(subset, j2, 0) 292 | else: # as like found == 1 293 | subset[j1][indexB] = partBs[i] 294 | subset[j1][-1] += 1 295 | subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2] 296 | 297 | # if find no partA in the subset, create a new subset 298 | elif not found and k < len(mapIdx): 299 | row = -1 * np.ones(20) 300 | row[indexA] = partAs[i] 301 | row[indexB] = partBs[i] 302 | row[-1] = 2 303 | row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2] 304 | subset = np.vstack([subset, row]) 305 | 306 | deleteIdx = []; 307 | for i in range(len(subset)): 308 | if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4: 309 | deleteIdx.append(i) 310 | subset = np.delete(subset, deleteIdx, axis=0) 311 | 312 | canvas = img.copy() 313 | # draw points 314 | for i in range(18): 315 | for j in range(len(all_peaks[i])): 316 | cv2.circle(canvas, all_peaks[i][j][0:2], 4, colors[i], thickness=-1) 317 | 318 | # draw lines 319 | for i in range(len(limbSeq)): 320 | for n in range(len(subset)): 321 | index = subset[n][np.array(limbSeq[i]) - 1] 322 | if -1 in index: 323 | continue 324 | cur_canvas = canvas.copy() 325 | Y = candidate[index.astype(int), 0] 326 | X = candidate[index.astype(int), 1] 327 | mX = np.mean(X) 328 | mY = np.mean(Y) 329 | length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5 330 | angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1])) 331 | polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1) 332 | cv2.fillConvexPoly(cur_canvas, polygon, colors[i]) 333 | canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0) 334 | 335 | 336 | return candidate, subset, canvas 337 | 338 | def main(): 339 | import pose_estimation 340 | 341 | model = pose_estimation.PoseModel(num_point=19, num_vector=19) 342 | 343 | img_dir = '/home/bst2017/workspace/data/coco/images/val2014/' 344 | annFile = '/home/bst2017/workspace/data/coco/annotations/person_keypoints_minival2014.json' 345 | num_imgs = 50#50 # COCO 38% 346 | orderCOCO = [0, -1, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3] #[1, 0, 7, 9, 11, 6, 8, 10, 
13, 15, 17, 12, 14, 16, 3, 2, 5, 4] 347 | myjsonValidate = list(dict()) 348 | 349 | cocoGt = COCO(annFile) 350 | img_names = cocoGt.imgs 351 | # filter only person 352 | cats = cocoGt.loadCats(cocoGt.getCatIds()) 353 | catIds = cocoGt.getCatIds(catNms=['person']) 354 | imgIds = cocoGt.getImgIds(catIds=catIds) 355 | 356 | #ids = list(cocoGt.imgs.keys()) 357 | #-------------------------------------------------------- 358 | # 359 | for i in range(num_imgs): 360 | print('{}/{}'.format(i,num_imgs)) 361 | img_info = cocoGt.loadImgs(imgIds[i])[0] 362 | image_id = img_info['id'] 363 | oriImg = cv2.imread(os.path.join(img_dir, img_info['file_name'])) 364 | ann_ids = cocoGt.getAnnIds(imgIds=image_id) 365 | img_anns = cocoGt.loadAnns(ann_ids) 366 | 367 | candidate, subset,canvas = mechanism(oriImg, img_anns) 368 | cv2.imwrite(os.path.join('./result', img_info['file_name']), canvas) 369 | for j in range(len(subset)): 370 | category_id = 1 371 | keypoints = np.zeros(51) 372 | score = 0 373 | for part in range(18): 374 | if part == 1: 375 | continue 376 | index = int(subset[j][part]) 377 | if index > 0: 378 | #realpart = orderCOCO[part] - 1 379 | realpart = orderCOCO[part] 380 | if realpart == -1: 381 | continue 382 | # if part == 0: 383 | # keypoints[realpart * 3] = candidate[index][0] -0.5 384 | # keypoints[realpart * 3 + 1] = candidate[index][1] -0.5 385 | # keypoints[realpart * 3 + 2] = 1 386 | # # score = score + candidate[index][2] 387 | else: 388 | keypoints[(realpart) * 3] = candidate[index][0] 389 | keypoints[(realpart) * 3 + 1] = candidate[index][1] 390 | keypoints[(realpart) * 3 + 2] = 2 391 | # score = score + candidate[index][2] 392 | 393 | keypoints_list = keypoints.tolist() 394 | current_dict = {'image_id': image_id, 395 | 'category_id': category_id, 396 | 'keypoints': keypoints_list, 397 | 'score': subset[j][-2]} 398 | myjsonValidate.append(current_dict) 399 | #count = count + 1 400 | import json 401 | with open('evaluationResult.json', 'w') as outfile: 402 | json.dump(myjsonValidate, outfile) 403 | resJsonFile = 'evaluationResult.json' 404 | cocoDt2 = cocoGt.loadRes(resJsonFile) 405 | 406 | image_ids = [] 407 | for i in range(num_imgs): 408 | img = cocoGt.loadImgs(imgIds[i])[0] 409 | image_ids.append(img['id']) 410 | # running evaluation 411 | cocoEval = COCOeval(cocoGt, cocoDt2, 'keypoints') 412 | cocoEval.params.imgIds = image_ids 413 | cocoEval.evaluate() 414 | cocoEval.accumulate() 415 | k = cocoEval.summarize() 416 | 417 | if __name__ == '__main__': 418 | main() -------------------------------------------------------------------------------- /evaluation/eval_pytorch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import cv2 5 | from pycocotools.coco import COCO 6 | from pycocotools.cocoeval import COCOeval 7 | from scipy.ndimage.filters import gaussian_filter 8 | import math, time 9 | import torch 10 | sys.path.append('../') 11 | 12 | 13 | limbSeq = [[2,3], [2,6], [3,4], [4,5], [6,7], [7,8], [2,9], [9,10], \ 14 | [10,11], [2,12], [12,13], [13,14], [2,1], [1,15], [15,17], \ 15 | [1,16], [16,18], [3,17], [6,18]] 16 | # the middle joints heatmap correpondence 17 | mapIdx = [[31,32], [39,40], [33,34], [35,36], [41,42], [43,44], [19,20], [21,22], \ 18 | [23,24], [25,26], [27,28], [29,30], [47,48], [49,50], [53,54], [51,52], \ 19 | [55,56], [37,38], [45,46]] 20 | 21 | 22 | colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \ 23 | [0, 255, 
85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \ 24 | [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]] 25 | 26 | 27 | boxsize = 368 28 | scale_search = [0.5, 1.0, 1.5, 2.0] 29 | stride = 8 30 | padValue = 0. 31 | thre1 = 0.1 32 | thre2 = 0.05 33 | stickwidth = 4 34 | 35 | 36 | 37 | def padRightDownCorner(img, stride, padValue): 38 | h = img.shape[0] 39 | w = img.shape[1] 40 | 41 | pad = 4 * [None] 42 | pad[0] = 0 # up 43 | pad[1] = 0 # left 44 | pad[2] = 0 if (h%stride==0) else stride - (h % stride) # down 45 | pad[3] = 0 if (w%stride==0) else stride - (w % stride) # right 46 | 47 | img_padded = img 48 | pad_up = np.tile(img_padded[0:1,:,:]*0 + padValue, (pad[0], 1, 1)) 49 | img_padded = np.concatenate((pad_up, img_padded), axis=0) 50 | pad_left = np.tile(img_padded[:,0:1,:]*0 + padValue, (1, pad[1], 1)) 51 | img_padded = np.concatenate((pad_left, img_padded), axis=1) 52 | pad_down = np.tile(img_padded[-2:-1,:,:]*0 + padValue, (pad[2], 1, 1)) 53 | img_padded = np.concatenate((img_padded, pad_down), axis=0) 54 | pad_right = np.tile(img_padded[:,-2:-1,:]*0 + padValue, (1, pad[3], 1)) 55 | img_padded = np.concatenate((img_padded, pad_right), axis=1) 56 | 57 | return img_padded, pad 58 | 59 | def normalize(origin_img): 60 | 61 | 62 | origin_img = np.array(origin_img, dtype=np.float32) 63 | origin_img -= 128.0 64 | origin_img /= 256.0 65 | 66 | return origin_img 67 | 68 | 69 | def apply_model(oriImg, model, multiplier): 70 | height, width, _ = oriImg.shape 71 | #normed_img = normalize(oriImg) 72 | normed_img = np.array(oriImg, dtype=np.float32) 73 | heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 19),dtype=np.float32) 74 | paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38),dtype=np.float32) 75 | stride = 8 76 | 77 | for m in range(len(multiplier)): 78 | scale = multiplier[m] 79 | imageToTest = cv2.resize(normed_img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) 80 | imgToTest_padded, pad = padRightDownCorner(imageToTest, stride, 128) 81 | 82 | input_img = np.transpose(np.float32(imgToTest_padded[:, :, :, np.newaxis]), 83 | (3, 2, 0, 1)) / 256 - 0.5 # required shape (1, c, h, w) 84 | mask = np.ones((1, 1, input_img.shape[2] / stride, input_img.shape[3] / stride), dtype=np.float32) 85 | 86 | input_var = torch.autograd.Variable(torch.from_numpy(input_img).cuda()) 87 | mask_var = torch.autograd.Variable(torch.from_numpy(mask).cuda()) 88 | 89 | # get the features 90 | vec1, heat1, vec2, heat2, vec3, heat3, vec4, heat4, vec5, heat5, vec6, heat6 = model(input_var, mask_var) 91 | 92 | # get the heatmap 93 | heatmap = heat6.data.cpu().numpy() 94 | heatmap = np.transpose(np.squeeze(heatmap), (1, 2, 0)) # (h, w, c) 95 | heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) 96 | heatmap = heatmap[:imgToTest_padded.shape[0] - pad[2], :imgToTest_padded.shape[1] - pad[3], :] 97 | heatmap = cv2.resize(heatmap, (width, height), interpolation=cv2.INTER_CUBIC) 98 | heatmap_avg = heatmap_avg + heatmap / len(multiplier) 99 | 100 | # get the paf 101 | paf = vec6.data.cpu().numpy() 102 | paf = np.transpose(np.squeeze(paf), (1, 2, 0)) # (h, w, c) 103 | paf = cv2.resize(paf, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) 104 | paf = paf[:imgToTest_padded.shape[0] - pad[2], :imgToTest_padded.shape[1] - pad[3], :] 105 | paf = cv2.resize(paf, (width, height), interpolation=cv2.INTER_CUBIC) 106 | paf_avg = paf_avg + paf / len(multiplier) 107 | 108 | all_peaks = [] # all of the possible 
points by classes. 109 | peak_counter = 0 110 | 111 | for part in range(19 - 1): 112 | x_list = [] 113 | y_list = [] 114 | map_ori = heatmap_avg[:, :, part] 115 | map = gaussian_filter(map_ori, sigma=3) 116 | 117 | map_left = np.zeros(map.shape) 118 | map_left[1:, :] = map[:-1, :] 119 | map_right = np.zeros(map.shape) 120 | map_right[:-1, :] = map[1:, :] 121 | map_up = np.zeros(map.shape) 122 | map_up[:, 1:] = map[:, :-1] 123 | map_down = np.zeros(map.shape) 124 | map_down[:, :-1] = map[:, 1:] 125 | 126 | peaks_binary = np.logical_and.reduce( 127 | (map >= map_left, map >= map_right, map >= map_up, map >= map_down, map > thre1)) 128 | peaks = zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]) # note reverse 129 | peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks] 130 | id = range(peak_counter, peak_counter + len(peaks)) 131 | peaks_with_score_and_id = [peaks_with_score[i] + (id[i],) for i in range(len(id))] 132 | 133 | all_peaks.append(peaks_with_score_and_id) 134 | peak_counter += len(peaks) 135 | 136 | connection_all = [] 137 | special_k = [] 138 | mid_num = 10 139 | 140 | for k in range(len(mapIdx)): 141 | score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]] 142 | candA = all_peaks[limbSeq[k][0] - 1] 143 | candB = all_peaks[limbSeq[k][1] - 1] 144 | nA = len(candA) 145 | nB = len(candB) 146 | indexA, indexB = limbSeq[k] 147 | if (nA != 0 and nB != 0): 148 | connection_candidate = [] 149 | for i in range(nA): 150 | for j in range(nB): 151 | vec = np.subtract(candB[j][:2], candA[i][:2]) 152 | norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1]) 153 | vec = np.divide(vec, norm) 154 | 155 | startend = zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), \ 156 | np.linspace(candA[i][1], candB[j][1], num=mid_num)) 157 | 158 | vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] \ 159 | for I in range(len(startend))]) 160 | vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] \ 161 | for I in range(len(startend))]) 162 | 163 | score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1]) 164 | #score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min( 165 | # 0.5 * oriImg.shape[0] / norm - 1, 0) 166 | score_with_dist_prior = sum(score_midpts) / len(score_midpts) 167 | 168 | criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts) 169 | criterion2 = score_with_dist_prior > 0 170 | if criterion1 and criterion2: 171 | connection_candidate.append( 172 | [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]]) 173 | 174 | connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True) 175 | connection = np.zeros((0, 5)) 176 | for c in range(len(connection_candidate)): 177 | i, j, s = connection_candidate[c][0:3] 178 | if (i not in connection[:, 3] and j not in connection[:, 4]): 179 | connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]]) 180 | if (len(connection) >= min(nA, nB)): 181 | break 182 | 183 | connection_all.append(connection) 184 | else: 185 | special_k.append(k) 186 | connection_all.append([]) 187 | 188 | subset = -1 * np.ones((0, 20)) 189 | candidate = np.array([item for sublist in all_peaks for item in sublist]) 190 | 191 | for k in range(len(mapIdx)): 192 | if k not in special_k: 193 | partAs = connection_all[k][:, 0] 194 | partBs = connection_all[k][:, 1] 195 | indexA, indexB = np.array(limbSeq[k]) - 1 196 | 197 | for i in range(len(connection_all[k])): # = 1:size(temp,1) 198 | 
found = 0
199 | subset_idx = [-1, -1]
200 | for j in range(len(subset)): # 1:size(subset,1):
201 | if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]: # note: unlike eval_mechanism.py, there is no found < 2 guard here
202 | subset_idx[found] = j
203 | found += 1
204 |
205 | if found == 1:
206 | j = subset_idx[0]
207 | if (subset[j][indexB] != partBs[i]):
208 | subset[j][indexB] = partBs[i]
209 | subset[j][-1] += 1
210 | subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
211 | elif found == 2: # if found 2 and disjoint, merge them
212 | j1, j2 = subset_idx
213 | print('found = 2')
214 | membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
215 | if len(np.nonzero(membership == 2)[0]) == 0: # merge
216 | subset[j1][:-2] += (subset[j2][:-2] + 1)
217 | subset[j1][-2:] += subset[j2][-2:]
218 | subset[j1][-2] += connection_all[k][i][2]
219 | subset = np.delete(subset, j2, 0)
220 | else: # same as the found == 1 case
221 | subset[j1][indexB] = partBs[i]
222 | subset[j1][-1] += 1
223 | subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
224 |
225 | # if no partA is found in any existing subset, create a new subset
226 | elif not found and k < 17:
227 | row = -1 * np.ones(20)
228 | row[indexA] = partAs[i]
229 | row[indexB] = partBs[i]
230 | row[-1] = 2
231 | row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
232 | subset = np.vstack([subset, row])
233 |
234 | deleteIdx = []
235 | for i in range(len(subset)):
236 | if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
237 | deleteIdx.append(i)
238 | subset = np.delete(subset, deleteIdx, axis=0)
239 |
240 | canvas = oriImg.copy()
241 | # draw points
242 | for i in range(18):
243 | for j in range(len(all_peaks[i])):
244 | cv2.circle(canvas, all_peaks[i][j][0:2], 4, colors[i], thickness=-1)
245 |
246 | # draw lines
247 | for i in range(17):
248 | for n in range(len(subset)):
249 | index = subset[n][np.array(limbSeq[i]) - 1]
250 | if -1 in index:
251 | continue
252 | cur_canvas = canvas.copy()
253 | Y = candidate[index.astype(int), 0]
254 | X = candidate[index.astype(int), 1]
255 | mX = np.mean(X)
256 | mY = np.mean(Y)
257 | length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
258 | angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
259 | polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
260 | cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
261 | canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
262 |
263 | return candidate, subset, canvas
264 |
265 |
266 | def main():
267 | import pose_estimation
268 | #pytorch_model = '/home/xiangyu/data/pretrain/COCO/coco_pose_iter_440000.pth.tar'
269 | pytorch_model = '/home/xiangyu/samsung_pose/experiments/baseline/60000.pth.tar'
270 | model = pose_estimation.PoseModel(num_point=19, num_vector=19)
271 |
272 | img_dir = '/home/xiangyu/data/coco/images/val2014/'
273 | annFile = '/home/xiangyu/data/coco/annotations/person_keypoints_minival2014.json'
274 | num_imgs = 50
275 | orderCOCO = [0, -1, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3] #[1, 0, 7, 9, 11, 6, 8, 10, 13, 15, 17, 12, 14, 16, 3, 2, 5, 4]
276 | myjsonValidate = [] # detections in the COCO keypoint result format
277 |
278 | cocoGt = COCO(annFile)
279 | img_names = cocoGt.imgs
280 | # filter only person
281 | cats = cocoGt.loadCats(cocoGt.getCatIds())
282 | catIds = cocoGt.getCatIds(catNms=['person'])
283 | imgIds = cocoGt.getImgIds(catIds=catIds)
284 |
285 | #-------------------------- pytorch model------------------
286 | state_dict = 
torch.load(pytorch_model)['state_dict'] 287 | model.load_state_dict(state_dict) 288 | model = model.cuda() 289 | model.eval() 290 | #-------------------------------------------------------- 291 | # 292 | for i in range(num_imgs): 293 | print('{}/{}'.format(i,num_imgs)) 294 | img_info = cocoGt.loadImgs(imgIds[i])[0] 295 | image_id = img_info['id'] 296 | oriImg = cv2.imread(os.path.join(img_dir, img_info['file_name'])) 297 | multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search] 298 | # apply model 299 | candidate, subset,canvas = apply_model(oriImg, model, multiplier) 300 | #cv2.imwrite(os.path.join('./result', img_info['file_name']), canvas) 301 | for j in range(len(subset)): 302 | category_id = 1 303 | keypoints = np.zeros(51) 304 | score = 0 305 | for part in range(18): 306 | if part == 1: 307 | continue 308 | index = int(subset[j][part]) 309 | if index > 0: 310 | #realpart = orderCOCO[part] - 1 311 | realpart = orderCOCO[part] 312 | if realpart == -1: 313 | continue 314 | # if part == 0: 315 | # keypoints[realpart * 3] = candidate[index][0] -0.5 316 | # keypoints[realpart * 3 + 1] = candidate[index][1] -0.5 317 | # keypoints[realpart * 3 + 2] = 1 318 | # # score = score + candidate[index][2] 319 | else: 320 | keypoints[(realpart) * 3] = candidate[index][0] 321 | keypoints[(realpart) * 3 + 1] = candidate[index][1] 322 | keypoints[(realpart) * 3 + 2] = 1 323 | # score = score + candidate[index][2] 324 | 325 | keypoints_list = keypoints.tolist() 326 | current_dict = {'image_id': image_id, 327 | 'category_id': category_id, 328 | 'keypoints': keypoints_list, 329 | 'score': subset[j][-2]} 330 | myjsonValidate.append(current_dict) 331 | #count = count + 1 332 | import json 333 | with open('evaluationResult.json', 'w') as outfile: 334 | json.dump(myjsonValidate, outfile) 335 | resJsonFile = 'evaluationResult.json' 336 | cocoDt2 = cocoGt.loadRes(resJsonFile) 337 | 338 | image_ids = [] 339 | for i in range(num_imgs): 340 | img = cocoGt.loadImgs(imgIds[i])[0] 341 | image_ids.append(img['id']) 342 | # running evaluation 343 | cocoEval = COCOeval(cocoGt, cocoDt2, 'keypoints') 344 | cocoEval.params.imgIds = image_ids 345 | cocoEval.evaluate() 346 | cocoEval.accumulate() 347 | k = cocoEval.summarize() 348 | 349 | if __name__ == '__main__': 350 | main() -------------------------------------------------------------------------------- /evaluation/ski.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xiangyu-CAS/Realtime_Multi-Person_Pose_Estimation.PyTorch/79cc5af5878171f83be86ffdb9af1c6ac09116a4/evaluation/ski.jpg -------------------------------------------------------------------------------- /evaluation/test.sh: -------------------------------------------------------------------------------- 1 | python test_pose.py --image ski.jpg --output ./ --model /data/xiaobing.wang/qy.feng/Pytorch_RMPE/training/openpose_coco_best.pth.tar 2 | -------------------------------------------------------------------------------- /evaluation/test_pose.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import math 4 | import time 5 | import numpy as np 6 | from scipy.ndimage.filters import gaussian_filter 7 | import sys 8 | sys.path.append('..') 9 | import torch 10 | import pose_estimation 11 | import cv2 12 | #import model.vgg_1branch as vgg_1branch 13 | 14 | # limbSeq = [[3,4], [4,5], [6,7], [7,8], [9,10], [10,11], [12,13], [13,14], [1,2], [2,9], [2,12], [2,3], [2,6], \ 
15 | # [3,17],[6,18],[1,16],[1,15],[16,18],[15,17]] 16 | # 17 | # # limbSeq = [[3,4], [4,5], [6,7],[9,10],[10,11],[12,13],[13,14],[1,2],[2,9],[2,12], [2,3],[2,6], \ 18 | # # [3,17],[6,18],[1,15],[1,16],[15,17],[16,18]] 19 | # 20 | # mapIdx = [[19,20],[21,22],[23,24],[25,26],[27,28],[29,30],[31,32],[33,34],[35,36],[37,38],[39,40], \ 21 | # [41,42],[43,44],[45,46],[47,48],[49,50],[51,52],[53,54],[55,56]] 22 | 23 | # find connection in the specified sequence, center 29 is in the position 15 24 | limbSeq = [[2,3], [2,6], [3,4], [4,5], [6,7], [7,8], [2,9], [9,10], \ 25 | [10,11], [2,12], [12,13], [13,14], [2,1], [1,15], [15,17], \ 26 | [1,16], [16,18], [3,17], [6,18]] 27 | # the middle joints heatmap correpondence 28 | mapIdx = [[31,32], [39,40], [33,34], [35,36], [41,42], [43,44], [19,20], [21,22], \ 29 | [23,24], [25,26], [27,28], [29,30], [47,48], [49,50], [53,54], [51,52], \ 30 | [55,56], [37,38], [45,46]] 31 | 32 | 33 | colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \ 34 | [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \ 35 | [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]] 36 | 37 | boxsize = 368 38 | scale_search = [0.5, 1.0, 1.5, 2.0] 39 | stride = 8 40 | padValue = 0. 41 | thre_point = 0.15 42 | thre_line = 0.05 43 | stickwidth = 4 44 | 45 | def construct_model(args): 46 | 47 | model = pose_estimation.PoseModel(num_point=19, num_vector=19) 48 | #model = vgg_1branch.PoseModel(num_point=19, num_vector=19) 49 | 50 | state_dict = torch.load(args.model)['state_dict'] 51 | # from collections import OrderedDict 52 | # new_state_dict = OrderedDict() 53 | # for k, v in state_dict.items(): 54 | # name = k[7:] 55 | # new_state_dict[name] = v 56 | # state_dict = model.state_dict() 57 | # state_dict.update(new_state_dict) 58 | model.load_state_dict(state_dict) 59 | model = model.cuda() 60 | model.eval() 61 | 62 | return model 63 | 64 | def padRightDownCorner(img, stride, padValue): 65 | 66 | h = img.shape[0] 67 | w = img.shape[1] 68 | 69 | pad = 4 * [None] 70 | pad[0] = 0 # up 71 | pad[1] = 0 # left 72 | pad[2] = 0 if (h%stride==0) else stride - (h % stride) # down 73 | pad[3] = 0 if (w%stride==0) else stride - (w % stride) # right 74 | 75 | img_padded = img 76 | pad_up = np.tile(img_padded[0:1,:,:]*0 + padValue, (pad[0], 1, 1)) 77 | img_padded = np.concatenate((pad_up, img_padded), axis=0) 78 | pad_left = np.tile(img_padded[:,0:1,:]*0 + padValue, (1, pad[1], 1)) 79 | img_padded = np.concatenate((pad_left, img_padded), axis=1) 80 | pad_down = np.tile(img_padded[-2:-1,:,:]*0 + padValue, (pad[2], 1, 1)) 81 | img_padded = np.concatenate((img_padded, pad_down), axis=0) 82 | pad_right = np.tile(img_padded[:,-2:-1,:]*0 + padValue, (1, pad[3], 1)) 83 | img_padded = np.concatenate((img_padded, pad_right), axis=1) 84 | 85 | return img_padded, pad 86 | 87 | def normalize(origin_img): 88 | 89 | 90 | origin_img = np.array(origin_img, dtype=np.float32) 91 | origin_img -= 128.0 92 | origin_img /= 256.0 93 | 94 | return origin_img 95 | 96 | def process(model, input_path): 97 | 98 | origin_img = cv2.imread(input_path) 99 | normed_img = normalize(origin_img) 100 | 101 | height, width, _ = normed_img.shape 102 | 103 | multiplier = [x * boxsize / height for x in scale_search] 104 | 105 | heatmap_avg = np.zeros((height, width, 19)) # num_point 106 | paf_avg = np.zeros((height, width, 38)) # num_vector 107 | 108 | for m in range(len(multiplier)): 109 | scale = multiplier[m] 110 | 111 | # 
preprocess 112 | imgToTest = cv2.resize(normed_img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) 113 | imgToTest_padded, pad = padRightDownCorner(imgToTest, stride, padValue) 114 | 115 | input_img = np.transpose(imgToTest_padded[:,:,:,np.newaxis], (3, 2, 0, 1)) # required shape (1, c, h, w) 116 | mask = np.ones((1, 1, input_img.shape[2] / stride, input_img.shape[3] / stride), dtype=np.float32) 117 | 118 | input_var = torch.autograd.Variable(torch.from_numpy(input_img).cuda()) 119 | mask_var = torch.autograd.Variable(torch.from_numpy(mask).cuda()) 120 | 121 | # get the features 122 | vec1, heat1, vec2, heat2, vec3, heat3, vec4, heat4, vec5, heat5, vec6, heat6 = model(input_var, mask_var) 123 | 124 | # get the heatmap 125 | heatmap = heat6.data.cpu().numpy() 126 | heatmap = np.transpose(np.squeeze(heatmap), (1, 2, 0)) # (h, w, c) 127 | heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) 128 | heatmap = heatmap[:imgToTest_padded.shape[0] - pad[2], :imgToTest_padded.shape[1] - pad[3], :] 129 | heatmap = cv2.resize(heatmap, (width, height), interpolation=cv2.INTER_CUBIC) 130 | heatmap_avg = heatmap_avg + heatmap / len(multiplier) 131 | 132 | # get the paf 133 | paf = vec6.data.cpu().numpy() 134 | paf = np.transpose(np.squeeze(paf), (1, 2, 0)) # (h, w, c) 135 | paf = cv2.resize(paf, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) 136 | paf = paf[:imgToTest_padded.shape[0] - pad[2], :imgToTest_padded.shape[1] - pad[3], :] 137 | paf = cv2.resize(paf, (width, height), interpolation=cv2.INTER_CUBIC) 138 | paf_avg = paf_avg + paf / len(multiplier) 139 | 140 | all_peaks = [] # all of the possible points by classes. 141 | peak_counter = 0 142 | 143 | for part in range(0, 18): # 1-19 144 | map_ori = heatmap_avg[:, :, part] 145 | map = gaussian_filter(map_ori, sigma=3) 146 | 147 | map_left = np.zeros(map.shape) 148 | map_left[:, 1:] = map[:, :-1] 149 | map_right = np.zeros(map.shape) 150 | map_right[:, :-1] = map[:, 1:] 151 | map_up = np.zeros(map.shape) 152 | map_up[1:, :] = map[:-1, :] 153 | map_down = np.zeros(map.shape) 154 | map_down[:-1, :] = map[1:, :] 155 | 156 | # get the salient point and its score > thre_point 157 | peaks_binary = np.logical_and.reduce( 158 | (map >= map_left, map >= map_right, map >= map_up, map >= map_down, map > thre_point)) 159 | peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0])) # (w, h) 160 | 161 | # a point format: (w, h, score, number) 162 | peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks] 163 | id = range(peak_counter, peak_counter + len(peaks)) 164 | peaks_with_score_and_id = [peaks_with_score[i] + (id[i], ) for i in range(len(id))] 165 | 166 | all_peaks.append(peaks_with_score_and_id) 167 | peak_counter += len(peaks) 168 | 169 | connection_all = [] # save all of the possible lines by classes. 170 | special_k = [] # save the lines, which haven't legal points. 171 | mid_num = 10 # could adjust to accelerate (small) or improve accuracy(large). 
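#
# The loop below scores each candidate limb (A, B) by sampling `mid_num`
# points along the segment from A to B and averaging the dot product of the
# PAF vector at every sample with the unit vector A->B; the code then also
# adds a distance prior, min(0.5 * height / norm - 1, 0), that penalizes
# very long limbs. A minimal standalone sketch of that line integral (the
# helper name is illustrative, not part of this file; assumes numpy as np):
#
#     def paf_line_score(score_mid, a, b, mid_num=10):
#         # score_mid: (h, w, 2) PAF slice; a, b: (x, y) peak coordinates
#         vec = np.subtract(b[:2], a[:2]).astype(np.float32)
#         norm = np.linalg.norm(vec) + 1e-9  # guard against coincident peaks
#         vec = vec / norm
#         xs = np.linspace(a[0], b[0], num=mid_num)
#         ys = np.linspace(a[1], b[1], num=mid_num)
#         pafs = np.array([score_mid[int(round(y)), int(round(x))]
#                          for x, y in zip(xs, ys)])  # (mid_num, 2)
#         return float(np.mean(pafs[:, 0] * vec[0] + pafs[:, 1] * vec[1]))
#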
172 | 173 | for k in range(len(mapIdx)): 174 | 175 | score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]] 176 | candA = all_peaks[limbSeq[k][0] - 1] 177 | candB = all_peaks[limbSeq[k][1] - 1] 178 | 179 | lenA = len(candA) 180 | lenB = len(candB) 181 | 182 | if lenA != 0 and lenB != 0: 183 | connection_candidate = [] 184 | for i in range(lenA): 185 | for j in range(lenB): 186 | vec = np.subtract(candB[j][:2], candA[i][:2]) # the vector of BA 187 | norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1]) 188 | if norm == 0: 189 | continue 190 | vec = np.divide(vec, norm) 191 | 192 | startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), np.linspace(candA[i][1], candB[j][1], num=mid_num))) 193 | 194 | # get the vector between A and B. 195 | vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] for I in range(len(startend))]) 196 | vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] for I in range(len(startend))]) 197 | 198 | score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1]) 199 | score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(0.5 * height / norm - 1, 0) # ??? 200 | criterion1 = len(np.nonzero(score_midpts > thre_line)[0]) > 0.8 * len(score_midpts) 201 | criterion2 = score_with_dist_prior > 0 202 | if criterion1 and criterion2: 203 | connection_candidate.append([i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]]) 204 | 205 | # sort the possible line from large to small order. 206 | connection_candidate = sorted(connection_candidate, key=lambda x: x[3], reverse=True) # different from openpose, I think there should be sorted by x[3] 207 | connection = np.zeros((0, 5)) 208 | 209 | for c in range(len(connection_candidate)): 210 | i, j, s = connection_candidate[c][0: 3] 211 | if (i not in connection[:, 3] and j not in connection[:, 4]): 212 | # the number of A point, the number of B point, score, A point, B point 213 | connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]]) 214 | if len(connection) >= min(lenA, lenB): 215 | break 216 | connection_all.append(connection) 217 | else: 218 | special_k.append(k) 219 | connection_all.append([]) 220 | 221 | subset = -1 * np.ones((0, 20)) 222 | candidate = np.array([item for sublist in all_peaks for item in sublist]) 223 | 224 | for k in range(len(mapIdx)): 225 | if k not in special_k: 226 | partAs = connection_all[k][:, 0] 227 | partBs = connection_all[k][:, 1] 228 | indexA, indexB = np.array(limbSeq[k]) - 1 229 | 230 | for i in range(len(connection_all[k])): 231 | found = 0 232 | flag = [False, False] 233 | subset_idx = [-1, -1] 234 | for j in range(len(subset)): 235 | # fix the bug, found == 2 and not joint will lead someone occur more than once. 236 | # if more than one, we choose the subset, which has a higher score. 
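#
# Each row of `subset` here is one person hypothesis with 20 slots: slots
# 0..17 hold indices into `candidate` for the 18 body parts (-1 when
# unassigned), slot -2 accumulates the total score, and slot -1 counts the
# assigned parts. Two rows may be merged only when they are disjoint, i.e.
# no part slot is filled in both. A small sketch of that disjointness test
# (illustrative helper, assumes numpy as np):
#
#     def rows_disjoint(row1, row2):
#         membership = (row1[:-2] >= 0).astype(int) + (row2[:-2] >= 0).astype(int)
#         return len(np.nonzero(membership == 2)[0]) == 0
#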
237 | if subset[j][indexA] == partAs[i]:
238 | if flag[0] == False:
239 | flag[0] = found
240 | subset_idx[found] = j
241 | flag[0] = True
242 | found += 1
243 | else:
244 | ids = subset_idx[flag[0]]
245 | if subset[ids][-1] < subset[j][-1]:
246 | subset_idx[flag[0]] = j
247 | if subset[j][indexB] == partBs[i]:
248 | if flag[1] == False:
249 | flag[1] = found
250 | subset_idx[found] = j
251 | flag[1] = True
252 | found += 1
253 | else:
254 | ids = subset_idx[flag[1]]
255 | if subset[ids][-1] < subset[j][-1]:
256 | subset_idx[flag[1]] = j
257 |
258 | if found == 1:
259 | j = subset_idx[0]
260 | if (subset[j][indexB] != partBs[i]):
261 | subset[j][indexB] = partBs[i]
262 | subset[j][-1] += 1
263 | subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
264 | elif found == 2: # if found == 2 and the rows are disjoint, merge them
265 | j1, j2 = subset_idx
266 | membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
267 | if len(np.nonzero(membership == 2)[0]) == 0: # merge
268 | subset[j1][:-2] += (subset[j2][:-2] + 1)
269 | subset[j1][-2:] += subset[j2][-2:]
270 | subset[j1][-2] += connection_all[k][i][2]
271 | subset = np.delete(subset, j2, 0)
272 | else: # same as the found == 1 case
273 | subset[j1][indexB] = partBs[i]
274 | subset[j1][-1] += 1
275 | subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
276 | elif not found and k < 17:
277 | row = -1 * np.ones(20)
278 | row[indexA] = partAs[i]
279 | row[indexB] = partBs[i]
280 | row[-1] = 2
281 | row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
282 | subset = np.vstack([subset, row])
283 |
284 | # delete rows of subset that have too few parts or too low an average score
285 | deleteIdx = []
286 | for i in range(len(subset)):
287 | if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
288 | deleteIdx.append(i)
289 | subset = np.delete(subset, deleteIdx, axis=0)
290 |
291 | # draw points
292 | canvas = cv2.imread(input_path)
293 | for i in range(18):
294 | for j in range(len(all_peaks[i])):
295 | cv2.circle(canvas, all_peaks[i][j][0:2], 4, colors[i], thickness=-1)
296 |
297 | # draw lines
298 | for i in range(17):
299 | for n in range(len(subset)):
300 | index = subset[n][np.array(limbSeq[i]) - 1]
301 | if -1 in index:
302 | continue
303 | cur_canvas = canvas.copy()
304 | Y = candidate[index.astype(int), 0]
305 | X = candidate[index.astype(int), 1]
306 | mX = np.mean(X)
307 | mY = np.mean(Y)
308 | length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
309 | angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
310 | polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
311 | cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
312 | canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
313 |
314 | return canvas
315 |
316 | if __name__ == '__main__':
317 |
318 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
319 | parser = argparse.ArgumentParser()
320 | parser.add_argument('--image', type=str, help='input image',
321 | default='ski.jpg')
322 | parser.add_argument('--output', type=str, default='result.jpg', help='output image')
323 | parser.add_argument('--model', type=str, help='path to the weights file',
324 | default='/home/xiangyu/data/pretrain/COCO/coco_pose_iter_440000.pth.tar')
325 |
326 | args = parser.parse_args()
327 | input_image = args.image
328 | output = args.output
329 |
330 | # load model
331 | model = construct_model(args)
332 |
333 | tic = time.time()
334 | print('start processing...')
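#
# process() above returns the input image with the estimated skeletons
# rendered on it: joints as filled circles, and each limb as an ellipse
# rotated to the joint-joint axis (centered at the midpoint, major semi-axis
# half the joint distance, minor semi-axis `stickwidth`), alpha-blended onto
# the canvas. A condensed sketch of that limb-drawing step (illustrative
# helper; assumes cv2 and math are imported and canvas is a BGR numpy image):
#
#     def draw_limb(canvas, p1, p2, color, stickwidth=4):
#         # p1, p2: (x, y) joint positions
#         cx, cy = (p1[0] + p2[0]) / 2.0, (p1[1] + p2[1]) / 2.0
#         length = math.hypot(p1[0] - p2[0], p1[1] - p2[1])
#         angle = math.degrees(math.atan2(p1[1] - p2[1], p1[0] - p2[0]))
#         overlay = canvas.copy()
#         poly = cv2.ellipse2Poly((int(cx), int(cy)), (int(length / 2), stickwidth),
#                                 int(angle), 0, 360, 1)
#         cv2.fillConvexPoly(overlay, poly, color)
#         return cv2.addWeighted(canvas, 0.4, overlay, 0.6, 0)
#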
335 | 336 | # generate image with body parts 337 | canvas = process(model, input_image) 338 | 339 | toc = time.time() 340 | print ('processing time is %.5f' % (toc - tic)) 341 | 342 | cv2.imwrite(output, canvas) 343 | -------------------------------------------------------------------------------- /evaluation/test_util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pylab as plt 3 | import cv2 4 | import time 5 | import math 6 | import torch 7 | import sys 8 | sys.path.append('/data/xiaobing.wang/qy.feng/pytorch_openpose/') 9 | #sys.path.append('/data/xiaobing.wang/ECCV_2017/Pytorch_RMPE/') 10 | import pose_estimation 11 | from cStringIO import StringIO 12 | from scipy.ndimage.filters import gaussian_filter 13 | 14 | 15 | limbSeq = [[3,4], [4,5], [6,7], [7,8], [9,10], [10,11], [12,13], [13,14], [1,2], [2,9], [2,12], [2,3], [2,6], \ 16 | [3,17],[6,18],[1,16],[1,15],[16,18],[15,17]] 17 | 18 | mapIdx = [[19,20],[21,22],[23,24],[25,26],[27,28],[29,30],[31,32],[33,34],[35,36],[37,38],[39,40], \ 19 | [41,42],[43,44],[45,46],[47,48],[49,50],[51,52],[53,54],[55,56]] 20 | 21 | colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \ 22 | [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \ 23 | [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]] 24 | 25 | boxsize = 368 26 | scale_search = [0.5, 1.0, 1.5, 2.0] 27 | stride = 8 28 | padValue = 0. 29 | thre_point = 0.15 30 | thre_line = 0.05 31 | stickwidth = 4 32 | 33 | 34 | def construct_model(args): 35 | 36 | model = pose_estimation.PoseModel(num_point=19, num_vector=19) 37 | state_dict = torch.load(args.model)['state_dict'] 38 | from collections import OrderedDict 39 | new_state_dict = OrderedDict() 40 | for k, v in state_dict.items(): 41 | if k in model.state_dict().keys(): 42 | print('o', k) 43 | new_state_dict[k] = v 44 | else: 45 | print('x', k) 46 | name = k[7:] 47 | new_state_dict[name] = v 48 | state_dict = model.state_dict() 49 | 50 | state_dict.update(new_state_dict) 51 | model.load_state_dict(state_dict) 52 | model = model.cuda() 53 | model.eval() 54 | 55 | return model 56 | 57 | def padRightDownCorner(img, stride, padValue): 58 | 59 | h = img.shape[0] 60 | w = img.shape[1] 61 | 62 | pad = 4 * [None] 63 | pad[0] = 0 # up 64 | pad[1] = 0 # left 65 | pad[2] = 0 if (h%stride==0) else stride - (h % stride) # down 66 | pad[3] = 0 if (w%stride==0) else stride - (w % stride) # right 67 | 68 | img_padded = img 69 | pad_up = np.tile(img_padded[0:1,:,:]*0 + padValue, (pad[0], 1, 1)) 70 | img_padded = np.concatenate((pad_up, img_padded), axis=0) 71 | pad_left = np.tile(img_padded[:,0:1,:]*0 + padValue, (1, pad[1], 1)) 72 | img_padded = np.concatenate((pad_left, img_padded), axis=1) 73 | pad_down = np.tile(img_padded[-2:-1,:,:]*0 + padValue, (pad[2], 1, 1)) 74 | img_padded = np.concatenate((img_padded, pad_down), axis=0) 75 | pad_right = np.tile(img_padded[:,-2:-1,:]*0 + padValue, (1, pad[3], 1)) 76 | img_padded = np.concatenate((img_padded, pad_right), axis=1) 77 | 78 | return img_padded, pad 79 | 80 | 81 | def normalize(origin_img): 82 | 83 | origin_img = np.array(origin_img, dtype=np.float32) 84 | origin_img -= 128.0 85 | origin_img /= 256.0 86 | 87 | return origin_img 88 | 89 | 90 | def process(model, input_path): 91 | 92 | origin_img = cv2.imread(input_path) 93 | normed_img = normalize(origin_img) 94 | 95 | height, width, _ = normed_img.shape 96 | 97 | multiplier = [x 
* boxsize / height for x in scale_search] 98 | 99 | heatmap_avg = np.zeros((height, width, 19)) # num_point 100 | paf_avg = np.zeros((height, width, 38)) # num_vector 101 | 102 | for m in range(len(multiplier)): 103 | scale = multiplier[m] 104 | 105 | # preprocess 106 | imgToTest = cv2.resize(normed_img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) 107 | imgToTest_padded, pad = padRightDownCorner(imgToTest, stride, padValue) 108 | 109 | input_img = np.transpose(imgToTest_padded[:,:,:,np.newaxis], (3, 2, 0, 1)) # required shape (1, c, h, w) 110 | mask = np.ones((1, 1, input_img.shape[2] / stride, input_img.shape[3] / stride), dtype=np.float32) 111 | 112 | input_var = torch.autograd.Variable(torch.from_numpy(input_img).cuda()) 113 | mask_var = torch.autograd.Variable(torch.from_numpy(mask).cuda()) 114 | 115 | # get the features 116 | vec1, heat1, vec2, heat2, vec3, heat3, vec4, heat4, vec5, heat5, vec6, heat6 = model(input_var, mask_var) 117 | 118 | # get the heatmap 119 | heatmap = heat6.data.cpu().numpy() 120 | heatmap = np.transpose(np.squeeze(heatmap), (1, 2, 0)) # (h, w, c) 121 | heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) 122 | heatmap = heatmap[:imgToTest_padded.shape[0] - pad[2], :imgToTest_padded.shape[1] - pad[3], :] 123 | heatmap = cv2.resize(heatmap, (width, height), interpolation=cv2.INTER_CUBIC) 124 | heatmap_avg = heatmap_avg + heatmap / len(multiplier) 125 | 126 | # get the paf 127 | paf = vec6.data.cpu().numpy() 128 | paf = np.transpose(np.squeeze(paf), (1, 2, 0)) # (h, w, c) 129 | paf = cv2.resize(paf, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) 130 | paf = paf[:imgToTest_padded.shape[0] - pad[2], :imgToTest_padded.shape[1] - pad[3], :] 131 | paf = cv2.resize(paf, (width, height), interpolation=cv2.INTER_CUBIC) 132 | paf_avg = paf_avg + paf / len(multiplier) 133 | 134 | all_peaks = [] # all of the possible points by classes. 135 | peak_counter = 0 136 | 137 | for part in range(1, 19): 138 | map_ori = heatmap_avg[:, :, part] 139 | map = gaussian_filter(map_ori, sigma=3) 140 | 141 | map_left = np.zeros(map.shape) 142 | map_left[:, 1:] = map[:, :-1] 143 | map_right = np.zeros(map.shape) 144 | map_right[:, :-1] = map[:, 1:] 145 | map_up = np.zeros(map.shape) 146 | map_up[1:, :] = map[:-1, :] 147 | map_down = np.zeros(map.shape) 148 | map_down[:-1, :] = map[1:, :] 149 | 150 | # get the salient point and its score > thre_point 151 | peaks_binary = np.logical_and.reduce( 152 | (map >= map_left, map >= map_right, map >= map_up, map >= map_down, map > thre_point)) 153 | peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0])) # (w, h) 154 | 155 | # a point format: (w, h, score, number) 156 | peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks] 157 | id = range(peak_counter, peak_counter + len(peaks)) 158 | peaks_with_score_and_id = [peaks_with_score[i] + (id[i], ) for i in range(len(id))] 159 | 160 | all_peaks.append(peaks_with_score_and_id) 161 | peak_counter += len(peaks) 162 | 163 | connection_all = [] # save all of the possible lines by classes. 164 | special_k = [] # save the lines, which haven't legal points. 165 | mid_num = 10 # could adjust to accelerate (small) or improve accuracy(large). 
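#
# With mid_num = 10, criterion1 in the loop below demands strictly more than
# 0.8 * 10 = 8 samples above thre_line, i.e. at least 9 of the 10 sampled
# PAF projections must lie along a consistent limb field; criterion2 then
# requires the averaged, distance-penalized score to be positive before the
# pair (i, j) becomes a connection candidate.
#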
166 | 167 | for k in range(len(mapIdx)): 168 | 169 | score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]] 170 | candA = all_peaks[limbSeq[k][0] - 1] 171 | candB = all_peaks[limbSeq[k][1] - 1] 172 | 173 | lenA = len(candA) 174 | lenB = len(candB) 175 | 176 | if lenA != 0 and lenB != 0: 177 | connection_candidate = [] 178 | for i in range(lenA): 179 | for j in range(lenB): 180 | vec = np.subtract(candB[j][:2], candA[i][:2]) # the vector of BA 181 | norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1]) 182 | if norm == 0: 183 | continue 184 | vec = np.divide(vec, norm) 185 | 186 | startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), np.linspace(candA[i][1], candB[j][1], num=mid_num))) 187 | 188 | # get the vector between A and B. 189 | vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] for I in range(len(startend))]) 190 | vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] for I in range(len(startend))]) 191 | 192 | score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1]) 193 | score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(0.5 * height / norm - 1, 0) # ??? 194 | criterion1 = len(np.nonzero(score_midpts > thre_line)[0]) > 0.8 * len(score_midpts) 195 | criterion2 = score_with_dist_prior > 0 196 | if criterion1 and criterion2: 197 | connection_candidate.append([i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]]) 198 | 199 | # sort the possible line from large to small order. 200 | connection_candidate = sorted(connection_candidate, key=lambda x: x[3], reverse=True) # different from openpose, I think there should be sorted by x[3] 201 | connection = np.zeros((0, 5)) 202 | 203 | for c in range(len(connection_candidate)): 204 | i, j, s = connection_candidate[c][0: 3] 205 | if (i not in connection[:, 3] and j not in connection[:, 4]): 206 | # the number of A point, the number of B point, score, A point, B point 207 | connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]]) 208 | if len(connection) >= min(lenA, lenB): 209 | break 210 | connection_all.append(connection) 211 | else: 212 | special_k.append(k) 213 | connection_all.append([]) 214 | 215 | subset = -1 * np.ones((0, 20)) 216 | candidate = np.array([item for sublist in all_peaks for item in sublist]) 217 | 218 | for k in range(len(mapIdx)): 219 | if k not in special_k: 220 | partAs = connection_all[k][:, 0] 221 | partBs = connection_all[k][:, 1] 222 | indexA, indexB = np.array(limbSeq[k]) - 1 223 | 224 | for i in range(len(connection_all[k])): 225 | found = 0 226 | flag = [False, False] 227 | subset_idx = [-1, -1] 228 | for j in range(len(subset)): 229 | # fix the bug, found == 2 and not joint will lead someone occur more than once. 230 | # if more than one, we choose the subset, which has a higher score. 
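#
# The two `if` blocks below implement the fix described above: the first row
# that already contains partA (or partB) is recorded in subset_idx, and any
# later row containing the same part replaces it only if it has a larger
# part count subset[j][-1], so each connection attaches to the most complete
# person hypothesis found so far.
#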
231 | if subset[j][indexA] == partAs[i]: 232 | if flag[0] == False: 233 | flag[0] = found 234 | subset_idx[found] = j 235 | flag[0] = True 236 | found += 1 237 | else: 238 | ids = subset_idx[flag[0]] 239 | if subset[ids][-1] < subset[j][-1]: 240 | subset_idx[flag[0]] = j 241 | if subset[j][indexB] == partBs[i]: 242 | if flag[1] == False: 243 | flag[1] = found 244 | subset_idx[found] = j 245 | flag[1] = True 246 | found += 1 247 | else: 248 | ids = subset_idx[flag[1]] 249 | if subset[ids][-1] < subset[j][-1]: 250 | subset_idx[flag[1]] = j 251 | 252 | if found == 1: 253 | j = subset_idx[0] 254 | if (subset[j][indexB] != partBs[i]): 255 | subset[j][indexB] = partBs[i] 256 | subset[j][-1] += 1 257 | subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2] 258 | elif found == 2: # if found equals to 2 and disjoint, merge them 259 | j1, j2 = subset_idx 260 | membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2] 261 | if len(np.nonzero(membership == 2)[0]) == 0: # merge 262 | subset[j1][:-2] += (subset[j2][:-2] + 1) 263 | subset[j1][-2:] += subset[j2][-2:] 264 | subset[j1][-2] += connection_all[k][i][2] 265 | subset = np.delete(subset, j2, 0) 266 | else: # as like found == 1 267 | subset[j1][indexB] = partBs[i] 268 | subset[j1][-1] += 1 269 | subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2] 270 | elif not found and k < 17: 271 | row = -1 * np.ones(20) 272 | row[indexA] = partAs[i] 273 | row[indexB] = partBs[i] 274 | row[-1] = 2 275 | row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2] 276 | subset = np.vstack([subset, row]) 277 | 278 | # delete som rows of subset which has few parts occur 279 | deleteIdx = [] 280 | for i in range(len(subset)): 281 | if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4: 282 | deleteIdx.append(i) 283 | subset = np.delete(subset, deleteIdx, axis=0) 284 | 285 | result = [] 286 | for n in range(len(subset)): 287 | person = [] 288 | for i in range(0,18): 289 | index = subset[n][i] 290 | if index==-1: 291 | continue 292 | ID = i 293 | Y = candidate[index.astype(int), 0] 294 | X = candidate[index.astype(int), 1] 295 | score = candidate[index.astype(int), 2] 296 | person.append([ID,X,Y,score]) 297 | final_score = subset[n][-1] 298 | 299 | #print 'final_score', final_score 300 | result.append([person, final_score]) 301 | 302 | return result 303 | -------------------------------------------------------------------------------- /experiments/baseline/config.yml: -------------------------------------------------------------------------------- 1 | workers: 6 2 | weight_decay: 0.0005 3 | momentum: 0.9 4 | display: 50 5 | max_iter: 160000 6 | batch_size: 10 7 | test_interval: 50 8 | topk: 3 9 | base_lr: 0.00004 10 | start_iters: 0 11 | best_model: 12345678.9 12 | #-------------lr_policy--------------------# 13 | # step 14 | lr_policy: 'step' 15 | policy_parameter: 16 | gamma: 0.333 17 | step_size: 50000 18 | #53100 19 | # exp 20 | # lr_policy: 'exp' 21 | # policy_parameter: 22 | # gamma: 0.99 23 | # 24 | # inv 25 | # lr_policy: 'inv' 26 | # policy_parameter: 27 | # gamma: 0.1 28 | # power: 0.1 29 | # 30 | # multistep 31 | #lr_policy: 'multistep' 32 | #policy_parameter: 33 | # stepvalue: #[20000, 35000, 45000] 34 | # gamma: 0.33#0.1 35 | # 36 | # poly 37 | #lr_policy: 'poly' 38 | #policy_parameter: 39 | #power: 1.5 40 | #max_iter: 250000 41 | # 42 | # sigmoid 43 | # lr_policy: 'sigmoid' 44 | # policy_parameter: 45 | # gamma: 0.9 46 | # stepsize: 5000 47 | 
#lr_policy: 'multistep-poly' 48 | #policy_parameter: 49 | #stepvalue: [12435, 24870, 37350, 49740] 50 | #max_iter: 62175 51 | #gamma: 0.333 52 | #power: 1.2 53 | -------------------------------------------------------------------------------- /experiments/baseline/train_pose.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import torch.nn as nn 4 | import torch.backends.cudnn as cudnn 5 | import torch.optim 6 | import os 7 | import sys 8 | import argparse 9 | import time 10 | 11 | sys.path.append('../../') 12 | import coco_loader 13 | import Mytransforms 14 | from utils import * 15 | import vis_util 16 | import pose_estimation 17 | from logger import Logger 18 | 19 | 20 | def parse(): 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('--config', type=str, 23 | dest='config', help='to set the parameters', 24 | default='config.yml') 25 | parser.add_argument('--gpu', default=[0], nargs='+', type=int, 26 | dest='gpu', help='the gpu used') 27 | parser.add_argument('--pretrained', default=None, type=str, 28 | dest='pretrained', help='the path of pretrained model') 29 | 30 | parser.add_argument('--snapshot', type=str, 31 | dest='snapshot', help='resume model', 32 | #default='/home/xiangyu/samsung_pose/experiments/baseline/40000_val.pth.tar' 33 | default=None 34 | ) 35 | 36 | parser.add_argument('--root', type=str, 37 | dest='root', help='the root of images', 38 | default='/data/root/data/coco/images/val2014') 39 | parser.add_argument('--train_dir', nargs='+', type=str, 40 | dest='train_dir', help='the path of train file', 41 | # default=['/home/xiangyu/data/samsung_pose_data/img_list/valminusminival2014.txt', 42 | # '/home/xiangyu/data/samsung_pose_data/mask_list/valminusminival2014.txt', 43 | # '/home/xiangyu/data/samsung_pose_data/json/valminusminival2014.json'] 44 | default=['/home/xiangyu/data/samsung_pose_data_train/img_list/train2014.txt', 45 | '/home/xiangyu/data/samsung_pose_data_train/mask_list/train2014.txt', 46 | '/home/xiangyu/data/samsung_pose_data_train/json/train2014.json'] 47 | ) 48 | parser.add_argument('--val_dir', nargs='+', type=str, 49 | dest='val_dir', help='the path of val file', 50 | default=['/home/xiangyu/data/samsung_pose_data/img_list/minival2014.txt', 51 | '/home/xiangyu/data/samsung_pose_data/mask_list/minival2014.txt', 52 | '/home/xiangyu/data/samsung_pose_data/json/minival2014.json']) 53 | parser.add_argument('--num_classes', default=1000, type=int, 54 | dest='num_classes', help='num_classes (default: 1000)') 55 | parser.add_argument('--logdir', default='./logs', type=str, 56 | dest='logdir', help='path of log') 57 | return parser.parse_args() 58 | 59 | 60 | def construct_model(args): 61 | if not args.snapshot: 62 | model = pose_estimation.PoseModel(num_point=19, num_vector=19, pretrained=True) 63 | else: 64 | print('--------load model from {}----------------'.format(args.snapshot)) 65 | model = pose_estimation.PoseModel(num_point=19, num_vector=19, pretrained=True) 66 | state_dict = torch.load(args.snapshot)['state_dict'] 67 | model.load_state_dict(state_dict) 68 | # if not args.pretrained: 69 | # model = pose_estimation.PoseModel(num_point=19, num_vector=19, pretrained=True) 70 | # else: 71 | # state_dict = torch.load(args.pretrained)['state_dict'] 72 | # from collections import OrderedDict 73 | # new_state_dict = OrderedDict() 74 | # for k, v in state_dict.items(): 75 | # name = k[7:] 76 | # new_state_dict[name] = v 77 | # 
model.load_state_dict(new_state_dict) 78 | 79 | # os.environ['CUDA_VISIBLE_DEVICES'] = ','.join([str(gpu) for gpu in args.gpu]) 80 | # model = torch.nn.DataParallel(model, device_ids=range(len(args.gpu))).cuda() 81 | model.cuda() # single gpu 82 | 83 | return model 84 | 85 | 86 | def get_parameters(model, config, isdefault=True): 87 | if isdefault: 88 | return model.parameters(), [1.] 89 | lr_1 = [] 90 | lr_2 = [] 91 | lr_4 = [] 92 | lr_8 = [] 93 | params_dict = dict(model.named_parameters()) 94 | for key, value in params_dict.items(): 95 | if ('model1_' not in key) and ('model0.' not in key): 96 | if key[-4:] == 'bias': 97 | lr_8.append(value) 98 | else: 99 | lr_4.append(value) 100 | elif key[-4:] == 'bias': 101 | lr_2.append(value) 102 | else: 103 | lr_1.append(value) 104 | params = [{'params': lr_1, 'lr': config.base_lr}, 105 | {'params': lr_2, 'lr': config.base_lr * 2.}, 106 | {'params': lr_4, 'lr': config.base_lr * 4.}, 107 | {'params': lr_8, 'lr': config.base_lr * 8.}] 108 | 109 | return params, [1., 2., 4., 8.] 110 | 111 | 112 | def to_np(x): 113 | return x.data.cpu().numpy() 114 | 115 | 116 | def train_val(model, args): 117 | traindir = args.train_dir 118 | valdir = args.val_dir 119 | 120 | config = Config(args.config) 121 | cudnn.benchmark = True 122 | 123 | # Set the logger 124 | logger = Logger('./log') 125 | 126 | train_loader = torch.utils.data.DataLoader( 127 | coco_loader.coco_loader(traindir, 8, 128 | Mytransforms.Compose([Mytransforms.RandomResized(), 129 | Mytransforms.RandomRotate(40), 130 | Mytransforms.RandomCrop(368), 131 | Mytransforms.RandomHorizontalFlip(), 132 | ])), 133 | batch_size=config.batch_size, shuffle=True, 134 | num_workers=config.workers, pin_memory=True) 135 | 136 | if config.test_interval != 0 and args.val_dir is not None: 137 | val_loader = torch.utils.data.DataLoader( 138 | coco_loader.coco_loader(valdir, 8, 139 | Mytransforms.Compose([Mytransforms.TestResized(368), 140 | ])), 141 | batch_size=4, shuffle=False, 142 | num_workers=config.workers, pin_memory=True) 143 | 144 | criterion = nn.MSELoss().cuda() 145 | 146 | params, multiple = get_parameters(model, config, False) 147 | 148 | optimizer = torch.optim.SGD(params, config.base_lr, momentum=config.momentum, 149 | weight_decay=config.weight_decay) 150 | 151 | batch_time = AverageMeter() 152 | data_time = AverageMeter() 153 | losses = AverageMeter() 154 | losses_list = [AverageMeter() for i in range(12)] 155 | top1 = AverageMeter() 156 | topk = AverageMeter() 157 | 158 | end = time.time() 159 | iters = config.start_iters 160 | best_model = config.best_model 161 | learning_rate = config.base_lr 162 | 163 | model.train() 164 | 165 | heat_weight = 46 * 46 * 19 / 2.0 # for convenient to compare with origin code 166 | vec_weight = 46 * 46 * 38 / 2.0 167 | 168 | while iters < config.max_iter: 169 | # ---------------------------------------------------- train ------------------------------------------ 170 | for i, (input, heatmap, vecmap, mask, kpt) in enumerate(train_loader): 171 | 172 | learning_rate = adjust_learning_rate(optimizer, iters, config.base_lr, policy=config.lr_policy, 173 | policy_parameter=config.policy_parameter, multiple=multiple) 174 | data_time.update(time.time() - end) 175 | 176 | input = input.cuda(async=True) 177 | heatmap = heatmap.cuda(async=True) 178 | vecmap = vecmap.cuda(async=True) 179 | mask = mask.cuda(async=True) 180 | 181 | input_var = torch.autograd.Variable(input) 182 | heatmap_var = torch.autograd.Variable(heatmap) 183 | vecmap_var = torch.autograd.Variable(vecmap) 184 | 
mask_var = torch.autograd.Variable(mask) 185 | 186 | vec1, heat1, vec2, heat2, vec3, heat3, vec4, heat4, vec5, heat5, vec6, heat6 = model(input_var, mask_var) 187 | loss1_1 = criterion(vec1, vecmap_var) * vec_weight 188 | loss1_2 = criterion(heat1, heatmap_var) * heat_weight 189 | loss2_1 = criterion(vec2, vecmap_var) * vec_weight 190 | loss2_2 = criterion(heat2, heatmap_var) * heat_weight 191 | loss3_1 = criterion(vec3, vecmap_var) * vec_weight 192 | loss3_2 = criterion(heat3, heatmap_var) * heat_weight 193 | loss4_1 = criterion(vec4, vecmap_var) * vec_weight 194 | loss4_2 = criterion(heat4, heatmap_var) * heat_weight 195 | loss5_1 = criterion(vec5, vecmap_var) * vec_weight 196 | loss5_2 = criterion(heat5, heatmap_var) * heat_weight 197 | loss6_1 = criterion(vec6, vecmap_var) * vec_weight 198 | loss6_2 = criterion(heat6, heatmap_var) * heat_weight 199 | 200 | loss = loss1_1 + loss1_2 + loss2_1 + loss2_2 + loss3_1 + loss3_2 + loss4_1 + loss4_2 + loss5_1 + loss5_2 + loss6_1 + loss6_2 201 | 202 | losses.update(loss.data[0], input.size(0)) 203 | loss_list = [loss1_1, loss1_2, loss2_1, loss2_2, loss3_1, loss3_2, loss4_1, loss4_2, loss5_1, loss5_2, 204 | loss6_1, loss6_2] 205 | for cnt, l in enumerate(loss_list): 206 | losses_list[cnt].update(l.data[0], input.size(0)) 207 | 208 | optimizer.zero_grad() 209 | loss.backward() 210 | optimizer.step() 211 | 212 | batch_time.update(time.time() - end) 213 | end = time.time() 214 | 215 | iters += 1 216 | if iters % config.display == 0: 217 | print('Train Iteration: {0}\t' 218 | 'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t' 219 | 'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:3f})\n' 220 | 'Learning rate = {2}\n' 221 | 'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format( 222 | iters, config.display, learning_rate, batch_time=batch_time, 223 | data_time=data_time, loss=losses)) 224 | for cnt in range(0, 12, 2): 225 | print('Loss{0}_1 = {loss1.val:.8f} (ave = {loss1.avg:.8f})\t' 226 | 'Loss{1}_2 = {loss2.val:.8f} (ave = {loss2.avg:.8f})'.format(cnt / 2 + 1, cnt / 2 + 1, 227 | loss1=losses_list[cnt], 228 | loss2=losses_list[cnt + 1])) 229 | print(time.strftime( 230 | '%Y-%m-%d %H:%M:%S -----------------------------------------------------------------------------------------------------------------\n', 231 | time.localtime())) 232 | 233 | batch_time.reset() 234 | data_time.reset() 235 | losses.reset() 236 | for cnt in range(12): 237 | losses_list[cnt].reset() 238 | 239 | # ------------------------------------------ val --------------------------------------------------------------------- 240 | # #if config.test_interval != 0 and args.val_dir is not None and iters % config.test_interval == 0: 241 | # if True: 242 | # model.eval() 243 | # for j, (input, heatmap, vecmap, mask, kpt) in enumerate(val_loader): 244 | # imgs = input.numpy() 245 | # heatmap = heatmap.numpy() 246 | # vecmap = vecmap.numpy() 247 | # mask = mask.numpy() 248 | # 249 | # canvas_targs = np.zeros(imgs.shape) 250 | # canvas_preds = np.zeros(imgs.shape) 251 | # 252 | # for i in range(len(imgs)): 253 | # img = imgs[i] 254 | # img = img.transpose(1, 2, 0) # 368, 368, 3 255 | # img = (img + 1) / 2 * 255 256 | # 257 | # # visualize GT by kpts 258 | # # canvas_kpts = img.copy() 259 | # # vis_util.draw_kpts(canvas_kpts, kpts) 260 | # 261 | # # visualize results derived from target 262 | # canvas_targ = img.copy() 263 | # canvas_targ = vis_util.draw_result(heatmap[i], vecmap[i], canvas_targ) 264 | # canvas_targ = canvas_targ.transpose(2, 0, 1) 265 | # 
canvas_targs[i] = canvas_targ 266 | # 267 | # # visualize predicted results 268 | # input = input.cuda(async=True) 269 | # input_var = torch.autograd.Variable(input, volatile=True) 270 | # mask_white = np.ones((mask.shape), dtype=np.float32) 271 | # mask_white_var = torch.autograd.Variable(torch.from_numpy(mask_white).cuda()) 272 | # 273 | # vec1, heat1, vec2, heat2, vec3, heat3, vec4, heat4, vec5, heat5, vec6, heat6 \ 274 | # = model(input_var,mask_white_var) 275 | # 276 | # heat_out = heat6.data.cpu().numpy() 277 | # vec_out = vec6.data.cpu().numpy() 278 | # 279 | # for i in range(len(imgs)): 280 | # img = imgs[i] 281 | # img = img.transpose(1, 2, 0) # 368, 368, 3 282 | # img = (img + 1) / 2 * 255 283 | # 284 | # canvas_pred = img.copy() 285 | # canvas_pred = vis_util.draw_result(heat_out[i], vec_out[i], canvas_pred) 286 | # canvas_pred = canvas_pred.transpose(2, 0, 1) 287 | # canvas_preds[i] = canvas_pred 288 | # 289 | # ## Log images 290 | # 291 | # imgs = { 292 | # 'target': canvas_targs, 293 | # 'predict': canvas_preds 294 | # } 295 | # for tag, images in imgs.items(): 296 | # logger.image_summary(tag, images, 0) 297 | # 298 | # #break 299 | # 300 | # 301 | # model.train() 302 | 303 | if iters % 5000 == 0: 304 | torch.save({ 305 | 'iter': iters, 306 | 'state_dict': model.state_dict(), 307 | }, str(iters) + '.pth.tar') 308 | 309 | if iters == config.max_iter: 310 | break 311 | 312 | 313 | if __name__ == '__main__': 314 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 315 | args = parse() 316 | model = construct_model(args) 317 | train_val(model, args) -------------------------------------------------------------------------------- /experiments/fpn/config.yml: -------------------------------------------------------------------------------- 1 | workers: 6 2 | weight_decay: 0.0005 3 | momentum: 0.9 4 | display: 50 5 | max_iter: 160000 6 | batch_size: 10 7 | test_interval: 50 8 | topk: 3 9 | base_lr: 0.00004 10 | start_iters: 0 11 | best_model: 12345678.9 12 | #-------------lr_policy--------------------# 13 | # step 14 | lr_policy: 'step' 15 | policy_parameter: 16 | gamma: 0.333 17 | step_size: 50000 18 | #53100 19 | # exp 20 | # lr_policy: 'exp' 21 | # policy_parameter: 22 | # gamma: 0.99 23 | # 24 | # inv 25 | # lr_policy: 'inv' 26 | # policy_parameter: 27 | # gamma: 0.1 28 | # power: 0.1 29 | # 30 | # multistep 31 | #lr_policy: 'multistep' 32 | #policy_parameter: 33 | # stepvalue: #[20000, 35000, 45000] 34 | # gamma: 0.33#0.1 35 | # 36 | # poly 37 | #lr_policy: 'poly' 38 | #policy_parameter: 39 | #power: 1.5 40 | #max_iter: 250000 41 | # 42 | # sigmoid 43 | # lr_policy: 'sigmoid' 44 | # policy_parameter: 45 | # gamma: 0.9 46 | # stepsize: 5000 47 | #lr_policy: 'multistep-poly' 48 | #policy_parameter: 49 | #stepvalue: [12435, 24870, 37350, 49740] 50 | #max_iter: 62175 51 | #gamma: 0.333 52 | #power: 1.2 53 | -------------------------------------------------------------------------------- /experiments/fpn/train_pose.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import torch.nn as nn 4 | import torch.backends.cudnn as cudnn 5 | import torch.optim 6 | import os 7 | import sys 8 | import argparse 9 | import time 10 | 11 | sys.path.append('../../') 12 | import coco_loader 13 | import Mytransforms 14 | from utils import * 15 | import vis_util 16 | import pose_estimation 17 | #from logger import Logger 18 | import model.fpn as fpn 19 | 20 | 21 | def parse(): 22 | parser = argparse.ArgumentParser() 
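# A typical invocation from this directory (paths illustrative; every option
# below has a default, so only the ones that differ need to be given):
#
#     python train_pose.py --config config.yml --gpu 0 \
#         --train_dir img_list.txt mask_list.txt train.json
#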
23 | parser.add_argument('--config', type=str, 24 | dest='config', help='to set the parameters', 25 | default='config.yml') 26 | parser.add_argument('--gpu', default=[0], nargs='+', type=int, 27 | dest='gpu', help='the gpu used') 28 | parser.add_argument('--pretrained', default=None, type=str, 29 | dest='pretrained', help='the path of pretrained model') 30 | 31 | parser.add_argument('--snapshot', type=str, 32 | dest='snapshot', help='resume model', 33 | #default='/home/xiangyu/samsung_pose/experiments/baseline/40000_val.pth.tar' 34 | default=None 35 | ) 36 | 37 | parser.add_argument('--root', type=str, 38 | dest='root', help='the root of images', 39 | default='/data/root/data/coco/images/val2014') 40 | parser.add_argument('--train_dir', nargs='+', type=str, 41 | dest='train_dir', help='the path of train file', 42 | # default=['/home/xiangyu/data/samsung_pose_data/img_list/valminusminival2014.txt', 43 | # '/home/xiangyu/data/samsung_pose_data/mask_list/valminusminival2014.txt', 44 | # '/home/xiangyu/data/samsung_pose_data/json/valminusminival2014.json'] 45 | default=['/home/xiangyuzhu/workspace/data/coco/samsung_pose_data/img_list/valminusminival2014.txt', 46 | '/home/xiangyuzhu/workspace/data/coco/samsung_pose_data/mask_list/valminusminival2014.txt', 47 | '/home/xiangyuzhu/workspace/data/coco/samsung_pose_data/json/valminusminival2014.json'] 48 | ) 49 | parser.add_argument('--val_dir', nargs='+', type=str, 50 | dest='val_dir', help='the path of val file', 51 | default=['/home/xiangyuzhu/workspace/data/coco/samsung_pose_data/img_list/minival2014.txt', 52 | '/home/xiangyuzhu/workspace/data/coco/samsung_pose_data/mask_list/minival2014.txt', 53 | '/home/xiangyuzhu/workspace/data/coco/samsung_pose_data/json/minival2014.json']) 54 | parser.add_argument('--num_classes', default=1000, type=int, 55 | dest='num_classes', help='num_classes (default: 1000)') 56 | parser.add_argument('--logdir', default='./logs', type=str, 57 | dest='logdir', help='path of log') 58 | return parser.parse_args() 59 | 60 | 61 | def construct_model(args): 62 | if not args.snapshot: 63 | resnet_model = '/home/xiangyuzhu/workspace/data/pretrain/ResNet/resnet50-19c8e357.pth' 64 | print('--------load pretrain model from {}----------------'.format(resnet_model)) 65 | model = fpn.Pose_Estimation(vec_num=38, heat_num=19) 66 | #model.load_weights(resnet_model) 67 | else: 68 | print('--------load snapshot from {}----------------'.format(args.snapshot)) 69 | model = fpn.Pose_Estimation(vec_num=38, heat_num=19) 70 | state_dict = torch.load(args.snapshot)['state_dict'] 71 | model.load_state_dict(state_dict) 72 | 73 | print(model) 74 | model.cuda() 75 | 76 | return model 77 | 78 | 79 | def get_parameters(model, config, isdefault=True): 80 | if isdefault: 81 | return model.parameters(), [1.] 82 | lr_1 = [] 83 | lr_2 = [] 84 | lr_4 = [] 85 | lr_8 = [] 86 | params_dict = dict(model.named_parameters()) 87 | for key, value in params_dict.items(): 88 | if ('model1_' not in key) and ('model0.' not in key): 89 | if key[-4:] == 'bias': 90 | lr_8.append(value) 91 | else: 92 | lr_4.append(value) 93 | elif key[-4:] == 'bias': 94 | lr_2.append(value) 95 | else: 96 | lr_1.append(value) 97 | params = [{'params': lr_1, 'lr': config.base_lr}, 98 | {'params': lr_2, 'lr': config.base_lr * 2.}, 99 | {'params': lr_4, 'lr': config.base_lr * 4.}, 100 | {'params': lr_8, 'lr': config.base_lr * 8.}] 101 | 102 | return params, [1., 2., 4., 8.] 
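get_parameters above reproduces the Caffe-style per-layer learning-rate multipliers: weights and biases of layers whose names contain 'model0.' or 'model1_' train at 1x and 2x the base rate, and everything else at 4x and 8x (with the FPN model, whose parameter names match neither pattern, every parameter lands in the 4x/8x groups). utils.adjust_learning_rate itself is not included in this dump; the sketch below shows how a 'step' policy plus these multipliers is conventionally applied -- the body is an assumption inferred from the call site and config.yml, not the repository's actual code:

    def adjust_learning_rate(optimizer, iters, base_lr, policy='step',
                             policy_parameter=None, multiple=(1.,)):
        # sketch of the 'step' branch only; per the config.yml comments the
        # real utils.py presumably also handles 'exp', 'inv', 'multistep',
        # 'poly' and 'sigmoid'
        if policy == 'step':
            lr = base_lr * policy_parameter['gamma'] ** (iters // policy_parameter['step_size'])
        else:
            lr = base_lr
        for param_group, m in zip(optimizer.param_groups, multiple):
            param_group['lr'] = lr * m
        return lr

With experiments/fpn/config.yml (base_lr 4e-05, gamma 0.333, step_size 50000), the base rate stays at 4e-05 until iteration 50k, drops to about 1.33e-05 until 100k, and to about 4.43e-06 until 150k.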
103 | 104 | 105 | def to_np(x): 106 | return x.data.cpu().numpy() 107 | 108 | 109 | def train_val(model, args): 110 | traindir = args.train_dir 111 | valdir = args.val_dir 112 | 113 | config = Config(args.config) 114 | cudnn.benchmark = True 115 | 116 | # Set the logger 117 | #logger = Logger('./log') 118 | 119 | train_loader = torch.utils.data.DataLoader( 120 | coco_loader.coco_loader(traindir, 8, 121 | Mytransforms.Compose([Mytransforms.RandomResized(), 122 | Mytransforms.RandomRotate(40), 123 | Mytransforms.RandomCrop(368), 124 | Mytransforms.RandomHorizontalFlip(), 125 | ])), 126 | batch_size=config.batch_size, shuffle=True, 127 | num_workers=config.workers, pin_memory=True) 128 | 129 | # if config.test_interval != 0 and args.val_dir is not None: 130 | # val_loader = torch.utils.data.DataLoader( 131 | # coco_loader.coco_loader(valdir, 8, 132 | # Mytransforms.Compose([Mytransforms.TestResized(368), 133 | # ])), 134 | # batch_size=4, shuffle=False, 135 | # num_workers=config.workers, pin_memory=True) 136 | 137 | criterion = nn.MSELoss().cuda() 138 | 139 | params, multiple = get_parameters(model, config, False) 140 | 141 | optimizer = torch.optim.SGD(params, config.base_lr, momentum=config.momentum, 142 | weight_decay=config.weight_decay) 143 | 144 | batch_time = AverageMeter() 145 | data_time = AverageMeter() 146 | losses = AverageMeter() 147 | losses_list = [AverageMeter() for i in range(12)] 148 | top1 = AverageMeter() 149 | topk = AverageMeter() 150 | 151 | end = time.time() 152 | iters = config.start_iters 153 | best_model = config.best_model 154 | learning_rate = config.base_lr 155 | 156 | model.train() 157 | 158 | heat_weight = 46 * 46 * 19 / 2.0 # for convenient to compare with origin code 159 | vec_weight = 46 * 46 * 38 / 2.0 160 | 161 | while iters < config.max_iter: 162 | # ---------------------------------------------------- train ------------------------------------------ 163 | for i, (input, heatmap, vecmap, mask, kpt) in enumerate(train_loader): 164 | 165 | learning_rate = adjust_learning_rate(optimizer, iters, config.base_lr, policy=config.lr_policy, 166 | policy_parameter=config.policy_parameter, multiple=multiple) 167 | data_time.update(time.time() - end) 168 | 169 | input = input.cuda(async=True) 170 | heatmap = heatmap.cuda(async=True) 171 | vecmap = vecmap.cuda(async=True) 172 | mask = mask.cuda(async=True) 173 | 174 | input_var = torch.autograd.Variable(input) 175 | heatmap_var = torch.autograd.Variable(heatmap) 176 | vecmap_var = torch.autograd.Variable(vecmap) 177 | mask_var = torch.autograd.Variable(mask) 178 | 179 | #vec1, heat1, vec2, heat2, vec3, heat3, vec4, heat4, vec5, heat5, vec6, heat6 = model(input_var, mask_var) 180 | vec1, heat1, vec2, heat2 = model(input_var, mask_var) 181 | loss1_1 = criterion(vec1, vecmap_var) * vec_weight 182 | loss1_2 = criterion(heat1, heatmap_var) * heat_weight 183 | loss2_1 = criterion(vec2, vecmap_var) * vec_weight 184 | loss2_2 = criterion(heat2, heatmap_var) * heat_weight 185 | # loss3_1 = criterion(vec3, vecmap_var) * vec_weight 186 | # loss3_2 = criterion(heat3, heatmap_var) * heat_weight 187 | # loss4_1 = criterion(vec4, vecmap_var) * vec_weight 188 | # loss4_2 = criterion(heat4, heatmap_var) * heat_weight 189 | # loss5_1 = criterion(vec5, vecmap_var) * vec_weight 190 | # loss5_2 = criterion(heat5, heatmap_var) * heat_weight 191 | # loss6_1 = criterion(vec6, vecmap_var) * vec_weight 192 | # loss6_2 = criterion(heat6, heatmap_var) * heat_weight 193 | 194 | #loss = loss1_1 + loss1_2 + loss2_1 + loss2_2 + loss3_1 + loss3_2 + 
loss4_1 + loss4_2 + loss5_1 + loss5_2 + loss6_1 + loss6_2
195 |             loss = loss1_1 + loss1_2 + loss2_1 + loss2_2
196 | 
197 |             losses.update(loss.data[0], input.size(0))
198 |             # loss_list = [loss1_1, loss1_2, loss2_1, loss2_2, loss3_1, loss3_2, loss4_1, loss4_2, loss5_1, loss5_2,
199 |             #              loss6_1, loss6_2]
200 |             loss_list = [loss1_1, loss1_2, loss2_1, loss2_2]
201 |             for cnt, l in enumerate(loss_list):
202 |                 losses_list[cnt].update(l.data[0], input.size(0))
203 | 
204 |             optimizer.zero_grad()
205 |             loss.backward()
206 |             optimizer.step()
207 | 
208 |             batch_time.update(time.time() - end)
209 |             end = time.time()
210 | 
211 |             iters += 1
212 |             if iters % config.display == 0:
213 |                 print('Train Iteration: {0}\t'
214 |                       'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
215 |                       'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:.3f})\n'
216 |                       'Learning rate = {2}\n'
217 |                       'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
218 |                     iters, config.display, learning_rate, batch_time=batch_time,
219 |                     data_time=data_time, loss=losses))
220 |                 for cnt in range(0, len(loss_list), 2):  # only the (vec, heat) pairs this two-stage model returns
221 |                     print('Loss{0}_1 = {loss1.val:.8f} (ave = {loss1.avg:.8f})\t'
222 |                           'Loss{1}_2 = {loss2.val:.8f} (ave = {loss2.avg:.8f})'.format(cnt / 2 + 1, cnt / 2 + 1,
223 |                                                                                        loss1=losses_list[cnt],
224 |                                                                                        loss2=losses_list[cnt + 1]))
225 |                 print(time.strftime(
226 |                     '%Y-%m-%d %H:%M:%S -----------------------------------------------------------------------------------------------------------------\n',
227 |                     time.localtime()))
228 | 
229 |                 batch_time.reset()
230 |                 data_time.reset()
231 |                 losses.reset()
232 |                 for cnt in range(len(loss_list)):
233 |                     losses_list[cnt].reset()
234 | 
235 |             # ------------------------------------------ val ---------------------------------------------------------------------
236 |             # #if config.test_interval != 0 and args.val_dir is not None and iters % config.test_interval == 0:
237 |             # if True:
238 |             #     model.eval()
239 |             #     for j, (input, heatmap, vecmap, mask, kpt) in enumerate(val_loader):
240 |             #         imgs = input.numpy()
241 |             #         heatmap = heatmap.numpy()
242 |             #         vecmap = vecmap.numpy()
243 |             #         mask = mask.numpy()
244 |             #
245 |             #         canvas_targs = np.zeros(imgs.shape)
246 |             #         canvas_preds = np.zeros(imgs.shape)
247 |             #
248 |             #         for i in range(len(imgs)):
249 |             #             img = imgs[i]
250 |             #             img = img.transpose(1, 2, 0)  # 368, 368, 3
251 |             #             img = (img + 1) / 2 * 255
252 |             #
253 |             #             # visualize GT by kpts
254 |             #             # canvas_kpts = img.copy()
255 |             #             # vis_util.draw_kpts(canvas_kpts, kpts)
256 |             #
257 |             #             # visualize results derived from target
258 |             #             canvas_targ = img.copy()
259 |             #             canvas_targ = vis_util.draw_result(heatmap[i], vecmap[i], canvas_targ)
260 |             #             canvas_targ = canvas_targ.transpose(2, 0, 1)
261 |             #             canvas_targs[i] = canvas_targ
262 |             #
263 |             #         # visualize predicted results
264 |             #         input = input.cuda(async=True)
265 |             #         input_var = torch.autograd.Variable(input, volatile=True)
266 |             #         mask_white = np.ones((mask.shape), dtype=np.float32)
267 |             #         mask_white_var = torch.autograd.Variable(torch.from_numpy(mask_white).cuda())
268 |             #
269 |             #         vec1, heat1, vec2, heat2, vec3, heat3, vec4, heat4, vec5, heat5, vec6, heat6 \
270 |             #             = model(input_var,mask_white_var)
271 |             #
272 |             #         heat_out = heat6.data.cpu().numpy()
273 |             #         vec_out = vec6.data.cpu().numpy()
274 |             #
275 |             #         for i in range(len(imgs)):
276 |             #             img = imgs[i]
277 |             #             img = img.transpose(1, 2, 0)  # 368, 368, 3
278 |             #             img = (img + 1) / 2 * 255
279 |             #
280 |             #             canvas_pred = img.copy()
281 |             #             canvas_pred = vis_util.draw_result(heat_out[i], vec_out[i], canvas_pred)
282 |             #
canvas_pred = canvas_pred.transpose(2, 0, 1) 283 | # canvas_preds[i] = canvas_pred 284 | # 285 | # ## Log images 286 | # 287 | # imgs = { 288 | # 'target': canvas_targs, 289 | # 'predict': canvas_preds 290 | # } 291 | # for tag, images in imgs.items(): 292 | # logger.image_summary(tag, images, 0) 293 | # 294 | # #break 295 | # 296 | # 297 | # model.train() 298 | 299 | if iters % 5000 == 0: 300 | torch.save({ 301 | 'iter': iters, 302 | 'state_dict': model.state_dict(), 303 | }, str(iters) + '.pth.tar') 304 | 305 | if iters == config.max_iter: 306 | break 307 | 308 | 309 | if __name__ == '__main__': 310 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 311 | args = parse() 312 | model = construct_model(args) 313 | train_val(model, args) -------------------------------------------------------------------------------- /logger.py: -------------------------------------------------------------------------------- 1 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 2 | import tensorflow as tf 3 | import numpy as np 4 | import scipy.misc 5 | 6 | try: 7 | from StringIO import StringIO # Python 2.7 8 | except ImportError: 9 | from io import BytesIO # Python 3.x 10 | 11 | 12 | class Logger(object): 13 | 14 | def __init__(self, log_dir): 15 | """Create a summary writer logging to log_dir.""" 16 | self.writer = tf.summary.FileWriter(log_dir) 17 | 18 | def scalar_summary(self, tag, value, step): 19 | """Log a scalar variable.""" 20 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)]) 21 | self.writer.add_summary(summary, step) 22 | 23 | def image_summary(self, tag, images, step): 24 | """Log a list of images.""" 25 | 26 | img_summaries = [] 27 | for i, img in enumerate(images): 28 | # Write the image to a string 29 | try: 30 | s = StringIO() 31 | except: 32 | s = BytesIO() 33 | scipy.misc.toimage(img).save(s, format="png") 34 | 35 | # Create an Image object 36 | img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(), 37 | height=img.shape[0], 38 | width=img.shape[1]) 39 | # Create a Summary value 40 | img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum)) 41 | 42 | # Create and write Summary 43 | summary = tf.Summary(value=img_summaries) 44 | self.writer.add_summary(summary, step) 45 | 46 | def histo_summary(self, tag, values, step, bins=1000): 47 | """Log a histogram of the tensor of values.""" 48 | 49 | # Create a histogram using numpy 50 | counts, bin_edges = np.histogram(values, bins=bins) 51 | 52 | # Fill the fields of the histogram proto 53 | hist = tf.HistogramProto() 54 | hist.min = float(np.min(values)) 55 | hist.max = float(np.max(values)) 56 | hist.num = int(np.prod(values.shape)) 57 | hist.sum = float(np.sum(values)) 58 | hist.sum_squares = float(np.sum(values ** 2)) 59 | 60 | # Drop the start of the first bin 61 | bin_edges = bin_edges[1:] 62 | 63 | # Add bin edges and counts 64 | for edge in bin_edges: 65 | hist.bucket_limit.append(edge) 66 | for c in counts: 67 | hist.bucket.append(c) 68 | 69 | # Create and write Summary 70 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)]) 71 | self.writer.add_summary(summary, step) 72 | self.writer.flush() -------------------------------------------------------------------------------- /model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xiangyu-CAS/Realtime_Multi-Person_Pose_Estimation.PyTorch/79cc5af5878171f83be86ffdb9af1c6ac09116a4/model/__init__.py 
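logger.py above is the standard TensorFlow-1.x summary-writer wrapper, but the trainers only reference it through commented-out code. A minimal, self-contained usage sketch (the './logs' directory and the tag names are illustrative; it needs the same TF 1.x / scipy.misc stack logger.py was written against):

    import numpy as np
    from logger import Logger

    logger = Logger('./logs')

    # scalars: one curve per tag, keyed by iteration
    for step in range(3):
        logger.scalar_summary('train/loss', 1.0 / (step + 1), step)

    # images: logged individually under 'predict/0', 'predict/1', ...
    fake_batch = np.zeros((2, 368, 368, 3), dtype=np.uint8)
    logger.image_summary('predict', fake_batch, 0)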
-------------------------------------------------------------------------------- /model/fpn.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | 
5 | import math
6 | import os, sys
7 | 
8 | ############################################################
9 | # FPN Graph
10 | ############################################################
11 | class FPN(nn.Module):
12 |     def __init__(self, C1, C2, C3, C4, C5, out_channels):
13 |         super(FPN, self).__init__()
14 |         self.out_channels = out_channels
15 |         self.C1 = C1
16 |         self.C2 = C2
17 |         self.C3 = C3
18 |         self.C4 = C4
19 |         self.C5 = C5
20 |         self.P6 = nn.MaxPool2d(kernel_size=1, stride=2)
21 |         self.P5_conv1 = nn.Conv2d(2048, self.out_channels, kernel_size=1, stride=1, padding=0)
22 |         self.P5_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
23 | 
24 |         self.P4_conv1 = nn.Conv2d(1024, self.out_channels, kernel_size=1, stride=1, padding=0)
25 |         self.P4_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
26 | 
27 |         self.P3_conv1 = nn.Conv2d(512, self.out_channels, kernel_size=1, stride=1, padding=0)
28 |         self.P3_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
29 | 
30 |         self.P2_conv1 = nn.Conv2d(256, self.out_channels, kernel_size=1, stride=1, padding=0)
31 |         self.P2_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
32 | 
33 |     def forward(self, x):
34 |         x = self.C1(x)
35 |         x = self.C2(x)
36 |         c2_out = x
37 |         x = self.C3(x)
38 |         c3_out = x
39 |         x = self.C4(x)
40 |         c4_out = x
41 |         x = self.C5(x)
42 |         p5_out = self.P5_conv1(x)
43 |         p4_out = self.P4_conv1(c4_out) + F.upsample(p5_out, size=c4_out.size()[2:])  # upsample to the lateral map's size; a fixed scale_factor=2 cannot align the odd-sized maps a 368 crop produces (23x23 C4 vs. 12x12 C5)
44 |         p3_out = self.P3_conv1(c3_out) + F.upsample(p4_out, size=c3_out.size()[2:])
45 |         p2_out = self.P2_conv1(c2_out) + F.upsample(p3_out, size=c2_out.size()[2:])
46 | 
47 |         p5_out = self.P5_conv2(p5_out)
48 |         p4_out = self.P4_conv2(p4_out)
49 |         p3_out = self.P3_conv2(p3_out)
50 |         p2_out = self.P2_conv2(p2_out)
51 | 
52 |         # P6 is used for the 5th anchor scale in RPN. Generated by
53 |         # subsampling from P5 with stride of 2.
54 | p6_out = self.P6(p5_out) 55 | 56 | return [p2_out, p3_out, p4_out, p5_out, p6_out] 57 | 58 | 59 | ############################################################ 60 | # Resnet Graph 61 | ############################################################ 62 | 63 | class Bottleneck(nn.Module): 64 | expansion = 4 65 | 66 | def __init__(self, inplanes, planes, stride=1, downsample=None): 67 | super(Bottleneck, self).__init__() 68 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 69 | self.bn1 = nn.BatchNorm2d(planes) 70 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 71 | padding=1, bias=False) 72 | self.bn2 = nn.BatchNorm2d(planes) 73 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 74 | self.bn3 = nn.BatchNorm2d(planes * 4) 75 | self.relu = nn.ReLU(inplace=True) 76 | self.downsample = downsample 77 | self.stride = stride 78 | 79 | def forward(self, x): 80 | residual = x 81 | 82 | out = self.conv1(x) 83 | out = self.bn1(out) 84 | out = self.relu(out) 85 | 86 | out = self.conv2(out) 87 | out = self.bn2(out) 88 | out = self.relu(out) 89 | 90 | out = self.conv3(out) 91 | out = self.bn3(out) 92 | 93 | if self.downsample is not None: 94 | residual = self.downsample(x) 95 | 96 | out += residual 97 | out = self.relu(out) 98 | 99 | return out 100 | 101 | class ResNet(nn.Module): 102 | 103 | def __init__(self, architecture, stage5=False): 104 | super(ResNet, self).__init__() 105 | assert architecture in ["resnet50", "resnet101"] 106 | self.inplanes = 64 107 | self.layers = [3, 4, {"resnet50": 6, "resnet101": 23}[architecture], 3] 108 | self.block = Bottleneck 109 | self.stage5 = stage5 110 | 111 | self.C1 = nn.Sequential( 112 | nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False), 113 | nn.BatchNorm2d(64, eps=0.001, momentum=0.01), 114 | nn.ReLU(inplace=True), 115 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1), 116 | ) 117 | self.C2 = self.make_layer(self.block, 64, self.layers[0]) 118 | self.C3 = self.make_layer(self.block, 128, self.layers[1], stride=2) 119 | self.C4 = self.make_layer(self.block, 256, self.layers[2], stride=2) 120 | if self.stage5: 121 | self.C5 = self.make_layer(self.block, 512, self.layers[3], stride=2) 122 | else: 123 | self.C5 = None 124 | 125 | def forward(self, x): 126 | x = self.C1(x) 127 | x = self.C2(x) 128 | x = self.C3(x) 129 | x = self.C4(x) 130 | x = self.C5(x) 131 | return x 132 | 133 | 134 | def stages(self): 135 | return [self.C1, self.C2, self.C3, self.C4, self.C5] 136 | 137 | def make_layer(self, block, planes, blocks, stride=1): 138 | downsample = None 139 | if stride != 1 or self.inplanes != planes * block.expansion: 140 | downsample = nn.Sequential( 141 | nn.Conv2d(self.inplanes, planes * block.expansion, 142 | kernel_size=1, stride=stride), 143 | nn.BatchNorm2d(planes * block.expansion, eps=0.001, momentum=0.01), 144 | ) 145 | 146 | layers = [] 147 | layers.append(block(self.inplanes, planes, stride, downsample)) 148 | self.inplanes = planes * block.expansion 149 | for i in range(1, blocks): 150 | layers.append(block(self.inplanes, planes)) 151 | 152 | return nn.Sequential(*layers) 153 | 154 | 155 | ############################################################ 156 | # pose estimation Graph 157 | ############################################################ 158 | 159 | 160 | class Pose_Estimation(nn.Module): 161 | def __init__(self, vec_num, heat_num): 162 | super(Pose_Estimation, self).__init__() 163 | resnet = ResNet('resnet50', stage5=True) 164 | C1, C2, C3, C4, C5 = resnet.stages() 
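(Aside: with the size-preserving upsampling above, the backbone-plus-FPN stack that the following lines wire together can be sanity-checked on the 368x368 training crop. A throwaway snippet using only the classes defined in this file, not part of the repository:)

    import torch
    from torch.autograd import Variable

    backbone = ResNet('resnet50', stage5=True)
    fpn = FPN(*backbone.stages(), out_channels=256)
    p2, p3, p4, p5, p6 = fpn(Variable(torch.randn(1, 3, 368, 368)))
    print(p3.size())  # (1, 256, 46, 46): the stride-8 level the pose head consumes,
                      # matching the 46 * 46 loss normalization in the trainer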
165 |         self.fpn = FPN(C1, C2, C3, C4, C5, out_channels=256)
166 | 
167 |         self.block1 = nn.Sequential(
168 |             nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1),
169 |             nn.ReLU(inplace=True),
170 |             nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
171 |             nn.ReLU(inplace=True),
172 |             nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
173 |             nn.ReLU(inplace=True),
174 |             nn.Conv2d(128, 512, kernel_size=1, stride=1, padding=0),
175 |             nn.ReLU(inplace=True)
176 |         )
177 |         self.predict_L1_stage1 = nn.Conv2d(512, vec_num, kernel_size=1, stride=1, padding=0)
178 |         self.predict_L2_stage1 = nn.Conv2d(512, heat_num, kernel_size=1, stride=1, padding=0)
179 | 
180 |         self.block2 = nn.Sequential(
181 |             nn.Conv2d(256 + vec_num + heat_num, 128, kernel_size=7, stride=1, padding=3),
182 |             nn.ReLU(inplace=True),
183 |             nn.Conv2d(128, 128, kernel_size=7, stride=1, padding=3),
184 |             nn.ReLU(inplace=True),
185 |             nn.Conv2d(128, 128, kernel_size=7, stride=1, padding=3),
186 |             nn.ReLU(inplace=True),
187 |             nn.Conv2d(128, 128, kernel_size=7, stride=1, padding=3),
188 |             nn.ReLU(inplace=True)
189 |         )
190 | 
191 |         self.predict_L1_stage2 = nn.Conv2d(128, vec_num, kernel_size=1, stride=1, padding=0)
192 |         self.predict_L2_stage2 = nn.Conv2d(128, heat_num, kernel_size=1, stride=1, padding=0)
193 | 
194 |         self.initialize_weights()
195 | 
196 | 
197 | 
198 |     def initialize_weights(self):
199 |         """Initialize model weights.
200 |         """
201 |         for m in self.modules():
202 |             if isinstance(m, nn.Conv2d):
203 |                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
204 |                 m.weight.data.normal_(0, math.sqrt(2. / n))
205 |                 if m.bias is not None:
206 |                     m.bias.data.zero_()
207 |             elif isinstance(m, nn.BatchNorm2d):
208 |                 m.weight.data.fill_(1)
209 |                 m.bias.data.zero_()
210 |             elif isinstance(m, nn.Linear):
211 |                 m.weight.data.normal_(0, 0.01)
212 |                 m.bias.data.zero_()
213 | 
214 |     def load_weights(self, filepath):
215 |         """Modified version of the corresponding Keras function with
216 |         the addition of multi-GPU support and the ability to exclude
217 |         some layers from loading; layer exclusion is not implemented
218 |         in this version, so the full state dict is loaded.
219 |         """
220 |         if os.path.exists(filepath):
221 |             self.load_state_dict(torch.load(filepath))
222 |         else:
223 |             print("Weight file not found ...")
224 | 
225 |     def forward(self, x, mask):
226 | 
227 |         [p2_out, p3_out, p4_out, p5_out, p6_out] = self.fpn(x)
228 | 
229 |         out1 = self.block1(p3_out)
230 | 
231 |         vec1 = self.predict_L1_stage1(out1)   # L1 branch: part-affinity vector fields
232 |         heat1 = self.predict_L2_stage1(out1)  # L2 branch: keypoint heatmaps
233 | 
234 |         vec1_mask = vec1 * mask
235 |         heat1_mask = heat1 * mask
236 | 
237 |         out2 = self.block2(torch.cat([vec1, heat1, p3_out], 1))  # stage-2 input: FPN feature + stage-1 predictions (256 + vec_num + heat_num channels)
238 | 
239 |         vec2 = self.predict_L1_stage2(out2)
240 |         heat2 = self.predict_L2_stage2(out2)
241 | 
242 |         vec2_mask = vec2 * mask
243 |         heat2_mask = heat2 * mask
244 | 
245 |         return vec1_mask, heat1_mask, vec2_mask, heat2_mask
246 | 
-------------------------------------------------------------------------------- /model/resnet50_conv3.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import os
4 | import sys
5 | import math
6 | import torchvision.models as models
7 | 
8 | class resnet50_conv3(nn.Module):
9 | 
10 |     def __init__(self, net_dict, batch_norm=False):
11 | 
12 |         super(resnet50_conv3, self).__init__()
13 | 
14 |         #self.model0 = self._make_layer(net_dict[0], batch_norm, True)
15 |         resnet50 = models.resnet50()
16 |         self.conv1 = resnet50.conv1
17 |         self.bn1 = resnet50.bn1
18 |         self.relu = resnet50.relu
19 |         self.maxpool = resnet50.maxpool
20 |         self.layer1 = resnet50.layer1
21 |         self.layer2 = resnet50.layer2
22 |         #self.layer3 = resnet50.layer3
23 | 
24 |         self.conv_reduce = nn.Conv2d(512, 128, 1, 1, 0)
25 | 
26 | 
27 |         self.model1_1 = self._make_layer(net_dict[1][0], batch_norm)
28 |         self.model1_2 = self._make_layer(net_dict[1][1], batch_norm)
29 | 
30 |         self.model2_1 = self._make_layer(net_dict[2][0], batch_norm)
31 |         self.model2_2 = self._make_layer(net_dict[2][1], batch_norm)
32 | 
33 |         self.model3_1 = self._make_layer(net_dict[3][0], batch_norm)
34 |         self.model3_2 = self._make_layer(net_dict[3][1], batch_norm)
35 | 
36 |         self.model4_1 = self._make_layer(net_dict[4][0], batch_norm)
37 |         self.model4_2 = self._make_layer(net_dict[4][1], batch_norm)
38 | 
39 |         self.model5_1 = self._make_layer(net_dict[5][0], batch_norm)
40 |         self.model5_2 = self._make_layer(net_dict[5][1], batch_norm)
41 | 
42 |         self.model6_1 = self._make_layer(net_dict[6][0], batch_norm)
43 |         self.model6_2 = self._make_layer(net_dict[6][1], batch_norm)
44 | 
45 |         for m in self.modules():
46 |             if isinstance(m, nn.Conv2d):
47 |                 m.weight.data.normal_(0, 0.01)
48 |                 if m.bias is not None:
49 |                     m.bias.data.zero_()
50 |             elif isinstance(m, nn.BatchNorm2d):
51 |                 m.weight.data.fill_(1)
52 |                 m.bias.data.zero_()
53 | 
54 |     def _make_layer(self, net_dict, batch_norm=False, last_activity=False):
55 | 
56 |         layers = []
57 |         length = len(net_dict) - 1
58 |         for i in range(length):
59 |             one_layer = net_dict[i]
60 |             key = one_layer.keys()[0]
61 |             v = one_layer[key]
62 | 
63 |             if 'pool' in key:
64 |                 layers += [nn.MaxPool2d(kernel_size=v[0], stride=v[1], padding=v[2])]
65 |             else:
66 |                 conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride=v[3], padding=v[4])
67 |                 if batch_norm:
68 |                     layers += [conv2d, nn.BatchNorm2d(v[1]), nn.ReLU(inplace=True)]
69 |                 else:
70 |                     layers += [conv2d, nn.ReLU(inplace=True)]
71 | 
72 |         if last_activity:
73 |             one_layer = net_dict[-1]
74 |             key = one_layer.keys()[0]
75 |             v = one_layer[key]
76 | 
77 |             conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride=v[3],
padding=v[4]) 78 | if batch_norm: 79 | layers += [conv2d, nn.BatchNorm2d(v[1]), nn.ReLU(inplace=True)] 80 | else: 81 | layers += [conv2d, nn.ReLU(inplace=True)] 82 | else: 83 | one_layer = net_dict[-1] 84 | key = one_layer.keys()[0] 85 | v = one_layer[key] 86 | 87 | conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride=v[3], padding=v[4]) 88 | layers += [conv2d] 89 | return nn.Sequential(*layers) 90 | 91 | def forward(self, x, mask): 92 | #out0 = self.model0(x) 93 | x = self.conv1(x) 94 | x = self.bn1(x) 95 | x = self.relu(x) 96 | x = self.maxpool(x) 97 | 98 | x = self.layer1(x) 99 | x = self.layer2(x) 100 | #x = self.layer3(x) 101 | 102 | x = self.conv_reduce(x) 103 | out0 = self.relu(x) 104 | 105 | 106 | 107 | 108 | out1_1 = self.model1_1(out0) 109 | out1_2 = self.model1_2(out0) 110 | out1 = torch.cat([out1_1, out1_2, out0], 1) 111 | out1_vec_mask = out1_1 * mask 112 | out1_heat_mask = out1_2 * mask 113 | 114 | out2_1 = self.model2_1(out1) 115 | out2_2 = self.model2_2(out1) 116 | out2 = torch.cat([out2_1, out2_2, out0], 1) 117 | out2_vec_mask = out2_1 * mask 118 | out2_heat_mask = out2_2 * mask 119 | 120 | out3_1 = self.model3_1(out2) 121 | out3_2 = self.model3_2(out2) 122 | out3 = torch.cat([out3_1, out3_2, out0], 1) 123 | out3_vec_mask = out3_1 * mask 124 | out3_heat_mask = out3_2 * mask 125 | 126 | out4_1 = self.model4_1(out3) 127 | out4_2 = self.model4_2(out3) 128 | out4 = torch.cat([out4_1, out4_2, out0], 1) 129 | out4_vec_mask = out4_1 * mask 130 | out4_heat_mask = out4_2 * mask 131 | 132 | out5_1 = self.model5_1(out4) 133 | out5_2 = self.model5_2(out4) 134 | out5 = torch.cat([out5_1, out5_2, out0], 1) 135 | out5_vec_mask = out5_1 * mask 136 | out5_heat_mask = out5_2 * mask 137 | 138 | out6_1 = self.model6_1(out5) 139 | out6_2 = self.model6_2(out5) 140 | out6_vec_mask = out6_1 * mask 141 | out6_heat_mask = out6_2 * mask 142 | 143 | return out1_vec_mask, out1_heat_mask, out2_vec_mask, out2_heat_mask, out3_vec_mask, out3_heat_mask, out4_vec_mask, out4_heat_mask, out5_vec_mask, out5_heat_mask, out6_vec_mask, out6_heat_mask 144 | 145 | 146 | def PoseModel(num_point, num_vector, num_stages=6, batch_norm=False, pretrained=False): 147 | 148 | net_dict = [] 149 | block0 = [{'conv1_1': [3, 64, 3, 1, 1]}, {'conv1_2': [64, 64, 3, 1, 1]}, {'pool1': [2, 2, 0]}, 150 | {'conv2_1': [64, 128, 3, 1, 1]}, {'conv2_2': [128, 128, 3, 1, 1]}, {'pool2': [2, 2, 0]}, 151 | {'conv3_1': [128, 256, 3, 1, 1]}, {'conv3_2': [256, 256, 3, 1, 1]}, {'conv3_3': [256, 256, 3, 1, 1]}, {'conv3_4': [256, 256, 3, 1, 1]}, {'pool3': [2, 2, 0]}, 152 | {'conv4_1': [256, 512, 3, 1, 1]}, {'conv4_2': [512, 512, 3, 1, 1]}, {'conv4_3_cpm': [512, 256, 3, 1, 1]}, {'conv4_4_cpm': [256, 128, 3, 1, 1]}] 153 | net_dict.append(block0) 154 | 155 | block1 = [[], []] 156 | in_vec = [0, 128, 128, 128, 128, 512, num_vector * 2] 157 | in_heat = [0, 128, 128, 128, 128, 512, num_point] 158 | for i in range(1, 6): 159 | if i < 4: 160 | block1[0].append({'conv{}_stage1_vec'.format(i) :[in_vec[i], in_vec[i + 1], 3, 1, 1]}) 161 | block1[1].append({'conv{}_stage1_heat'.format(i):[in_heat[i], in_heat[i + 1], 3, 1, 1]}) 162 | else: 163 | block1[0].append({'conv{}_stage1_vec'.format(i):[in_vec[i], in_vec[i + 1], 1, 1, 0]}) 164 | block1[1].append({'conv{}_stage1_heat'.format(i):[in_heat[i], in_heat[i + 1], 1, 1, 0]}) 165 | net_dict.append(block1) 166 | 167 | in_vec_1 = [0, 128 + num_point + num_vector * 2, 128, 128, 128, 128, 128, 128, num_vector * 2] 168 | in_heat_1 = [0, 128 + num_point + num_vector * 2, 128, 128, 128, 128, 
128, 128, num_point] 169 | for j in range(2, num_stages + 1): 170 | blocks = [[], []] 171 | for i in range(1, 8): 172 | if i < 6: 173 | blocks[0].append({'conv{}_stage{}_vec'.format(i, j):[in_vec_1[i], in_vec_1[i + 1], 7, 1, 3]}) 174 | blocks[1].append({'conv{}_stage{}_heat'.format(i, j):[in_heat_1[i], in_heat_1[i + 1], 7, 1, 3]}) 175 | else: 176 | blocks[0].append({'conv{}_stage{}_vec'.format(i, j):[in_vec_1[i], in_vec_1[i + 1], 1, 1, 0]}) 177 | blocks[1].append({'conv{}_stage{}_heat'.format(i, j):[in_heat_1[i], in_heat_1[i + 1], 1, 1, 0]}) 178 | net_dict.append(blocks) 179 | 180 | model = resnet50_conv3(net_dict, batch_norm) 181 | pretrain_model_file = '/root/.torch/models/resnet50-19c8e357.pth' 182 | print("Loading pretrained weights from %s" % (pretrain_model_file)) 183 | model_dict = model.state_dict() 184 | pretrain_state_dict = torch.load(pretrain_model_file) 185 | pretrain_state_dict = {k: v for k, v in pretrain_state_dict.items() if k in model_dict} 186 | model_dict.update(pretrain_state_dict) 187 | model.load_state_dict(model_dict) 188 | 189 | # if pretrained: 190 | # parameter_num = 10 191 | # if batch_norm: 192 | # vgg19 = models.vgg19_bn(pretrained=True) 193 | # parameter_num *= 6 194 | # else: 195 | # vgg19 = models.vgg19(pretrained=True) 196 | # parameter_num *= 2 197 | # 198 | # vgg19_state_dict = vgg19.state_dict() 199 | # vgg19_keys = vgg19_state_dict.keys() 200 | # 201 | # model_dict = model.state_dict() 202 | # from collections import OrderedDict 203 | # weights_load = OrderedDict() 204 | # 205 | # for i in range(parameter_num): 206 | # weights_load[model.state_dict().keys()[i]] = vgg19_state_dict[vgg19_keys[i]] 207 | # model_dict.update(weights_load) 208 | # model.load_state_dict(model_dict) 209 | resnet_model = models.resnet50(pretrained=True) 210 | 211 | 212 | 213 | return model 214 | 215 | 216 | if __name__ == '__main__': 217 | 218 | print PoseModel(19, 6, True, True) 219 | -------------------------------------------------------------------------------- /model/vgg_1branch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import os 4 | import sys 5 | import math 6 | import torchvision.models as models 7 | 8 | def make_net_dict(): 9 | 10 | feature = [{'conv1_1': [3, 64, 3, 1, 1]}, {'conv1_2': [64, 64, 3, 1, 1]}, {'pool1': [2, 2, 0]}, 11 | {'conv2_1': [64, 128, 3, 1, 1]}, {'conv2_2': [128, 128, 3, 1, 1]}, {'pool2': [2, 2, 0]}, 12 | {'conv3_1': [128, 256, 3, 1, 1]}, {'conv3_2': [256, 256, 3, 1, 1]}, {'conv3_3': [256, 256, 3, 1, 1]}, {'conv3_4': [256, 256, 3, 1, 1]}, {'pool3': [2, 2, 0]}, 13 | {'conv4_1': [256, 512, 3, 1, 1]}, {'conv4_2': [512, 512, 3, 1, 1]}, {'conv4_3_cpm': [512, 256, 3, 1, 1]}, {'conv4_4_cpm': [256, 128, 3, 1, 1]}] 14 | 15 | 16 | block1 = [{'conv5_1_CPM': [128, 128, 3, 1, 1]},{'conv5_2_CPM': [128, 128, 3, 1, 1]},{'conv5_3_CPM': [128, 128, 3, 1, 1]}]#,#, 17 | #{'conv5_4_CPM': [128, 512, 1, 1, 1]}] 18 | 19 | 20 | block2 = [{'Mconv1': [128+38+19, 128, 7, 1, 3]}, {'Mconv2': [128, 128, 7, 1, 3]}, 21 | {'Mconv3': [128, 128, 7, 1, 3]},{'Mconv4': [128, 128, 7, 1, 3]}, 22 | {'Mconv5': [128, 128, 7, 1, 3]}, 23 | {'Mconv6': [128, 128, 1, 1, 0]} 24 | ] 25 | 26 | predict_layers = [[{'predict_L1': [128, 38, 1, 1, 0]}], 27 | [{'predict_L2': [128, 19, 1, 1, 0]}]] 28 | 29 | net_dict = [feature,block1,predict_layers,block2,predict_layers] 30 | 31 | return net_dict 32 | 33 | 34 | class vgg_1branch(nn.Module): 35 | 36 | def __init__(self, net_dict, batch_norm=False): 37 | 38 | 
super(vgg_1branch, self).__init__() 39 | 40 | self.feature = self._make_layer(net_dict[0]) 41 | 42 | self.block1 = self._make_layer(net_dict[1]) 43 | 44 | self.predict_L1_stage1 = self._make_layer(net_dict[2][0]) 45 | self.predict_L2_stage1 = self._make_layer(net_dict[2][1]) 46 | 47 | # repeate 48 | self.block2 = self._make_layer(net_dict[3]) 49 | 50 | self.predict_L1_stage2 = self._make_layer(net_dict[4][0]) 51 | self.predict_L2_stage2 = self._make_layer(net_dict[4][1]) 52 | 53 | self.block3 = self._make_layer(net_dict[3]) 54 | 55 | self.predict_L1_stage3 = self._make_layer(net_dict[4][0]) 56 | self.predict_L2_stage3 = self._make_layer(net_dict[4][1]) 57 | 58 | self.block4 = self._make_layer(net_dict[3]) 59 | 60 | self.predict_L1_stage4 = self._make_layer(net_dict[4][0]) 61 | self.predict_L2_stage4 = self._make_layer(net_dict[4][1]) 62 | 63 | self.block5 = self._make_layer(net_dict[3]) 64 | 65 | self.predict_L1_stage5 = self._make_layer(net_dict[4][0]) 66 | self.predict_L2_stage5 = self._make_layer(net_dict[4][1]) 67 | 68 | self.block6 = self._make_layer(net_dict[3]) 69 | 70 | self.predict_L1_stage6 = self._make_layer(net_dict[4][0]) 71 | self.predict_L2_stage6 = self._make_layer(net_dict[4][1]) 72 | 73 | 74 | self._init_weights() 75 | 76 | def _init_weights(self): 77 | for m in self.modules(): 78 | if isinstance(m, nn.Conv2d): 79 | m.weight.data.normal_(0, 0.01) 80 | if m.bias is not None: 81 | m.bias.data.zero_() 82 | 83 | def _make_layer(self, net_dict, batch_norm=False): 84 | layers = [] 85 | length = len(net_dict) 86 | for i in range(length): 87 | one_layer = net_dict[i] 88 | key = one_layer.keys()[0] 89 | v = one_layer[key] 90 | 91 | if 'pool' in key: 92 | layers += [nn.MaxPool2d(kernel_size=v[0], stride=v[1], padding=v[2])] 93 | else: 94 | conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride=v[3], padding=v[4]) 95 | if batch_norm: 96 | layers += [conv2d, nn.BatchNorm2d(v[1]), nn.ReLU(inplace=True)] 97 | else: 98 | layers += [conv2d, nn.ReLU(inplace=True)] 99 | 100 | return nn.Sequential(*layers) 101 | 102 | def forward(self, x, mask): 103 | # define forward flow 104 | feature = self.feature(x) 105 | 106 | out_stage1 = self.block1(feature) 107 | L1_stage1 = self.predict_L1_stage1(out_stage1) 108 | L2_stage1 = self.predict_L2_stage1(out_stage1) 109 | L1_stage1_mask = L1_stage1 * mask 110 | L2_stage1_mask = L2_stage1 * mask 111 | 112 | concat_stage2 = torch.cat([L1_stage1, L2_stage1, feature], 1) 113 | out_stage2 = self.block2(concat_stage2) 114 | L1_stage2 = self.predict_L1_stage2(out_stage2) 115 | L2_stage2 = self.predict_L2_stage2(out_stage2) 116 | L1_stage2_mask = L1_stage2 * mask 117 | L2_stage2_mask = L2_stage2 * mask 118 | 119 | concat_stage3 = torch.cat([L1_stage2, L2_stage2, feature], 1) 120 | out_stage3 = self.block3(concat_stage3) 121 | L1_stage3 = self.predict_L1_stage3(out_stage3) 122 | L2_stage3 = self.predict_L2_stage3(out_stage3) 123 | L1_stage3_mask = L1_stage3 * mask 124 | L2_stage3_mask = L2_stage3 * mask 125 | 126 | concat_stage4 = torch.cat([L1_stage3, L2_stage3, feature], 1) 127 | out_stage4 = self.block4(concat_stage4) 128 | L1_stage4 = self.predict_L1_stage4(out_stage4) 129 | L2_stage4 = self.predict_L2_stage4(out_stage4) 130 | L1_stage4_mask = L1_stage4 * mask 131 | L2_stage4_mask = L2_stage4 * mask 132 | 133 | concat_stage5 = torch.cat([L1_stage4, L2_stage4, feature], 1) 134 | out_stage5 = self.block5(concat_stage5) 135 | L1_stage5 = self.predict_L1_stage5(out_stage5) 136 | L2_stage5 = self.predict_L2_stage5(out_stage5) 137 | 
L1_stage5_mask = L1_stage5 * mask 138 | L2_stage5_mask = L2_stage5 * mask 139 | 140 | concat_stage6 = torch.cat([L1_stage5, L2_stage5, feature], 1) 141 | out_stage6 = self.block6(concat_stage6) 142 | L1_stage6 = self.predict_L1_stage6(out_stage6) 143 | L2_stage6 = self.predict_L2_stage6(out_stage6) 144 | L1_stage6_mask = L1_stage6 * mask 145 | L2_stage6_mask = L2_stage6 * mask 146 | 147 | return L1_stage1_mask,L2_stage1_mask, \ 148 | L1_stage2_mask, L2_stage2_mask, \ 149 | L1_stage3_mask, L2_stage3_mask, \ 150 | L1_stage4_mask, L2_stage4_mask, \ 151 | L1_stage5_mask, L2_stage5_mask, \ 152 | L1_stage6_mask, L2_stage6_mask 153 | 154 | def PoseModel(num_point, num_vector, num_stages=6, batch_norm=False, pretrained=False): 155 | net_dict = make_net_dict() 156 | model = vgg_1branch(net_dict, batch_norm) 157 | 158 | if pretrained: 159 | parameter_num = 10 160 | if batch_norm: 161 | vgg19 = models.vgg19_bn(pretrained=True) 162 | parameter_num *= 6 163 | else: 164 | vgg19 = models.vgg19(pretrained=True) 165 | parameter_num *= 2 166 | 167 | vgg19_state_dict = vgg19.state_dict() 168 | vgg19_keys = vgg19_state_dict.keys() 169 | 170 | model_dict = model.state_dict() 171 | from collections import OrderedDict 172 | weights_load = OrderedDict() 173 | 174 | for i in range(parameter_num): 175 | weights_load[model.state_dict().keys()[i]] = vgg19_state_dict[vgg19_keys[i]] 176 | model_dict.update(weights_load) 177 | model.load_state_dict(model_dict) 178 | 179 | return model 180 | 181 | 182 | if __name__ == '__main__': 183 | print PoseModel(19, 6, True, True) 184 | -------------------------------------------------------------------------------- /pose_estimation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import os 4 | import sys 5 | import math 6 | import torchvision.models as models 7 | 8 | class Pose_Estimation(nn.Module): 9 | 10 | def __init__(self, net_dict, batch_norm=False): 11 | 12 | super(Pose_Estimation, self).__init__() 13 | 14 | self.model0 = self._make_layer(net_dict[0], batch_norm, True) 15 | 16 | self.model1_1 = self._make_layer(net_dict[1][0], batch_norm) 17 | self.model1_2 = self._make_layer(net_dict[1][1], batch_norm) 18 | 19 | self.model2_1 = self._make_layer(net_dict[2][0], batch_norm) 20 | self.model2_2 = self._make_layer(net_dict[2][1], batch_norm) 21 | 22 | self.model3_1 = self._make_layer(net_dict[3][0], batch_norm) 23 | self.model3_2 = self._make_layer(net_dict[3][1], batch_norm) 24 | 25 | self.model4_1 = self._make_layer(net_dict[4][0], batch_norm) 26 | self.model4_2 = self._make_layer(net_dict[4][1], batch_norm) 27 | 28 | self.model5_1 = self._make_layer(net_dict[5][0], batch_norm) 29 | self.model5_2 = self._make_layer(net_dict[5][1], batch_norm) 30 | 31 | self.model6_1 = self._make_layer(net_dict[6][0], batch_norm) 32 | self.model6_2 = self._make_layer(net_dict[6][1], batch_norm) 33 | 34 | for m in self.modules(): 35 | if isinstance(m, nn.Conv2d): 36 | m.weight.data.normal_(0, 0.01) 37 | if m.bias is not None: 38 | m.bias.data.zero_() 39 | elif isinstance(m, nn.BatchNorm2d): 40 | m.weight.data.fill_(1) 41 | m.bias.data.zero_() 42 | 43 | def _make_layer(self, net_dict, batch_norm=False, last_activity=False): 44 | 45 | layers = [] 46 | length = len(net_dict) - 1 47 | for i in range(length): 48 | one_layer = net_dict[i] 49 | key = one_layer.keys()[0] 50 | v = one_layer[key] 51 | 52 | if 'pool' in key: 53 | layers += [nn.MaxPool2d(kernel_size=v[0], stride=v[1], padding=v[2])] 54 | else: 55 | conv2d = 
nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride=v[3], padding=v[4]) 56 | if batch_norm: 57 | layers += [conv2d, nn.BatchNorm2d(v[1]), nn.ReLU(inplace=True)] 58 | else: 59 | layers += [conv2d, nn.ReLU(inplace=True)] 60 | 61 | if last_activity: 62 | one_layer = net_dict[-1] 63 | key = one_layer.keys()[0] 64 | v = one_layer[key] 65 | 66 | conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride=v[3], padding=v[4]) 67 | if batch_norm: 68 | layers += [conv2d, nn.BatchNorm2d(v[1]), nn.ReLU(inplace=True)] 69 | else: 70 | layers += [conv2d, nn.ReLU(inplace=True)] 71 | else: 72 | one_layer = net_dict[-1] 73 | key = one_layer.keys()[0] 74 | v = one_layer[key] 75 | 76 | conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride=v[3], padding=v[4]) 77 | layers += [conv2d] 78 | return nn.Sequential(*layers) 79 | 80 | def forward(self, x, mask): 81 | out0 = self.model0(x) 82 | 83 | out1_1 = self.model1_1(out0) 84 | out1_2 = self.model1_2(out0) 85 | out1 = torch.cat([out1_1, out1_2, out0], 1) 86 | out1_vec_mask = out1_1 * mask 87 | out1_heat_mask = out1_2 * mask 88 | 89 | out2_1 = self.model2_1(out1) 90 | out2_2 = self.model2_2(out1) 91 | out2 = torch.cat([out2_1, out2_2, out0], 1) 92 | out2_vec_mask = out2_1 * mask 93 | out2_heat_mask = out2_2 * mask 94 | 95 | out3_1 = self.model3_1(out2) 96 | out3_2 = self.model3_2(out2) 97 | out3 = torch.cat([out3_1, out3_2, out0], 1) 98 | out3_vec_mask = out3_1 * mask 99 | out3_heat_mask = out3_2 * mask 100 | 101 | out4_1 = self.model4_1(out3) 102 | out4_2 = self.model4_2(out3) 103 | out4 = torch.cat([out4_1, out4_2, out0], 1) 104 | out4_vec_mask = out4_1 * mask 105 | out4_heat_mask = out4_2 * mask 106 | 107 | out5_1 = self.model5_1(out4) 108 | out5_2 = self.model5_2(out4) 109 | out5 = torch.cat([out5_1, out5_2, out0], 1) 110 | out5_vec_mask = out5_1 * mask 111 | out5_heat_mask = out5_2 * mask 112 | 113 | out6_1 = self.model6_1(out5) 114 | out6_2 = self.model6_2(out5) 115 | out6_vec_mask = out6_1 * mask 116 | out6_heat_mask = out6_2 * mask 117 | 118 | return out1_vec_mask, out1_heat_mask, out2_vec_mask, out2_heat_mask, out3_vec_mask, out3_heat_mask, out4_vec_mask, out4_heat_mask, out5_vec_mask, out5_heat_mask, out6_vec_mask, out6_heat_mask 119 | 120 | 121 | def PoseModel(num_point, num_vector, num_stages=6, batch_norm=False, pretrained=False): 122 | 123 | net_dict = [] 124 | block0 = [{'conv1_1': [3, 64, 3, 1, 1]}, {'conv1_2': [64, 64, 3, 1, 1]}, {'pool1': [2, 2, 0]}, 125 | {'conv2_1': [64, 128, 3, 1, 1]}, {'conv2_2': [128, 128, 3, 1, 1]}, {'pool2': [2, 2, 0]}, 126 | {'conv3_1': [128, 256, 3, 1, 1]}, {'conv3_2': [256, 256, 3, 1, 1]}, {'conv3_3': [256, 256, 3, 1, 1]}, {'conv3_4': [256, 256, 3, 1, 1]}, {'pool3': [2, 2, 0]}, 127 | {'conv4_1': [256, 512, 3, 1, 1]}, {'conv4_2': [512, 512, 3, 1, 1]}, {'conv4_3_cpm': [512, 256, 3, 1, 1]}, {'conv4_4_cpm': [256, 128, 3, 1, 1]}] 128 | net_dict.append(block0) 129 | 130 | block1 = [[], []] 131 | in_vec = [0, 128, 128, 128, 128, 512, num_vector * 2] 132 | in_heat = [0, 128, 128, 128, 128, 512, num_point] 133 | for i in range(1, 6): 134 | if i < 4: 135 | block1[0].append({'conv{}_stage1_vec'.format(i) :[in_vec[i], in_vec[i + 1], 3, 1, 1]}) 136 | block1[1].append({'conv{}_stage1_heat'.format(i):[in_heat[i], in_heat[i + 1], 3, 1, 1]}) 137 | else: 138 | block1[0].append({'conv{}_stage1_vec'.format(i):[in_vec[i], in_vec[i + 1], 1, 1, 0]}) 139 | block1[1].append({'conv{}_stage1_heat'.format(i):[in_heat[i], in_heat[i + 1], 1, 1, 0]}) 140 | net_dict.append(block1) 141 
| 142 | in_vec_1 = [0, 128 + num_point + num_vector * 2, 128, 128, 128, 128, 128, 128, num_vector * 2] 143 | in_heat_1 = [0, 128 + num_point + num_vector * 2, 128, 128, 128, 128, 128, 128, num_point] 144 | for j in range(2, num_stages + 1): 145 | blocks = [[], []] 146 | for i in range(1, 8): 147 | if i < 6: 148 | blocks[0].append({'conv{}_stage{}_vec'.format(i, j):[in_vec_1[i], in_vec_1[i + 1], 7, 1, 3]}) 149 | blocks[1].append({'conv{}_stage{}_heat'.format(i, j):[in_heat_1[i], in_heat_1[i + 1], 7, 1, 3]}) 150 | else: 151 | blocks[0].append({'conv{}_stage{}_vec'.format(i, j):[in_vec_1[i], in_vec_1[i + 1], 1, 1, 0]}) 152 | blocks[1].append({'conv{}_stage{}_heat'.format(i, j):[in_heat_1[i], in_heat_1[i + 1], 1, 1, 0]}) 153 | net_dict.append(blocks) 154 | 155 | model = Pose_Estimation(net_dict, batch_norm) 156 | 157 | if pretrained: 158 | parameter_num = 10 159 | if batch_norm: 160 | vgg19 = models.vgg19_bn(pretrained=True) 161 | parameter_num *= 6 162 | else: 163 | vgg19 = models.vgg19(pretrained=True) 164 | parameter_num *= 2 165 | 166 | vgg19_state_dict = vgg19.state_dict() 167 | vgg19_keys = vgg19_state_dict.keys() 168 | 169 | model_dict = model.state_dict() 170 | from collections import OrderedDict 171 | weights_load = OrderedDict() 172 | 173 | for i in range(parameter_num): 174 | weights_load[model.state_dict().keys()[i]] = vgg19_state_dict[vgg19_keys[i]] 175 | model_dict.update(weights_load) 176 | model.load_state_dict(model_dict) 177 | 178 | return model 179 | 180 | 181 | if __name__ == '__main__': 182 | 183 | print PoseModel(19, 6, True, True) 184 | -------------------------------------------------------------------------------- /preprocessing/convert_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append('/root/deep_learning/caffe/caffe-1.0/python/') 4 | import caffe 5 | from caffe.proto import caffe_pb2 6 | import torch 7 | 8 | sys.path.append('..') 9 | import pose_estimation 10 | from utils import save_checkpoint as save_checkpoint 11 | 12 | 13 | def load_caffe_model(deploy_path, model_path): 14 | caffe.set_mode_cpu() 15 | net = caffe.Net(deploy_path, model_path, caffe.TEST) 16 | 17 | return net 18 | 19 | 20 | def load_pytorch_model(): 21 | model = pose_estimation.PoseModel(num_point=19, num_vector=19, pretrained=True) 22 | 23 | return model 24 | 25 | 26 | def convert(caffe_net, pytorch_net): 27 | caffe_keys = caffe_net.params.keys() 28 | pytorch_keys = pytorch_net.state_dict().keys() 29 | 30 | length_caffe = len(caffe_keys) 31 | length_pytorch = len(pytorch_keys) 32 | dic = {} 33 | L1 = [] 34 | L2 = [] 35 | _1 = [] 36 | _2 = [] 37 | for i in range(length_caffe): 38 | if 'L1' in caffe_keys[i]: 39 | L1.append(caffe_keys[i]) 40 | if '_1' in pytorch_keys[2 * i]: 41 | _1.append(pytorch_keys[2 * i][:-7]) 42 | else: 43 | _2.append(pytorch_keys[2 * i][:-7]) 44 | elif 'L2' in caffe_keys[i]: 45 | L2.append(caffe_keys[i]) 46 | if '_1' in pytorch_keys[2 * i]: 47 | _1.append(pytorch_keys[2 * i][:-7]) 48 | else: 49 | _2.append(pytorch_keys[2 * i][:-7]) 50 | else: 51 | dic[caffe_keys[i]] = pytorch_keys[2 * i][:-7] 52 | 53 | for info in zip(L1, _1): 54 | dic[info[0]] = info[1] 55 | for info in zip(L2, _2): 56 | dic[info[0]] = info[1] 57 | 58 | model_dict = pytorch_net.state_dict() 59 | from collections import OrderedDict 60 | weights_load = OrderedDict() 61 | for key in dic: 62 | caffe_key = key 63 | pytorch_key = dic[key] 64 | weights_load[pytorch_key + '.weight'] = 
torch.from_numpy(caffe_net.params[caffe_key][0].data) 65 | weights_load[pytorch_key + '.bias'] = torch.from_numpy(caffe_net.params[caffe_key][1].data) 66 | model_dict.update(weights_load) 67 | pytorch_net.load_state_dict(model_dict) 68 | save_checkpoint({ 69 | 'iter': 0, 70 | 'state_dict': pytorch_net.state_dict(), 71 | }, True, 'caffe_model_coco') 72 | 73 | 74 | if __name__ == '__main__': 75 | caffe_net = load_caffe_model('/home/xiangyu/data/pretrain/COCO/pose_deploy.prototxt', 76 | '/home/xiangyu/data/pretrain/COCO/pose_iter_440000.caffemodel') 77 | pytorch_net = load_pytorch_model() 78 | 79 | convert(caffe_net, pytorch_net) -------------------------------------------------------------------------------- /preprocessing/generate_json_mask.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import math 4 | import json 5 | import numpy as np 6 | from pycocotools.coco import COCO 7 | 8 | ''' 9 | coco_annotations 10 | 11 | u'keypoints': [ 12 | 0.u'nose', 13 | 1.u'left_eye', 14 | 2.u'right_eye', 15 | 3.u'left_ear', 16 | 4.u'right_ear', 17 | 5.u'left_shoulder', 18 | 6.u'right_shoulder', 19 | 7.u'left_elbow', 20 | 8.u'right_elbow', 21 | 9.u'left_wrist', 22 | 10.u'right_wrist', 23 | 11.u'left_hip', 24 | 12.u'right_hip', 25 | 13.u'left_knee', 26 | 14.u'right_knee', 27 | 15.u'left_ankle', 28 | 16.u'right_ankle'], 29 | 30 | 31 | OUR annotations 32 | u'keypoints': [ 33 | 0.u'nose', -> nose 34 | 1.u'left_eye', neck 35 | 2.u'right_eye', right_shoulder 36 | 3.u'left_ear', right_elbow 37 | 4.u'right_ear', right_wrist 38 | 5.u'left_shoulder', left_shoulder 39 | 6.u'right_shoulder', left_elbow 40 | 7.u'left_elbow', left_wrist 41 | 8.u'right_elbow', right_hip 42 | 9.u'left_wrist', right_knee 43 | 10.u'right_wrist', right_ankle 44 | 11.u'left_hip', left_hip 45 | 12.u'right_hip', left_knee 46 | 13.u'left_knee', left_ankle 47 | 14.u'right_knee', right_eye 48 | 15.u'left_ankle', left_eye 49 | 16.u'right_ankle' right_ear 50 | 17. 
left_ear], 51 | 52 | ''' 53 | 54 | def generate_json_mask(ann_path, json_path, mask_dir, filelist_path, masklist_path): 55 | COCO_Order = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,11,12, 13,14, 15, 16] 56 | COCO_TO_OURS = [0, 15, 14, 17, 16, 5, 2, 6, 3, 7, 4, 11, 8, 12, 9, 13, 10] 57 | 58 | coco = COCO(ann_path) 59 | ids = list(coco.imgs.keys()) 60 | lists = [] 61 | 62 | filelist_fp = open(filelist_path, 'w') 63 | masklist_fp = open(masklist_path, 'w') 64 | for i, img_id in enumerate(ids): 65 | ann_ids = coco.getAnnIds(imgIds=img_id) 66 | img_anns = coco.loadAnns(ann_ids) 67 | 68 | numPeople = len(img_anns) 69 | name = coco.imgs[img_id]['file_name'] 70 | height = coco.imgs[img_id]['height'] 71 | width = coco.imgs[img_id]['width'] 72 | 73 | persons = [] 74 | person_centers = [] 75 | 76 | for p in range(numPeople): 77 | 78 | if img_anns[p]['num_keypoints'] < 5 or img_anns[p]['area'] < 32 * 32: 79 | continue 80 | kpt = img_anns[p]['keypoints'] 81 | dic = dict() 82 | 83 | # person center 84 | person_center = [img_anns[p]['bbox'][0] + img_anns[p]['bbox'][2] / 2.0, 85 | img_anns[p]['bbox'][1] + img_anns[p]['bbox'][3] / 2.0] 86 | scale = img_anns[p]['bbox'][3] / 368.0 87 | 88 | # skip this person if the distance to exiting person is too small 89 | flag = 0 90 | for pc in person_centers: 91 | dis = math.sqrt((person_center[0] - pc[0]) * (person_center[0] - pc[0]) + (person_center[1] - pc[1]) * ( 92 | person_center[1] - pc[1])) 93 | if dis < pc[2] * 0.3: 94 | flag = 1; 95 | break 96 | if flag == 1: 97 | continue 98 | dic['objpos'] = person_center 99 | dic['keypoints'] = np.zeros((17, 3)).tolist() 100 | dic['scale'] = scale 101 | for part in range(17): 102 | dic['keypoints'][part][0] = kpt[part * 3] 103 | dic['keypoints'][part][1] = kpt[part * 3 + 1] 104 | # visiable is 1, unvisiable is 0 and not labeled is 2 105 | if kpt[part * 3 + 2] == 2: 106 | dic['keypoints'][part][2] = 1 107 | elif kpt[part * 3 + 2] == 1: 108 | dic['keypoints'][part][2] = 0 109 | else: 110 | dic['keypoints'][part][2] = 2 111 | 112 | persons.append(dic) 113 | person_centers.append(np.append(person_center, max(img_anns[p]['bbox'][2], img_anns[p]['bbox'][3]))) 114 | 115 | if len(persons) > 0: 116 | filelist_fp.write(name + '\n') 117 | info = dict() 118 | info['filename'] = name 119 | info['info'] = [] 120 | cnt = 1 121 | for person in persons: 122 | dic = dict() 123 | dic['pos'] = person['objpos'] 124 | dic['keypoints'] = np.zeros((18, 3)).tolist() 125 | dic['scale'] = person['scale'] 126 | for i in range(17): 127 | dic['keypoints'][COCO_TO_OURS[i]][0] = person['keypoints'][i][0] 128 | dic['keypoints'][COCO_TO_OURS[i]][1] = person['keypoints'][i][1] 129 | dic['keypoints'][COCO_TO_OURS[i]][2] = person['keypoints'][i][2] 130 | dic['keypoints'][1][0] = (person['keypoints'][5][0] + person['keypoints'][6][0]) * 0.5 131 | dic['keypoints'][1][1] = (person['keypoints'][5][1] + person['keypoints'][6][1]) * 0.5 132 | if person['keypoints'][5][2] == person['keypoints'][6][2]: 133 | dic['keypoints'][1][2] = person['keypoints'][5][2] 134 | elif person['keypoints'][5][2] == 2 or person['keypoints'][6][2] == 2: 135 | dic['keypoints'][1][2] = 2 136 | else: 137 | dic['keypoints'][1][2] = 0 138 | info['info'].append(dic) 139 | lists.append(info) 140 | 141 | mask_all = np.zeros((height, width), dtype=np.uint8) 142 | mask_miss = np.zeros((height, width), dtype=np.uint8) 143 | flag = 0 144 | for p in img_anns: 145 | if p['iscrowd'] == 1: 146 | mask_crowd = coco.annToMask(p) 147 | temp = np.bitwise_and(mask_all, mask_crowd) 148 | mask_crowd = mask_crowd - 
temp 149 | flag += 1 150 | continue 151 | else: 152 | mask = coco.annToMask(p) 153 | 154 | mask_all = np.bitwise_or(mask, mask_all) 155 | 156 | if p['num_keypoints'] <= 0: 157 | mask_miss = np.bitwise_or(mask, mask_miss) 158 | 159 | if flag < 1: 160 | mask_miss = np.logical_not(mask_miss) 161 | elif flag == 1: 162 | mask_miss = np.logical_not(np.bitwise_or(mask_miss, mask_crowd)) 163 | mask_all = np.bitwise_or(mask_all, mask_crowd) 164 | else: 165 | raise Exception('crowd segments > 1') 166 | np.save(os.path.join(mask_dir, name.split('.')[0] + '.npy'), mask_miss) 167 | masklist_fp.write(os.path.join(mask_dir, name.split('.')[0] + '.npy') + '\n') 168 | if i % 1000 == 0: 169 | print "Processed {} of {}".format(i, len(ids)) 170 | 171 | masklist_fp.close() 172 | filelist_fp.close() 173 | print 'write json file' 174 | 175 | fp = open(json_path, 'w') 176 | fp.write(json.dumps(lists)) 177 | fp.close() 178 | 179 | print 'done!' 180 | 181 | 182 | 183 | if __name__ == '__main__': 184 | 185 | # ann_dir = '/home/xiangyu/data/coco/annotations/' 186 | # img_dir = '/home/xiangyu/data/coco/images/val2014/' 187 | # out_dir = '/home/xiangyu/data/samsung_pose_data/' 188 | # 189 | # # IN 190 | # train_ann_path = os.path.join(ann_dir, 'person_keypoints_valminusminival2014.json') 191 | # val_ann_path = os.path.join(ann_dir, 'person_keypoints_minival2014.json') 192 | # # OUT 193 | # mask_dir = os.path.join(out_dir, 'mask') 194 | # json_dir = os.path.join(out_dir, 'json') 195 | # img_list_dir = os.path.join(out_dir, 'img_list') 196 | # mask_list_dir = os.path.join(out_dir, 'mask_list') 197 | # 198 | # train_json_path = os.path.join(json_dir, 'valminusminival2014.json') 199 | # val_json_path = os.path.join(json_dir, 'minival2014.json') 200 | # 201 | # train_img_path = os.path.join(img_list_dir, 'valminusminival2014.txt') 202 | # val_img_path = os.path.join(img_list_dir, 'minival2014.txt') 203 | # 204 | # train_mask_path = os.path.join(mask_list_dir, 'valminusminival2014.txt') 205 | # val_mask_path = os.path.join(mask_list_dir, 'minival2014.txt') 206 | 207 | 208 | #--------------------------------------------------------------------------- 209 | ann_dir = '/home/xiangyu/data/coco/annotations/' 210 | img_dir = '/home/xiangyu/data/coco/images/train2014/' 211 | out_dir = '/home/xiangyu/data/samsung_pose_data_train/' 212 | 213 | # IN 214 | train_ann_path = os.path.join(ann_dir, 'person_keypoints_train2014.json') 215 | 216 | # OUT 217 | mask_dir = os.path.join(out_dir, 'mask') 218 | json_dir = os.path.join(out_dir, 'json') 219 | img_list_dir = os.path.join(out_dir, 'img_list') 220 | mask_list_dir = os.path.join(out_dir, 'mask_list') 221 | 222 | train_json_path = os.path.join(json_dir, 'train2014.json') 223 | 224 | train_img_path = os.path.join(img_list_dir, 'train2014.txt') 225 | 226 | train_mask_path = os.path.join(mask_list_dir, 'train2014.txt') 227 | 228 | generate_json_mask(train_ann_path, train_json_path, mask_dir, train_img_path, train_mask_path) 229 | #generate_json_mask(val_ann_path, val_json_path, mask_dir, val_img_path, val_mask_path) 230 | -------------------------------------------------------------------------------- /train/config.yml: -------------------------------------------------------------------------------- 1 | workers: 6 2 | weight_decay: 0.0005 3 | momentum: 0.9 4 | display: 50 5 | max_iter: 200000 6 | batch_size: 10 7 | test_interval: 50 8 | topk: 3 9 | base_lr: 0.00004 10 | start_iters: 0 11 | best_model: 12345678.9 12 | #-------------lr_policy--------------------# 13 | # step 14 | lr_policy: 
'step' 15 | policy_parameter: 16 | gamma: 0.333 17 | step_size: 25000 18 | #53100 19 | # exp 20 | # lr_policy: 'exp' 21 | # policy_parameter: 22 | # gamma: 0.99 23 | # 24 | # inv 25 | # lr_policy: 'inv' 26 | # policy_parameter: 27 | # gamma: 0.1 28 | # power: 0.1 29 | # 30 | # multistep 31 | #lr_policy: 'multistep' 32 | #policy_parameter: 33 | # stepvalue: #[20000, 35000, 45000] 34 | # gamma: 0.33#0.1 35 | # 36 | # poly 37 | #lr_policy: 'poly' 38 | #policy_parameter: 39 | #power: 1.5 40 | #max_iter: 250000 41 | # 42 | # sigmoid 43 | # lr_policy: 'sigmoid' 44 | # policy_parameter: 45 | # gamma: 0.9 46 | # stepsize: 5000 47 | #lr_policy: 'multistep-poly' 48 | #policy_parameter: 49 | #stepvalue: [12435, 24870, 37350, 49740] 50 | #max_iter: 62175 51 | #gamma: 0.333 52 | #power: 1.2 53 | -------------------------------------------------------------------------------- /train/log/train-2018-02-19-06-37-30.log: -------------------------------------------------------------------------------- 1 | workers: 6 2 | max_iter: 200000 3 | batch_size: 60 4 | policy_parameter: {'step_size': 68000, 'gamma': 0.333} 5 | base_lr: 4e-05 6 | topk: 3 7 | start_iters: 0 8 | lr_policy: step 9 | test_interval: 3000 10 | best_model: 12345678.9 11 | weight_decay: 0.0005 12 | display: 50 13 | momentum: 0.9 14 | -------------------------------------------------------------------------------- /train/retrain.sh: -------------------------------------------------------------------------------- 1 | export PYTHONUNBUFFERED="True" 2 | LOG="log/train-`date +'%Y-%m-%d-%H-%M-%S'`.log" 3 | # python train_pose.py --gpu 0 1 --train_dir /home/code/panhongyu/datasets/coco/filelist/traincoco_pytorch.txt /home/code/panhongyu/datasets/coco/masklist/traincoco_pytorch.txt /home/code/panhongyu/datasets/coco/json/traincoco_pytorch.json --val_dir /home/code/panhongyu/datasets/coco/filelist/valcoco_pytorch.txt /home/code/panhongyu/datasets/coco/masklist/valcoco_pytorch.txt /home/code/panhongyu/datasets/coco/json/valcoco_pytorch.json --config config.yml > $LOG 4 | 5 | #python -u train_pose.py --gpu 2 1 --pretrained /data/xiaobing.wang/qy.feng/Pytorch_RMPE/training/openpose_coco_latest.pth.tar --train_dir /data/xiaobing.wang/qy.feng/data/coco_pytorch/train_img_list.txt /data/xiaobing.wang/qy.feng/data/coco_pytorch/train_maskmiss_list.txt /data/xiaobing.wang/qy.feng/data/coco_pytorch/train_json_file.json --val_dir /data/xiaobing.wang/qy.feng/data/coco_pytorch/val_img_list.txt /data/xiaobing.wang/qy.feng/data/coco_pytorch/val_maskmiss_list.txt /data/xiaobing.wang/qy.feng/data/coco_pytorch/val_json_file.json --config config.yml 2>&1 | tee $LOG # ./output.txtopenpose_coco_best.pth.tar 6 | 7 | #--gpu 3 4 8 | python -u train_pose.py --gpu 7 --pretrained /data/xiaobing.wang/qy.feng/Pytorch_RMPE/training/openpose_coco_best.pth.tar --train_dir /data/xiaobing.wang/qy.feng/data/coco_pytorch/train_img_list.txt /data/xiaobing.wang/qy.feng/data/coco_pytorch/train_maskmiss_list.txt /data/xiaobing.wang/qy.feng/data/coco_pytorch/train_json_file.json --val_dir /data/xiaobing.wang/qy.feng/data/coco_pytorch/val_img_list.txt /data/xiaobing.wang/qy.feng/data/coco_pytorch/val_maskmiss_list.txt /data/xiaobing.wang/qy.feng/data/coco_pytorch/val_json_file.json --config config.yml 2>&1 | tee $LOG # ./output.txt 9 | -------------------------------------------------------------------------------- /train/train_model.sh: -------------------------------------------------------------------------------- 1 | export PYTHONUNBUFFERED="True" 2 | LOG="log/train-`date 
/train/log/train-2018-02-19-06-37-30.log:
--------------------------------------------------------------------------------
workers: 6
max_iter: 200000
batch_size: 60
policy_parameter: {'step_size': 68000, 'gamma': 0.333}
base_lr: 4e-05
topk: 3
start_iters: 0
lr_policy: step
test_interval: 3000
best_model: 12345678.9
weight_decay: 0.0005
display: 50
momentum: 0.9
--------------------------------------------------------------------------------
/train/retrain.sh:
--------------------------------------------------------------------------------
export PYTHONUNBUFFERED="True"
LOG="log/train-`date +'%Y-%m-%d-%H-%M-%S'`.log"
# python train_pose.py --gpu 0 1 --train_dir /home/code/panhongyu/datasets/coco/filelist/traincoco_pytorch.txt /home/code/panhongyu/datasets/coco/masklist/traincoco_pytorch.txt /home/code/panhongyu/datasets/coco/json/traincoco_pytorch.json --val_dir /home/code/panhongyu/datasets/coco/filelist/valcoco_pytorch.txt /home/code/panhongyu/datasets/coco/masklist/valcoco_pytorch.txt /home/code/panhongyu/datasets/coco/json/valcoco_pytorch.json --config config.yml > $LOG

#python -u train_pose.py --gpu 2 1 --pretrained /data/xiaobing.wang/qy.feng/Pytorch_RMPE/training/openpose_coco_latest.pth.tar --train_dir /data/xiaobing.wang/qy.feng/data/coco_pytorch/train_img_list.txt /data/xiaobing.wang/qy.feng/data/coco_pytorch/train_maskmiss_list.txt /data/xiaobing.wang/qy.feng/data/coco_pytorch/train_json_file.json --val_dir /data/xiaobing.wang/qy.feng/data/coco_pytorch/val_img_list.txt /data/xiaobing.wang/qy.feng/data/coco_pytorch/val_maskmiss_list.txt /data/xiaobing.wang/qy.feng/data/coco_pytorch/val_json_file.json --config config.yml 2>&1 | tee $LOG # ./output.txtopenpose_coco_best.pth.tar

#--gpu 3 4
python -u train_pose.py --gpu 7 --pretrained /data/xiaobing.wang/qy.feng/Pytorch_RMPE/training/openpose_coco_best.pth.tar --train_dir /data/xiaobing.wang/qy.feng/data/coco_pytorch/train_img_list.txt /data/xiaobing.wang/qy.feng/data/coco_pytorch/train_maskmiss_list.txt /data/xiaobing.wang/qy.feng/data/coco_pytorch/train_json_file.json --val_dir /data/xiaobing.wang/qy.feng/data/coco_pytorch/val_img_list.txt /data/xiaobing.wang/qy.feng/data/coco_pytorch/val_maskmiss_list.txt /data/xiaobing.wang/qy.feng/data/coco_pytorch/val_json_file.json --config config.yml 2>&1 | tee $LOG # ./output.txt
--------------------------------------------------------------------------------
/train/train_model.sh:
--------------------------------------------------------------------------------
export PYTHONUNBUFFERED="True"
LOG="log/train-`date +'%Y-%m-%d-%H-%M-%S'`.log"
# python train_pose.py --gpu 0 1 --train_dir /home/code/panhongyu/datasets/coco/filelist/traincoco_pytorch.txt /home/code/panhongyu/datasets/coco/masklist/traincoco_pytorch.txt /home/code/panhongyu/datasets/coco/json/traincoco_pytorch.json --val_dir /home/code/panhongyu/datasets/coco/filelist/valcoco_pytorch.txt /home/code/panhongyu/datasets/coco/masklist/valcoco_pytorch.txt /home/code/panhongyu/datasets/coco/json/valcoco_pytorch.json --config config.yml > $LOG
# --pretrained /data/xiaobing.wang/qy.feng/Pytorch_RMPE/training/openpose_coco_latest.pth.tar


python train_pose.py --gpu 0 --train_dir /data/root/data/samsung_pose/datasets/img_list/valminusminival2014.txt /data/root/data/samsung_pose/datasets/mask_list/valminusminival2014.txt /data/root/data/samsung_pose/datasets/json/valminusminival2014.json --val_dir /data/root/data/samsung_pose/datasets/img_list/minival2014.txt /data/root/data/samsung_pose/datasets/mask_list/minival2014.txt /data/root/data/samsung_pose/datasets/json/minival2014.json --config config.yml > $LOG
--------------------------------------------------------------------------------
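Both launch scripts use the same argument convention: --train_dir and --val_dir each take three paths in a fixed order. A small sketch (paths illustrative) of how such a triple is unpacked downstream:

    train_dir = ['img_list/train2014.txt',    # [0] image file list
                 'mask_list/train2014.txt',   # [1] list of mask-miss .npy files
                 'json/train2014.json']       # [2] keypoints/centers/scales JSON
    img_list_path, mask_list_path, json_path = train_dir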
/train/train_pose.py:
--------------------------------------------------------------------------------
from __future__ import print_function
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.optim
import os
import sys
import argparse
import time
sys.path.append('..')
import CocoFolder
import Mytransforms
from utils import *
import vis_util
import pose_estimation
from logger import Logger

def parse():

    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str,
                        dest='config', help='to set the parameters',
                        default='config.yml')
    parser.add_argument('--gpu', default=[0], nargs='+', type=int,
                        dest='gpu', help='the gpu used')
    parser.add_argument('--pretrained', default=None, type=str,
                        dest='pretrained', help='the path of pretrained model')
    parser.add_argument('--snapshot', type=str,
                        dest='snapshot', help='resume model',
                        default=None)
    parser.add_argument('--root', type=str,
                        dest='root', help='the root of images',
                        default='/data/root/data/coco/images/val2014')
    parser.add_argument('--train_dir', nargs='+', type=str,
                        dest='train_dir', help='the path of train file',
                        default=['/home/xiangyu/data/samsung_pose_data/img_list/valminusminival2014.txt',
                                 '/home/xiangyu/data/samsung_pose_data/mask_list/valminusminival2014.txt',
                                 '/home/xiangyu/data/samsung_pose_data/json/valminusminival2014.json'])
    parser.add_argument('--val_dir', nargs='+', type=str,
                        dest='val_dir', help='the path of val file',
                        default=['/home/xiangyu/data/samsung_pose_data/img_list/minival2014.txt',
                                 '/home/xiangyu/data/samsung_pose_data/mask_list/minival2014.txt',
                                 '/home/xiangyu/data/samsung_pose_data/json/minival2014.json'])
    parser.add_argument('--num_classes', default=1000, type=int,
                        dest='num_classes', help='num_classes (default: 1000)')
    parser.add_argument('--logdir', default='./logs', type=str,
                        dest='logdir', help='path of log')
    return parser.parse_args()


def construct_model(args):

    model = pose_estimation.PoseModel(num_point=19, num_vector=19, pretrained=True)
    if args.snapshot:
        # resume from a saved {'iter': ..., 'state_dict': ...} checkpoint
        state_dict = torch.load(args.snapshot)['state_dict']
        model.load_state_dict(state_dict)
    # if not args.pretrained:
    #     model = pose_estimation.PoseModel(num_point=19, num_vector=19, pretrained=True)
    # else:
    #     state_dict = torch.load(args.pretrained)['state_dict']
    #     from collections import OrderedDict
    #     new_state_dict = OrderedDict()
    #     for k, v in state_dict.items():
    #         name = k[7:]  # strip the 'module.' prefix left by DataParallel
    #         new_state_dict[name] = v
    #     model.load_state_dict(new_state_dict)

    #os.environ['CUDA_VISIBLE_DEVICES'] = ','.join([str(gpu) for gpu in args.gpu])
    #model = torch.nn.DataParallel(model, device_ids=range(len(args.gpu))).cuda()
    model.cuda()  # single gpu

    return model


def get_parameters(model, config, isdefault=True):

    if isdefault:
        return model.parameters(), [1.]
    lr_1 = []
    lr_2 = []
    lr_4 = []
    lr_8 = []
    params_dict = dict(model.named_parameters())
    for key, value in params_dict.items():
        if ('model1_' not in key) and ('model0.' not in key):
            if key[-4:] == 'bias':
                lr_8.append(value)
            else:
                lr_4.append(value)
        elif key[-4:] == 'bias':
            lr_2.append(value)
        else:
            lr_1.append(value)
    params = [{'params': lr_1, 'lr': config.base_lr},
              {'params': lr_2, 'lr': config.base_lr * 2.},
              {'params': lr_4, 'lr': config.base_lr * 4.},
              {'params': lr_8, 'lr': config.base_lr * 8.}]

    return params, [1., 2., 4., 8.]


def to_np(x):
    return x.data.cpu().numpy()
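
# Illustrative sketch (helper name assumed, not part of the repo): the
# [1., 2., 4., 8.] multipliers returned by get_parameters are meant to be
# reapplied to the four param groups on every schedule update, which is what
# utils.adjust_learning_rate is called with in the training loop below.
def apply_multiples(optimizer, lr, multiple):
    for param_group, m in zip(optimizer.param_groups, multiple):
        param_group['lr'] = lr * m
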

def train_val(model, args):

    traindir = args.train_dir
    valdir = args.val_dir

    config = Config(args.config)
    cudnn.benchmark = True

    # Set the logger (note: args.logdir is parsed but not used here)
    logger = Logger('./log')

    train_loader = torch.utils.data.DataLoader(
        CocoFolder.CocoFolder(traindir, 8,
                              Mytransforms.Compose([Mytransforms.RandomResized(),
                                                    Mytransforms.RandomRotate(40),
                                                    Mytransforms.RandomCrop(368),
                                                    Mytransforms.RandomHorizontalFlip(),
                                                    ])),
        batch_size=config.batch_size, shuffle=True,
        num_workers=config.workers, pin_memory=True)

    if config.test_interval != 0 and args.val_dir is not None:
        val_loader = torch.utils.data.DataLoader(
            CocoFolder.CocoFolder(valdir, 8,
                                  Mytransforms.Compose([Mytransforms.TestResized(368),
                                                        ])),
            batch_size=4, shuffle=False,
            num_workers=config.workers, pin_memory=True)

    criterion = nn.MSELoss().cuda()

    params, multiple = get_parameters(model, config, False)

    optimizer = torch.optim.SGD(params, config.base_lr, momentum=config.momentum,
                                weight_decay=config.weight_decay)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    losses_list = [AverageMeter() for i in range(12)]
    top1 = AverageMeter()
    topk = AverageMeter()

    end = time.time()
    iters = config.start_iters
    best_model = config.best_model
    learning_rate = config.base_lr

    model.train()

    heat_weight = 46 * 46 * 19 / 2.0  # to keep losses comparable with the original code
    vec_weight = 46 * 46 * 38 / 2.0
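    # With 368x368 crops and the stride of 8 passed to CocoFolder, the output
    # maps are 46x46, so heat_weight = 2116 * 19 / 2 = 20102.0 (19 heatmap
    # channels) and vec_weight = 2116 * 38 / 2 = 40204.0 (38 PAF channels).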

    while iters < config.max_iter:
        #---------------------------------------------------- train ------------------------------------------
        for i, (input, heatmap, vecmap, mask, kpt) in enumerate(train_loader):

            learning_rate = adjust_learning_rate(optimizer, iters, config.base_lr, policy=config.lr_policy, policy_parameter=config.policy_parameter, multiple=multiple)
            data_time.update(time.time() - end)

            input = input.cuda(async=True)  # 'async' was renamed non_blocking in later PyTorch
            heatmap = heatmap.cuda(async=True)
            vecmap = vecmap.cuda(async=True)
            mask = mask.cuda(async=True)

            input_var = torch.autograd.Variable(input)
            heatmap_var = torch.autograd.Variable(heatmap)
            vecmap_var = torch.autograd.Variable(vecmap)
            mask_var = torch.autograd.Variable(mask)

            vec1, heat1, vec2, heat2, vec3, heat3, vec4, heat4, vec5, heat5, vec6, heat6 = model(input_var, mask_var)
            loss1_1 = criterion(vec1, vecmap_var) * vec_weight
            loss1_2 = criterion(heat1, heatmap_var) * heat_weight
            loss2_1 = criterion(vec2, vecmap_var) * vec_weight
            loss2_2 = criterion(heat2, heatmap_var) * heat_weight
            loss3_1 = criterion(vec3, vecmap_var) * vec_weight
            loss3_2 = criterion(heat3, heatmap_var) * heat_weight
            loss4_1 = criterion(vec4, vecmap_var) * vec_weight
            loss4_2 = criterion(heat4, heatmap_var) * heat_weight
            loss5_1 = criterion(vec5, vecmap_var) * vec_weight
            loss5_2 = criterion(heat5, heatmap_var) * heat_weight
            loss6_1 = criterion(vec6, vecmap_var) * vec_weight
            loss6_2 = criterion(heat6, heatmap_var) * heat_weight

            loss = loss1_1 + loss1_2 + loss2_1 + loss2_2 + loss3_1 + loss3_2 + loss4_1 + loss4_2 + loss5_1 + loss5_2 + loss6_1 + loss6_2

            losses.update(loss.data[0], input.size(0))  # loss.data[0] -> loss.item() in later PyTorch
            loss_list = [loss1_1, loss1_2, loss2_1, loss2_2, loss3_1, loss3_2, loss4_1, loss4_2, loss5_1, loss5_2, loss6_1, loss6_2]
            for cnt, l in enumerate(loss_list):
                losses_list[cnt].update(l.data[0], input.size(0))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_time.update(time.time() - end)
            end = time.time()

            iters += 1
            if iters % config.display == 0:
                print('Train Iteration: {0}\t'
                      'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
                      'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:.3f})\n'
                      'Learning rate = {2}\n'
                      'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
                          iters, config.display, learning_rate, batch_time=batch_time,
                          data_time=data_time, loss=losses))
                for cnt in range(0, 12, 2):
                    print('Loss{0}_1 = {loss1.val:.8f} (ave = {loss1.avg:.8f})\t'
                          'Loss{1}_2 = {loss2.val:.8f} (ave = {loss2.avg:.8f})'.format(cnt // 2 + 1, cnt // 2 + 1, loss1=losses_list[cnt], loss2=losses_list[cnt + 1]))
                print(time.strftime('%Y-%m-%d %H:%M:%S -----------------------------------------------------------------------------------------------------------------\n', time.localtime()))

                batch_time.reset()
                data_time.reset()
                losses.reset()
                for cnt in range(12):
                    losses_list[cnt].reset()

            #------------------------------------------ val ---------------------------------------------------------------------
            # if config.test_interval != 0 and args.val_dir is not None and iters % config.test_interval == 0:
            #     model.eval()
            #     for j, (input, heatmap, vecmap, mask, kpts) in enumerate(val_loader):
            #         imgs = input.numpy()
            #         heatmap = heatmap.numpy()
            #         vecmap = vecmap.numpy()
            #         mask = mask.numpy()
            #
            #         canvas_targs = np.zeros(imgs.shape)
            #         canvas_preds = np.zeros(imgs.shape)
            #
            #         for i in range(len(imgs)):
            #             img = imgs[i]
            #             img = img.transpose(1, 2, 0)  # 368, 368, 3
            #             img = (img + 1) / 2 * 255
            #
            #             # visualize GT by kpts
            #             # canvas_kpts = img.copy()
            #             # vis_util.draw_kpts(canvas_kpts, kpts)
            #
            #             # visualize results derived from target
            #             all_peaks, subset, candidate = vis_util.get_pose(heatmap[i], vecmap[i])
            #             canvas_targ = img.copy()
            #             vis_util.draw_pose(canvas_targ, all_peaks, subset, candidate)
            #             canvas_targ = canvas_targ.transpose(2, 0, 1)
            #             canvas_targs[i] = canvas_targ
            #
            #         # visualize predicted results
            #         input = input.cuda(async=True)
            #         input_var = torch.autograd.Variable(input, volatile=True)
            #         mask_white = np.ones((mask.shape), dtype=np.float32)
            #         mask_white_var = torch.autograd.Variable(torch.from_numpy(mask_white).cuda())
            #
            #         vec1, heat1, vec2, heat2, vec3, heat3, vec4, heat4, vec5, heat5, vec6, heat6 \
            #             = model(input_var, mask_white_var)
            #
            #         heat_out = heat6.data.cpu().numpy()
            #         vec_out = vec6.data.cpu().numpy()
            #
            #         for i in range(len(imgs)):
            #             img = imgs[i]
            #             img = img.transpose(1, 2, 0)  # 368, 368, 3
            #             img = (img + 1) / 2 * 255
            #             all_peaks, subset, candidate = vis_util.get_pose(heat_out[i], vec_out[i])
            #             canvas_pred = img.copy()
            #             vis_util.draw_pose(canvas_pred, all_peaks, subset, candidate)
            #             canvas_pred = canvas_pred.transpose(2, 0, 1)
            #             canvas_preds[i] = canvas_pred
            #
            #         ## Log images
            #         imgs = {
            #             'target': canvas_targs,
            #             'predict': canvas_preds
            #         }
            #         for tag, images in imgs.items():
            #             logger.image_summary(tag, images, 0)
            #
            #         break
            #
            #     model.train()

            if iters % 5000 == 0:
                torch.save({
                    'iter': iters,
                    'state_dict': model.state_dict(),
                }, str(iters) + '.pth.tar')

            if iters == config.max_iter:
                break


if __name__ == '__main__':
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    args = parse()
    model = construct_model(args)
    train_val(model, args)
--------------------------------------------------------------------------------
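The training loop above writes an '<iters>.pth.tar' checkpoint every 5000 iterations with 'iter' and 'state_dict' keys, which is the format construct_model consumes via --snapshot. A minimal standalone sketch of that round trip (checkpoint path illustrative):

    import torch
    import pose_estimation

    model = pose_estimation.PoseModel(num_point=19, num_vector=19, pretrained=True)
    checkpoint = torch.load('150000.pth.tar')        # path illustrative
    model.load_state_dict(checkpoint['state_dict'])
    start_iters = checkpoint['iter']                 # resume the schedule from here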
/vis_scripts.py:
--------------------------------------------------------------------------------
import torch
import coco_loader
import CocoFolder
import Mytransforms
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt

dir = ['/home/xiangyu/data/samsung_pose_data/img_list/valminusminival2014.txt',
       '/home/xiangyu/data/samsung_pose_data/mask_list/valminusminival2014.txt',
       '/home/xiangyu/data/samsung_pose_data/json/valminusminival2014.json']
out_dir = './vis_input/'

loader = torch.utils.data.DataLoader(
    coco_loader.coco_loader(dir, 8,
                            Mytransforms.Compose([Mytransforms.RandomResized(),
                                                  Mytransforms.RandomRotate(40),
                                                  Mytransforms.RandomCrop(368),
                                                  Mytransforms.RandomHorizontalFlip(),
                                                  ])),
    batch_size=10, shuffle=False,
    num_workers=1, pin_memory=True)

# dir = ['/home/xiangyu/data/samsung_pose_data/img_list/valminusminival2014.txt',
#        '/home/xiangyu/data/samsung_pose_data/mask_list/valminusminival2014.txt',
#        '/home/xiangyu/data/samsung_pose_data/json/valminusminival2014.json']
# out_dir = './vis_input/'
#
# loader = torch.utils.data.DataLoader(
#     CocoFolder.CocoFolder(dir, 8,
#                           Mytransforms.Compose([Mytransforms.RandomResized(),
#                                                 Mytransforms.RandomRotate(40),
#                                                 Mytransforms.RandomCrop(368),
#                                                 Mytransforms.RandomHorizontalFlip(),
#                                                 ])),
#     batch_size=10, shuffle=False,
#     num_workers=1, pin_memory=True)


for i, (input, heatmap, vecmap, mask, kpt) in enumerate(loader):
    imgs = input.numpy()
    heats = heatmap.numpy()
    vectors = vecmap.numpy()
    masks = mask.numpy()
    break

for i in range(10):
    img = imgs[i, :, :, :]
    img = img.transpose(1, 2, 0)
    img *= 128
    img += 128

    #img /= 255
    # plt.imshow(img)
    # plt.show()
    # plt.close()

    mask = masks[i, :, :, :]
    mask = mask.transpose(1, 2, 0)
    mask = cv2.resize(mask, (368, 368))
    mask = mask.reshape((368, 368, 1))
    new_img = img * mask
    img = np.array(img, np.uint8)
    new_img = np.array(new_img, np.uint8)
    # plt.imshow(new_img)
    # plt.show()
    # plt.close()

    heatmaps = heats[i, :, :, :]
    heatmaps = heatmaps.transpose(1, 2, 0)
    heatmaps = cv2.resize(heatmaps, (368, 368))
    for j in range(0, 19):
        heatmap = heatmaps[:, :, j]
        heatmap = heatmap.reshape((368, 368, 1))
        heatmap *= 255
        heatmap = np.array(heatmap, np.uint8)
        heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
        # heatmap = heatmap.reshape((368,368,1))
        #heatmap /= 255
        # result = heatmap * 0.4 + img * 0.5
        print(j)
        # plt.imshow(img)
        # plt.imshow(heatmap, alpha=0.5)
        # plt.show()
        # plt.close()
        heatmap = cv2.addWeighted(new_img, 0.5, heatmap, 0.5, 0)
        cv2.imwrite(out_dir + '{}_heatmap_{}.jpg'.format(i, j), heatmap)

    vecs = vectors[i, :, :, :]
    vecs = vecs.transpose(1, 2, 0)
    vecs = cv2.resize(vecs, (368, 368))
    for j in range(0, 38, 2):
        vec = np.abs(vecs[:, :, j])
        vec += np.abs(vecs[:, :, j + 1])
        vec[vec > 1] = 1
        vec = vec.reshape((368, 368, 1))
        # vec[vec > 0] = 1
        vec *= 255
        vec = np.array(vec, np.uint8)
        # vec = cv2.applyColorMap(vec, cv2.COLORMAP_JET)
        # vec = vec.reshape((368, 368))
        #vec /= 255
        print(j)
        vec = cv2.applyColorMap(vec, cv2.COLORMAP_JET)
        vec = cv2.addWeighted(new_img, 0.5, vec, 0.5, 0)
        cv2.imwrite(out_dir + '{}_vec_{}.jpg'.format(i, j), vec)
        # plt.imshow(img)
        # # result = vec * 0.4 + img * 0.5
        # plt.imshow(vec, alpha=0.5)
        # plt.show()
        # plt.close()

print('done!')
--------------------------------------------------------------------------------