├── README.md └── data ├── group_diff_data_gen.py └── openpose ├── __init__.py ├── body.py ├── hand.py ├── model.py └── util.py /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 |

GroupDiff: Diffusion-based Group Portrait Editing

4 | 5 |
6 | 7 | 8 | ## Dataset 9 | 10 | Our dataset is developed based on NUS LV Multiple-Human Parsing Dataset v2.0. Please download the source data from the [link](https://drive.google.com/file/d/1YVBGMru0dlwB8zu1OoErOazZoc8ISSJn/view?usp=sharing). 11 | 12 | We use [MMPose](https://github.com/open-mmlab/mmpose) to estimate the pose using the "vitpose_h" model. You can download the pose estimation results from this [link](https://drive.google.com/file/d/1_ivJ5jTv0p-gdcZ8XLvTix_ymg7KOJTL/view?usp=sharing). 13 | 14 | After downloading the dataset, unzip the file and put them under the dataset folder with the following structure: 15 | ``` 16 | ./LV-MHP-v2 17 | ├── train 18 | ├── images 19 | ├── parsing_annos 20 | └── pose_estimation 21 | └── shhq_dataset 22 | ├── images 23 | ├── parsing_annos 24 | └── pose_estimation 25 | ``` 26 | 27 | Based on the preprocessed data, we propose a comprehensive training data generation engine to synthesize paired data. The data generation codes can be found [here](https://github.com/yumingj/GroupDiff/blob/main/data/group_diff_data_gen.py). 28 | -------------------------------------------------------------------------------- /data/group_diff_data_gen.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import glob 3 | import json 4 | import math 5 | import random 6 | 7 | import cv2 8 | import numpy as np 9 | import torch 10 | import torch.utils.data as data 11 | import torchvision.transforms as transforms 12 | import torchvision.transforms.functional as F 13 | from PIL import Image, ImageDraw 14 | from scipy.ndimage import binary_dilation 15 | from torch import Tensor 16 | from torchvision.ops import masks_to_boxes 17 | 18 | from openpose import OpenposeDetector 19 | 20 | 21 | class GroupDiffDataGen(data.Dataset): 22 | 23 | def __init__(self, 24 | state, 25 | skeleton_path_prefix, 26 | add_harmonization=False, 27 | reposing_exemplar=True, 28 | use_localssd=False): 29 | self.state = state 30 | self.add_harmonization = add_harmonization 31 | self.use_localssd = use_localssd 32 | 33 | if state == 'train': 34 | data_dir = './LV-MHP-v2/train' 35 | self.data_path_list = glob.glob(f'{data_dir}/images/*.jpg') 36 | self.parsing_dir = f'{data_dir}/parsing_annos' 37 | self.pose_estimation_path = f'{data_dir}/pose_estimation' 38 | self.data_path_list.sort() 39 | else: 40 | data_dir = './LV-MHP-v2/val' 41 | self.data_path_list = glob.glob(f'{data_dir}/images/*.jpg') 42 | self.parsing_dir = f'{data_dir}/parsing_annos' 43 | self.data_path_list.sort() 44 | 45 | self.skeleton_path_prefix = skeleton_path_prefix 46 | 47 | self.resize_transform_img = transforms.Resize(size=512) 48 | self.resize_transform_mask = transforms.Resize( 49 | size=512, interpolation=transforms.InterpolationMode.NEAREST) 50 | 51 | self.resize_transform_exemplar = transforms.Resize(size=224) 52 | 53 | self.apply_openpose = OpenposeDetector() 54 | 55 | self.reposing_exemplar = reposing_exemplar 56 | 57 | self.random_color_identity_group = [[(0, 0, 255), (0, 0, 200), 58 | (0, 0, 150)], 59 | [(255, 0, 0), (200, 0, 0), 60 | (150, 0, 0)], 61 | [(0, 255, 0), (0, 200, 0), 62 | (0, 200, 0)], 63 | [(255, 0, 255), (200, 0, 200), 64 | (150, 0, 150)], 65 | [(0, 255, 255), (0, 200, 200), 66 | (0, 150, 150)]] 67 | 68 | def transform_exemplar(self): 69 | transform_list = [] 70 | transform_list += [ 71 | transforms.RandomAffine( 72 | degrees=20, 73 | translate=(0.1, 0.1), 74 | scale=(0.9, 1.10), 75 | fill=255, 76 | interpolation=transforms.InterpolationMode.BILINEAR) 77 | ] 78 | if self.add_harmonization: 79 | transform_list += [ 80 | transforms.ColorJitter( 81 | brightness=(0.9, 1.1), 82 | contrast=(0.9, 1.1), 83 | saturation=(0.8, 1.3)) 84 | ] 85 | transform_list += [transforms.Resize(size=512)] 86 | # transform_list += [transforms.Normalize((0.48145466, 0.4578275, 0.40821073), 87 | # (0.26862954, 0.26130258, 0.27577711))] 88 | 89 | return transforms.Compose(transform_list) 90 | 91 | def get_candidate_parsing_list_for_exemplar(self, inpaint_mask, 92 | seleted_idx, 93 | instance_parsing_list): 94 | candidate_parsing_list = [] 95 | idx_in_candidate_list = 0 96 | count = 0 97 | for idx, instance_parsing in enumerate(instance_parsing_list): 98 | mask_binary = np.zeros( 99 | (inpaint_mask.shape[0], inpaint_mask.shape[1]), dtype=np.uint8) 100 | mask_binary[instance_parsing > 0] = 1 101 | 102 | if np.sum(mask_binary * inpaint_mask) == 0: 103 | continue 104 | 105 | candidate_parsing_list.append(instance_parsing) 106 | 107 | if idx == seleted_idx: 108 | idx_in_candidate_list = count 109 | 110 | count += 1 111 | 112 | return candidate_parsing_list, idx_in_candidate_list 113 | 114 | 115 | 116 | def warp_parsing(self, parsing, rect1, rect2): 117 | shape1 = parsing.shape 118 | h = shape1[0] 119 | w = shape1[1] 120 | 121 | rect1 = np.array(rect1, dtype=np.float32) 122 | rect2 = np.array(rect2, dtype=np.float32) 123 | 124 | # ===== homography 125 | H = cv2.getPerspectiveTransform(src=rect1, dst=rect2) 126 | # print(H) 127 | # H_inverse = np.linalg.inv(H) 128 | 129 | # img_warped = cv2.warpPerspective(src=img, M=H_inverse, dsize=(w, h)) 130 | parsing_warped = cv2.warpPerspective( 131 | src=parsing, M=H, dsize=(w, h), flags=cv2.INTER_NEAREST) 132 | 133 | return parsing_warped 134 | 135 | def rotate_whole_arms(self, ori_point, point_a, point_b, alpha): 136 | x_0, y_0 = ori_point 137 | x_a, y_a = point_a 138 | x_b, y_b = point_b 139 | 140 | x_a = x_a - x_0 141 | y_a = y_a - y_0 142 | 143 | x_b = x_b - x_0 144 | y_b = y_b - y_0 145 | 146 | x_a_prime = x_a * math.cos(alpha) - y_a * math.sin(alpha) 147 | y_a_prime = x_a * math.sin(alpha) + y_a * math.cos(alpha) 148 | 149 | x_b_dif = x_b - x_a 150 | y_b_dif = y_b - y_a 151 | 152 | x_b_prime = x_b_dif * math.cos(alpha) - y_b_dif * math.sin( 153 | alpha) + x_a_prime + x_0 154 | y_b_prime = x_b_dif * math.sin(alpha) + y_b_dif * math.cos( 155 | alpha) + y_a_prime + y_0 156 | 157 | return [x_a_prime + x_0, y_a_prime + y_0], [x_b_prime, y_b_prime] 158 | 159 | def rotate_part_arms(self, ori_point, point_a, alpha): 160 | x_0, y_0 = ori_point 161 | x_a, y_a = point_a 162 | 163 | x_a = x_a - x_0 164 | y_a = y_a - y_0 165 | 166 | x_a_prime = x_a * math.cos(alpha) - y_a * math.sin(alpha) 167 | y_a_prime = x_a * math.sin(alpha) + y_a * math.cos(alpha) 168 | 169 | return [x_a_prime + x_0, y_a_prime + y_0] 170 | 171 | def randomly_change_pose(self, ori_coordinates, selected_person_idx): 172 | new_coordinates = copy.deepcopy(ori_coordinates) 173 | candidate = ori_coordinates['candidate'] 174 | subset = ori_coordinates['subset'] 175 | 176 | augmentation_type = random.uniform(0, 1) 177 | try: 178 | index_2 = int(subset[selected_person_idx][2]) 179 | index_3 = int(subset[selected_person_idx][3]) 180 | index_4 = int(subset[selected_person_idx][4]) 181 | index_5 = int(subset[selected_person_idx][5]) 182 | index_6 = int(subset[selected_person_idx][6]) 183 | index_7 = int(subset[selected_person_idx][7]) 184 | except: 185 | return new_coordinates 186 | 187 | if (index_2 == -1 or index_3 == -1 188 | or index_4 == -1) and (index_3 == -1 or index_4 == -1) and ( 189 | index_5 == -1 or index_6 == -1 190 | or index_7 == -1) and (index_6 == -1 or index_7 == -1): 191 | return new_coordinates 192 | 193 | augmentation_type = random.uniform(0, 1) 194 | trial_num = 0 195 | while (trial_num < 5): 196 | if augmentation_type < 0.25: 197 | if index_2 == -1 or index_3 == -1 or index_4 == -1: 198 | trial_num += 1 199 | augmentation_type = random.uniform(0, 1) 200 | continue 201 | # left arms 202 | # change from the body_idx 2 203 | changed_x3, changed_x4 = self.rotate_whole_arms( 204 | candidate[int(subset[selected_person_idx][2])][0:2], 205 | candidate[int(subset[selected_person_idx][3])][0:2], 206 | candidate[int(subset[selected_person_idx][4])][0:2], 207 | 2 * math.pi * random.random()) 208 | 209 | new_coordinates['candidate'][int( 210 | subset[selected_person_idx][3])][0:2] = changed_x3 211 | new_coordinates['candidate'][int( 212 | subset[selected_person_idx][4])][0:2] = changed_x4 213 | elif augmentation_type < 0.5: 214 | # left arms 215 | # change from the body_idx 3 216 | if index_3 == -1 or index_4 == -1: 217 | trial_num += 1 218 | augmentation_type = random.uniform(0, 1) 219 | continue 220 | changed_x4 = self.rotate_part_arms( 221 | candidate[int(subset[selected_person_idx][3])][0:2], 222 | candidate[int(subset[selected_person_idx][4])][0:2], 223 | 2 * math.pi * random.random()) 224 | new_coordinates['candidate'][int( 225 | subset[selected_person_idx][4])][0:2] = changed_x4 226 | elif augmentation_type < 0.75: 227 | # right arms 228 | # change from the body_idx 5 229 | if index_5 == -1 or index_6 == -1 or index_7 == -1: 230 | trial_num += 1 231 | augmentation_type = random.uniform(0, 1) 232 | continue 233 | changed_x6, changed_x7 = self.rotate_whole_arms( 234 | candidate[int(subset[selected_person_idx][5])][0:2], 235 | candidate[int(subset[selected_person_idx][6])][0:2], 236 | candidate[int(subset[selected_person_idx][7])][0:2], 237 | 2 * math.pi * random.random()) 238 | 239 | new_coordinates['candidate'][int( 240 | subset[selected_person_idx][6])][0:2] = changed_x6 241 | new_coordinates['candidate'][int( 242 | subset[selected_person_idx][7])][0:2] = changed_x7 243 | else: 244 | # right arms 245 | # change from the body_idx 5 246 | if index_6 == -1 or index_7 == -1: 247 | trial_num += 1 248 | augmentation_type = random.uniform(0, 1) 249 | continue 250 | changed_x7 = self.rotate_part_arms( 251 | candidate[int(subset[selected_person_idx][6])][0:2], 252 | candidate[int(subset[selected_person_idx][7])][0:2], 253 | 2 * math.pi * random.random()) 254 | new_coordinates['candidate'][int( 255 | subset[selected_person_idx][7])][0:2] = changed_x7 256 | 257 | break 258 | 259 | return new_coordinates 260 | 261 | 262 | def reposing_exemplar_img(self, exemplar_img, parsing_map): 263 | _, ori_coordinates = self.apply_openpose(exemplar_img) 264 | 265 | if self.reposing_exemplar: 266 | selected_person_idx = 0 267 | new_coordinates = self.randomly_change_pose( 268 | ori_coordinates, selected_person_idx) 269 | 270 | connected_line_list = [[2, 3], [3, 4], [5, 6], [6, 7]] 271 | 272 | new_exemplar_img = exemplar_img.copy() 273 | for connected_line in connected_line_list: 274 | try: 275 | index = int( 276 | ori_coordinates['subset'][0][connected_line[0]]) 277 | except: 278 | continue 279 | if index == -1: 280 | continue 281 | point1 = ori_coordinates['candidate'][index][0:2] 282 | 283 | try: 284 | index = int( 285 | ori_coordinates['subset'][0][connected_line[1]]) 286 | except: 287 | continue 288 | if index == -1: 289 | continue 290 | point2 = ori_coordinates['candidate'][index][0:2] 291 | 292 | try: 293 | index = int( 294 | new_coordinates['subset'][0][connected_line[0]]) 295 | except: 296 | continue 297 | if index == -1: 298 | continue 299 | new_point1 = new_coordinates['candidate'][index][0:2] 300 | 301 | try: 302 | index = int( 303 | new_coordinates['subset'][0][connected_line[1]]) 304 | except: 305 | continue 306 | 307 | if index == -1: 308 | continue 309 | new_point2 = new_coordinates['candidate'][index][0:2] 310 | 311 | if (point1 == new_point1) and (point2 == new_point2): 312 | continue 313 | 314 | # if the arm, extend the point2 315 | if (connected_line == [3, 4]) or (connected_line == [6, 7]): 316 | # import pdb 317 | # pdb.set_trace() 318 | point2[0] = point2[0] + 0.6 * (point2[0] - point1[0]) 319 | point2[1] = point2[1] + 0.6 * (point2[1] - point1[1]) 320 | 321 | length = ((point1[0] - point2[0])**2 + 322 | (point1[1] - point2[1])**2)**0.5 323 | 324 | ori_rec_points = self.find_parallel_points( 325 | point1, point2, 0.25 * length) 326 | 327 | # if the arm, extend the point2 328 | if (connected_line == [3, 4]) or (connected_line == [6, 7]): 329 | # import pdb 330 | # pdb.set_trace() 331 | new_point2[0] = new_point2[0] + 0.6 * ( 332 | new_point2[0] - new_point1[0]) 333 | new_point2[1] = new_point2[1] + 0.6 * ( 334 | new_point2[1] - new_point1[1]) 335 | 336 | length = ((new_point1[0] - new_point2[0])**2 + 337 | (new_point1[1] - new_point2[1])**2)**0.5 338 | new_rec_points = self.find_parallel_points( 339 | new_point1, new_point2, 0.25 * length) 340 | 341 | warped_exemplar = self.warp_img(exemplar_img, ori_rec_points, 342 | new_rec_points) 343 | 344 | masked_area = np.zeros_like(exemplar_img[:, :, 0]) 345 | cv2.fillPoly(masked_area, [np.array(ori_rec_points)], 255) 346 | masked_area = masked_area * (parsing_map > 0) 347 | 348 | new_exemplar_img[masked_area == 255] = 255 349 | 350 | warped_parsing = self.warp_parsing(parsing_map, ori_rec_points, 351 | new_rec_points) 352 | 353 | masked_area = np.zeros_like(exemplar_img[:, :, 0]) 354 | cv2.fillPoly(masked_area, [np.array(new_rec_points)], 255) 355 | masked_area = masked_area * (warped_parsing > 0) 356 | 357 | new_exemplar_img[masked_area == 255] = warped_exemplar[ 358 | masked_area == 255] 359 | 360 | return new_exemplar_img, new_coordinates 361 | else: 362 | return exemplar_img, ori_coordinates 363 | 364 | def warp_img(self, img, rect1, rect2): 365 | shape1 = img.shape 366 | h = shape1[0] 367 | w = shape1[1] 368 | 369 | rect1 = np.array(rect1, dtype=np.float32) 370 | rect2 = np.array(rect2, dtype=np.float32) 371 | 372 | # ===== homography 373 | H = cv2.getPerspectiveTransform(src=rect1, dst=rect2) 374 | # print(H) 375 | # H_inverse = np.linalg.inv(H) 376 | 377 | # img_warped = cv2.warpPerspective(src=img, M=H_inverse, dsize=(w, h)) 378 | img_warped = cv2.warpPerspective(src=img, M=H, dsize=(w, h)) 379 | 380 | return img_warped 381 | 382 | def find_parallel_points(self, point1, point2, distance): 383 | # Calculate slope and intercept of the line passing through the two points 384 | # slope = (point2[1] - point1[1]) / (point2[0] - point1[0]) 385 | # intercept = point1[1] - slope * point1[0] 386 | 387 | # Calculate the angle of the line 388 | angle = np.arctan2(point2[1] - point1[1], point2[0] - point1[0]) 389 | 390 | # Calculate new points parallel to the line 391 | parallel_points = [] 392 | for direction in [-1, 393 | 1]: # Two directions (left and right of the line) 394 | new_x = point1[0] + direction * distance * np.sin(angle) 395 | new_y = point1[1] - direction * distance * np.cos(angle) 396 | parallel_points.append((int(new_x), int(new_y))) 397 | 398 | for direction in [1, 399 | -1]: # Two directions (left and right of the line) 400 | new_x = point2[0] + direction * distance * np.sin(angle) 401 | new_y = point2[1] - direction * distance * np.cos(angle) 402 | parallel_points.append((int(new_x), int(new_y))) 403 | 404 | return parallel_points 405 | 406 | def read_img(self, img_path): 407 | 408 | img = np.array(Image.open(img_path).convert('RGB')) 409 | 410 | return img 411 | 412 | def read_img_exemplar_mask(self, img, candidate_parsing_list): 413 | 414 | img_exemplar_list = [] 415 | parsing_exemplar_list = [] 416 | for parsing in candidate_parsing_list: 417 | mask_binary = np.zeros((img.shape[0], img.shape[1]), 418 | dtype=np.uint8) 419 | mask_binary[parsing > 0] = 1 420 | 421 | img_exemplar = img.copy() 422 | 423 | img_exemplar[mask_binary == 0] = 255. 424 | inner_dilated_aug = random.uniform(0, 1) 425 | if inner_dilated_aug < 0.2: 426 | structuring_element = np.ones((5, 5), dtype=bool) 427 | dilated_mask_binary = binary_dilation( 428 | 1 - mask_binary, structure=structuring_element) 429 | img_exemplar[dilated_mask_binary == 1] = 255. 430 | 431 | mask_tensor = torch.from_numpy(mask_binary).unsqueeze(0) 432 | 433 | obj_ids = torch.unique(mask_tensor) 434 | obj_ids = obj_ids[1:] 435 | masks = mask_tensor == obj_ids[:, None, None] 436 | 437 | boxes = masks_to_boxes(masks) 438 | 439 | h, w = mask_binary.shape 440 | 441 | # make the bounding box slightly larger 442 | enlarge_ratio = 0.1 443 | enlarge_margin_h = int((boxes[0][3] - boxes[0][1]) * enlarge_ratio) 444 | enlarge_margin_w = int((boxes[0][2] - boxes[0][0]) * enlarge_ratio) 445 | 446 | bbox_y1, bbox_y2 = max(0, 447 | int(boxes[0][1]) - enlarge_margin_h), min( 448 | h, 449 | int(boxes[0][3]) + enlarge_margin_h) 450 | bbox_x1, bbox_x2 = max(0, 451 | int(boxes[0][0]) - enlarge_margin_w), min( 452 | w, 453 | int(boxes[0][2]) + enlarge_margin_w) 454 | img_exemplar = img_exemplar[bbox_y1:bbox_y2, bbox_x1:bbox_x2] 455 | img_exemplar_list.append(img_exemplar) 456 | parsing_exemplar_list.append(parsing[bbox_y1:bbox_y2, 457 | bbox_x1:bbox_x2]) 458 | 459 | return img_exemplar_list, parsing_exemplar_list 460 | 461 | def transform_exemplar_and_parsing(self, exemplar_img, parsing): 462 | 463 | random_affine_transformation = transforms.RandomAffine( 464 | degrees=20, 465 | translate=(0.1, 0.1), 466 | scale=(0.9, 1.10), 467 | fill=255, 468 | interpolation=transforms.InterpolationMode.BILINEAR) 469 | resize_transform_img = transforms.Resize(size=512) 470 | resize_transform_parsing = transforms.Resize( 471 | size=512, interpolation=transforms.InterpolationMode.NEAREST) 472 | 473 | channels, height, width = exemplar_img.size() 474 | 475 | ret = random_affine_transformation.get_params( 476 | random_affine_transformation.degrees, 477 | random_affine_transformation.translate, 478 | random_affine_transformation.scale, 479 | random_affine_transformation.shear, [width, height]) 480 | 481 | fill = 255 482 | if isinstance(exemplar_img, Tensor): 483 | if isinstance(fill, (int, float)): 484 | fill = [float(fill)] * channels 485 | else: 486 | fill = [float(f) for f in fill] 487 | 488 | exemplar_img = F.affine( 489 | exemplar_img, 490 | *ret, 491 | interpolation=transforms.InterpolationMode.BILINEAR, 492 | fill=fill, 493 | center=random_affine_transformation.center) 494 | 495 | channels, _, _ = parsing.size() 496 | fill = 0 497 | if isinstance(parsing, Tensor): 498 | if isinstance(fill, (int, float)): 499 | fill = [float(fill)] * channels 500 | else: 501 | fill = [float(f) for f in fill] 502 | 503 | parsing = F.affine( 504 | parsing, 505 | *ret, 506 | interpolation=transforms.InterpolationMode.NEAREST, 507 | fill=fill, 508 | center=random_affine_transformation.center) 509 | 510 | exemplar_img = resize_transform_img(exemplar_img) 511 | parsing = resize_transform_parsing(parsing) 512 | 513 | return exemplar_img, parsing 514 | 515 | def random_brush_top_down(self, skeleton_mask, ori_rec_points): 516 | mask = Image.new('L', (skeleton_mask.shape[1], skeleton_mask.shape[0]), 0) 517 | 518 | num_points = int(np.random.uniform(8, 15)) 519 | 520 | sampled_points_top = np.linspace(ori_rec_points[0], ori_rec_points[1], num_points) 521 | sampled_points_top = [(int(x), int(y)) for x, y in sampled_points_top] 522 | 523 | sampled_points_down = np.linspace(ori_rec_points[3], ori_rec_points[2], num_points) 524 | sampled_points_down = [(int(x), int(y)) for x, y in sampled_points_down] 525 | 526 | vertex = [] 527 | for top_point, down_point in zip(sampled_points_top, sampled_points_down): 528 | random_move = np.random.uniform(-0.6, 0.6) 529 | sampled_x, sampled_y = top_point 530 | sampled_x = sampled_x + int(random_move * (sampled_points_top[1][0] - sampled_points_top[0][0])) 531 | sampled_y = sampled_y - int(np.random.uniform(0, 1.0) * (sampled_points_down[1][1] - sampled_points_down[0][1])) 532 | vertex.append((sampled_x, sampled_y)) 533 | 534 | sampled_x, sampled_y = down_point 535 | random_move = np.random.uniform(-0.6, 0.6) 536 | sampled_x = sampled_x + int(random_move * (sampled_points_top[1][0] - sampled_points_top[0][0])) 537 | sampled_y = sampled_y + int(np.random.uniform(0, 1.0) * (sampled_points_down[1][1] - sampled_points_down[0][1])) 538 | vertex.append((sampled_x, sampled_y)) 539 | 540 | draw = ImageDraw.Draw(mask) 541 | min_width = 12 542 | max_width = 48 543 | width = int(np.random.uniform(min_width, max_width)) 544 | draw.line(vertex, fill=1, width=width) 545 | for v in vertex: 546 | draw.ellipse((v[0] - width//2, 547 | v[1] - width//2, 548 | v[0] + width//2, 549 | v[1] + width//2), 550 | fill=1) 551 | 552 | mask = np.asarray(mask, np.uint8) * 255 553 | 554 | return mask 555 | 556 | def load_arm_hand_masks(self, skeleton_mask, selected_person_bbox, 557 | instance_parsing_list): 558 | area_list = [] 559 | for instance_parsing in instance_parsing_list: 560 | mask_binary = np.zeros((instance_parsing.shape[0], instance_parsing.shape[1]), dtype=np.uint8) 561 | mask_binary[instance_parsing > 0] = 1 562 | area = np.sum(selected_person_bbox * mask_binary) 563 | area_list.append(area) 564 | 565 | seleted_idx = np.argmax(area_list) 566 | 567 | selected_parsing = instance_parsing_list[seleted_idx] 568 | 569 | temp_mask = np.zeros_like(selected_parsing) 570 | for value in [5, 7]: 571 | temp_mask[selected_parsing == value] = 1 572 | if np.sum(temp_mask) != 0: 573 | kernel_width = 28 574 | kernel_height = 45 575 | 576 | kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_width, kernel_height)) 577 | dilated_mask = cv2.dilate(temp_mask, kernel) 578 | 579 | skeleton_mask[skeleton_mask == 0] = dilated_mask[skeleton_mask == 0] 580 | 581 | temp_mask = np.zeros_like(selected_parsing) 582 | for value in [6, 8]: 583 | temp_mask[selected_parsing == value] = 1 584 | if np.sum(temp_mask) != 0: 585 | kernel_width = 28 586 | kernel_height = 45 587 | 588 | kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_width, kernel_height)) 589 | dilated_mask = cv2.dilate(temp_mask, kernel) 590 | 591 | skeleton_mask[skeleton_mask == 0] = dilated_mask[skeleton_mask == 0] 592 | 593 | return skeleton_mask, seleted_idx 594 | 595 | def random_brush_down_top(self, skeleton_mask, ori_rec_points): 596 | mask = Image.new('L', (skeleton_mask.shape[1], skeleton_mask.shape[0]), 0) 597 | 598 | num_points = int(np.random.uniform(8, 15)) 599 | 600 | sampled_points_top = np.linspace(ori_rec_points[0], ori_rec_points[1], num_points) 601 | sampled_points_top = [(int(x), int(y)) for x, y in sampled_points_top] 602 | 603 | sampled_points_down = np.linspace(ori_rec_points[3], ori_rec_points[2], num_points) 604 | sampled_points_down = [(int(x), int(y)) for x, y in sampled_points_down] 605 | 606 | vertex = [] 607 | for top_point, down_point in zip(sampled_points_down, sampled_points_top): 608 | random_move = np.random.uniform(-0.6, 0.6) 609 | sampled_x, sampled_y = top_point 610 | sampled_x = sampled_x + int(random_move * (sampled_points_top[1][0] - sampled_points_top[0][0])) 611 | sampled_y = sampled_y - int(np.random.uniform(0, 1.0) * (sampled_points_down[1][1] - sampled_points_down[0][1])) 612 | vertex.append((sampled_x, sampled_y)) 613 | 614 | sampled_x, sampled_y = down_point 615 | random_move = np.random.uniform(-0.6, 0.6) 616 | sampled_x = sampled_x + int(random_move * (sampled_points_top[1][0] - sampled_points_top[0][0])) 617 | sampled_y = sampled_y + int(np.random.uniform(0, 1.0) * (sampled_points_down[1][1] - sampled_points_down[0][1])) 618 | vertex.append((sampled_x, sampled_y)) 619 | 620 | draw = ImageDraw.Draw(mask) 621 | min_width = 12 622 | max_width = 48 623 | width = int(np.random.uniform(min_width, max_width)) 624 | draw.line(vertex, fill=1, width=width) 625 | for v in vertex: 626 | draw.ellipse((v[0] - width//2, 627 | v[1] - width//2, 628 | v[0] + width//2, 629 | v[1] + width//2), 630 | fill=1) 631 | 632 | mask = np.asarray(mask, np.uint8) * 255 633 | 634 | # import pdb 635 | # pdb.set_trace() 636 | 637 | return mask 638 | 639 | def random_brush_left_right(self, skeleton_mask, ori_rec_points): 640 | mask = Image.new('L', (skeleton_mask.shape[1], skeleton_mask.shape[0]), 0) 641 | 642 | num_points = int(np.random.uniform(8, 15)) 643 | 644 | sampled_points_top = np.linspace(ori_rec_points[3], ori_rec_points[0], num_points) 645 | sampled_points_top = [(int(x), int(y)) for x, y in sampled_points_top] 646 | 647 | sampled_points_down = np.linspace(ori_rec_points[2], ori_rec_points[1], num_points) 648 | sampled_points_down = [(int(x), int(y)) for x, y in sampled_points_down] 649 | 650 | vertex = [] 651 | for top_point, down_point in zip(sampled_points_down, sampled_points_top): 652 | random_move = np.random.uniform(-0.6, 0.6) 653 | sampled_x, sampled_y = top_point 654 | sampled_x = sampled_x - int(np.random.uniform(0, 1.0) * (sampled_points_top[1][0] - sampled_points_top[0][0])) 655 | sampled_y = sampled_y + int(random_move * (sampled_points_down[1][1] - sampled_points_down[0][1])) 656 | vertex.append((sampled_x, sampled_y)) 657 | 658 | sampled_x, sampled_y = down_point 659 | random_move = np.random.uniform(-0.6, 0.6) 660 | sampled_x = sampled_x + int(np.random.uniform(0, 1.0) * (sampled_points_top[1][0] - sampled_points_top[0][0])) 661 | sampled_y = sampled_y + int(random_move * (sampled_points_down[1][1] - sampled_points_down[0][1])) 662 | vertex.append((sampled_x, sampled_y)) 663 | 664 | draw = ImageDraw.Draw(mask) 665 | min_width = 12 666 | max_width = 48 667 | width = int(np.random.uniform(min_width, max_width)) 668 | draw.line(vertex, fill=1, width=width) 669 | for v in vertex: 670 | draw.ellipse((v[0] - width//2, 671 | v[1] - width//2, 672 | v[0] + width//2, 673 | v[1] + width//2), 674 | fill=1) 675 | 676 | mask = np.asarray(mask, np.uint8) * 255 677 | 678 | # import pdb 679 | # pdb.set_trace() 680 | 681 | return mask 682 | 683 | def random_brush_right_left(self, skeleton_mask, ori_rec_points): 684 | mask = Image.new('L', (skeleton_mask.shape[1], skeleton_mask.shape[0]), 0) 685 | 686 | num_points = int(np.random.uniform(8, 15)) 687 | 688 | sampled_points_top = np.linspace(ori_rec_points[3], ori_rec_points[0], num_points) 689 | sampled_points_top = [(int(x), int(y)) for x, y in sampled_points_top] 690 | 691 | sampled_points_down = np.linspace(ori_rec_points[2], ori_rec_points[1], num_points) 692 | sampled_points_down = [(int(x), int(y)) for x, y in sampled_points_down] 693 | 694 | vertex = [] 695 | for top_point, down_point in zip(sampled_points_top, sampled_points_down): 696 | random_move = np.random.uniform(-0.6, 0.6) 697 | sampled_x, sampled_y = top_point 698 | sampled_x = sampled_x + int(np.random.uniform(0, 1.0) * (sampled_points_top[1][0] - sampled_points_top[0][0])) 699 | sampled_y = sampled_y + int(random_move * (sampled_points_down[1][1] - sampled_points_down[0][1])) 700 | vertex.append((sampled_x, sampled_y)) 701 | 702 | sampled_x, sampled_y = down_point 703 | random_move = np.random.uniform(-0.6, 0.6) 704 | sampled_x = sampled_x - int(np.random.uniform(0, 1.0) * (sampled_points_top[1][0] - sampled_points_top[0][0])) 705 | sampled_y = sampled_y + int(random_move * (sampled_points_down[1][1] - sampled_points_down[0][1])) 706 | vertex.append((sampled_x, sampled_y)) 707 | 708 | draw = ImageDraw.Draw(mask) 709 | min_width = 12 710 | max_width = 48 711 | width = int(np.random.uniform(min_width, max_width)) 712 | draw.line(vertex, fill=1, width=width) 713 | for v in vertex: 714 | draw.ellipse((v[0] - width//2, 715 | v[1] - width//2, 716 | v[0] + width//2, 717 | v[1] + width//2), 718 | fill=1) 719 | 720 | mask = np.asarray(mask, np.uint8) * 255 721 | 722 | return mask 723 | 724 | def random_brush_augment(self, skeleton_mask, ori_rec_points): 725 | 726 | brush_direction_type = random.uniform(0, 1) 727 | if brush_direction_type < 0.25: 728 | brush_mask = self.random_brush_top_down(skeleton_mask, ori_rec_points) 729 | elif brush_direction_type < 0.5: 730 | brush_mask = self.random_brush_down_top(skeleton_mask, ori_rec_points) 731 | elif brush_direction_type < 0.75: 732 | brush_mask = self.random_brush_left_right(skeleton_mask, ori_rec_points) 733 | else: 734 | brush_mask = self.random_brush_right_left(skeleton_mask, ori_rec_points) 735 | 736 | skeleton_mask[skeleton_mask == 0] = brush_mask[skeleton_mask == 0] 737 | return skeleton_mask 738 | 739 | def compute_diff_mask(self, ori_coordinates, new_coordinates, 740 | skeleton_mask): 741 | 742 | skeleton_mask = skeleton_mask * 255 743 | 744 | diff_skeleton_list = [] 745 | for subset_idx, subset in enumerate(ori_coordinates['subset']): 746 | for skeleton_idx in range(18): 747 | if ori_coordinates['candidate'][ 748 | ori_coordinates['subset'][subset_idx] 749 | [skeleton_idx]] != new_coordinates['candidate'][ 750 | new_coordinates['subset'][subset_idx][skeleton_idx]]: 751 | diff_skeleton_list.append(f'{subset_idx}_{skeleton_idx}') 752 | 753 | limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \ 754 | [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \ 755 | [1, 16], [16, 18], [3, 17], [6, 18]] 756 | 757 | for diff_skeleton in diff_skeleton_list: 758 | subset_idx, skeleton_idx = diff_skeleton.split('_') 759 | subset_idx = int(subset_idx) 760 | for limb in limbSeq: 761 | if int(skeleton_idx) + 1 in limb: 762 | index_point_1 = int( 763 | ori_coordinates['subset'][subset_idx][limb[0] - 1]) 764 | index_point_2 = int( 765 | ori_coordinates['subset'][subset_idx][limb[1] - 1]) 766 | 767 | if index_point_1 != -1 and index_point_2 != -1: 768 | point1 = ori_coordinates['candidate'][index_point_1][ 769 | 0:2] 770 | point2 = ori_coordinates['candidate'][index_point_2][ 771 | 0:2] 772 | 773 | point2[0] = point2[0] + 0.7 * (point2[0] - point1[0]) 774 | point2[1] = point2[1] + 0.7 * (point2[1] - point1[1]) 775 | 776 | length = ((point1[0] - point2[0])**2 + 777 | (point1[1] - point2[1])**2)**0.5 778 | 779 | length_ratio = random.uniform(0.20, 0.40) 780 | ori_rec_points = self.find_parallel_points( 781 | point1, point2, length_ratio * length) 782 | 783 | cv2.fillPoly(skeleton_mask, [np.array(ori_rec_points)], 784 | 255) 785 | skeleton_mask = self.random_brush_augment(skeleton_mask, ori_rec_points) 786 | 787 | index_point_1 = int( 788 | new_coordinates['subset'][subset_idx][limb[0] - 1]) 789 | index_point_2 = int( 790 | new_coordinates['subset'][subset_idx][limb[1] - 1]) 791 | 792 | if index_point_1 != -1 and index_point_2 != -1: 793 | point1 = new_coordinates['candidate'][index_point_1][ 794 | 0:2] 795 | point2 = new_coordinates['candidate'][index_point_2][ 796 | 0:2] 797 | 798 | point2[0] = point2[0] + 0.7 * (point2[0] - point1[0]) 799 | point2[1] = point2[1] + 0.7 * (point2[1] - point1[1]) 800 | 801 | length = ((point1[0] - point2[0])**2 + 802 | (point1[1] - point2[1])**2)**0.5 803 | 804 | length_ratio = random.uniform(0.20, 0.40) 805 | ori_rec_points = self.find_parallel_points( 806 | point1, point2, length_ratio * length) 807 | 808 | cv2.fillPoly(skeleton_mask, [np.array(ori_rec_points)], 809 | 255) 810 | skeleton_mask = self.random_brush_augment(skeleton_mask, ori_rec_points) 811 | skeleton_mask = skeleton_mask / 255 812 | 813 | return skeleton_mask 814 | 815 | def get_id_feature(self, candidate_parsing_list): 816 | 817 | id_feature_list = [] 818 | for instance_parsing in candidate_parsing_list: 819 | bbox_mask = np.zeros( 820 | (instance_parsing.shape[0], instance_parsing.shape[1]), 821 | dtype=np.uint8) 822 | mask_binary = np.zeros( 823 | (instance_parsing.shape[0], instance_parsing.shape[1]), 824 | dtype=np.uint8) 825 | mask_binary[instance_parsing > 0] = 1 826 | 827 | mask_tensor = torch.from_numpy(mask_binary).unsqueeze(0) 828 | 829 | obj_ids = torch.unique(mask_tensor) 830 | obj_ids = obj_ids[1:] 831 | masks = mask_tensor == obj_ids[:, None, None] 832 | 833 | boxes = masks_to_boxes(masks) 834 | 835 | h, w = mask_binary.shape 836 | 837 | enlarge_ratio = 0.1 838 | enlarge_margin_h = int((boxes[0][3] - boxes[0][1]) * enlarge_ratio) 839 | enlarge_margin_w = int((boxes[0][2] - boxes[0][0]) * enlarge_ratio) 840 | 841 | bbox_y1, bbox_y2 = max(0, 842 | int(boxes[0][1]) - enlarge_margin_h), min( 843 | h, 844 | int(boxes[0][3]) + enlarge_margin_h) 845 | bbox_x1, bbox_x2 = max(0, 846 | int(boxes[0][0]) - enlarge_margin_w), min( 847 | w, 848 | int(boxes[0][2]) + enlarge_margin_w) 849 | bbox_mask[bbox_y1:bbox_y2, bbox_x1:bbox_x2] = 1 850 | id_feature_list.append(bbox_mask) 851 | 852 | return id_feature_list 853 | 854 | def generate_skeletion_mask(self, coordinates, skeleton_map): 855 | skeleton_mask = np.zeros( 856 | (skeleton_map.shape[0], skeleton_map.shape[1]), dtype=np.uint8) 857 | 858 | candidate = coordinates['candidate'] 859 | subset = coordinates['subset'] 860 | 861 | selected_person_idx = random.choice(range(len(subset))) 862 | 863 | skeleton_joint_list = [] 864 | random_type = random.uniform(0, 1) 865 | if random_type < 0.35: 866 | skeleton_joint_list.append([2, 3]) 867 | skeleton_joint_list.append([3, 4]) 868 | elif random_type < 0.7: 869 | skeleton_joint_list.append([5, 6]) 870 | skeleton_joint_list.append([6, 7]) 871 | else: 872 | skeleton_joint_list.append([2, 3]) 873 | skeleton_joint_list.append([3, 4]) 874 | skeleton_joint_list.append([5, 6]) 875 | skeleton_joint_list.append([6, 7]) 876 | 877 | # left and right arms 878 | for skeleton_joint in skeleton_joint_list: 879 | index_point_1 = int(subset[selected_person_idx][skeleton_joint[0]]) 880 | index_point_2 = int(subset[selected_person_idx][skeleton_joint[1]]) 881 | 882 | if index_point_1 != -1 and index_point_2 != -1: 883 | point1 = candidate[index_point_1][0:2] 884 | point2 = candidate[index_point_2][0:2] 885 | 886 | point2[0] = point2[0] + 0.7 * (point2[0] - point1[0]) 887 | point2[1] = point2[1] + 0.7 * (point2[1] - point1[1]) 888 | 889 | length = ((point1[0] - point2[0])**2 + 890 | (point1[1] - point2[1])**2)**0.5 891 | 892 | length_ratio = random.uniform(0.20, 0.40) 893 | ori_rec_points = self.find_parallel_points( 894 | point1, point2, length_ratio * length) 895 | 896 | cv2.fillPoly(skeleton_mask, [np.array(ori_rec_points)], 255) 897 | 898 | # import pdb 899 | # pdb.set_trace() 900 | # Image.fromarray(skeleton_mask).save('temp_skeleton_mask.png') 901 | skeleton_mask = self.random_brush_augment(skeleton_mask, ori_rec_points) 902 | # import pdb 903 | # pdb.set_trace() 904 | # Image.fromarray(skeleton_mask).save('temp_skeleton_mask.png') 905 | 906 | skeleton_mask = skeleton_mask / 255 907 | 908 | # selected person bbox 909 | selected_person_bbox = np.zeros( 910 | (skeleton_map.shape[0], skeleton_map.shape[1]), dtype=np.uint8) 911 | x_list = [] 912 | y_list = [] 913 | 914 | for i in range(18): 915 | index = int(subset[selected_person_idx][i]) 916 | if index == -1: 917 | continue 918 | x, y = candidate[index][0:2] 919 | x_list.append(x) 920 | y_list.append(y) 921 | 922 | x_min = min(x_list) 923 | x_max = max(x_list) 924 | y_min = min(y_list) 925 | y_max = max(y_list) 926 | 927 | x1 = int(max(0, x_min - 0.4 * (x_max - x_min))) 928 | x2 = int(x_max + 0.4 * (x_max - x_min)) 929 | y1 = int(max(0, y_min - 0.4 * (y_max - y_min))) 930 | y2 = int(y_max + 0.4 * (y_max - y_min)) 931 | 932 | selected_person_bbox[y1:y2, x1:x2] = 1 933 | 934 | return skeleton_mask, selected_person_idx, selected_person_bbox 935 | 936 | def mmpose_to_openpose(self, mmpose_coordinates, bbox_threshold=0.2): 937 | num_persons = len(mmpose_coordinates) 938 | coordinates = {} 939 | coordinates['subset'] = [] 940 | coordinates['candidate'] = [] 941 | 942 | coordinate_count = 0 943 | for person_idx in range(num_persons): 944 | if mmpose_coordinates[person_idx]["bbox_score"] < bbox_threshold: 945 | continue 946 | subset = {} 947 | for subset_idx in range(18): 948 | subset[subset_idx] = -1 949 | for subset_idx, skeleton_idx in enumerate( 950 | [0, 17, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 951 | 3]): 952 | if skeleton_idx == 17: 953 | if mmpose_coordinates[person_idx]["keypoint_scores"][ 954 | 6] < 0.1: 955 | continue 956 | if mmpose_coordinates[person_idx]["keypoint_scores"][ 957 | 5] < 0.1: 958 | continue 959 | subset[subset_idx] = coordinate_count 960 | coordinates_6 = mmpose_coordinates[person_idx][ 961 | "keypoints"][6] 962 | coordinates_5 = mmpose_coordinates[person_idx][ 963 | "keypoints"][5] 964 | coordinates['candidate'].append([ 965 | (coordinates_6[0] + coordinates_5[0]) / 2.0, 966 | (coordinates_6[1] + coordinates_5[1]) / 2.0 967 | ]) 968 | coordinate_count += 1 969 | else: 970 | if mmpose_coordinates[person_idx]["keypoint_scores"][ 971 | skeleton_idx] < 0.5: 972 | continue 973 | subset[subset_idx] = coordinate_count 974 | coordinates['candidate'].append( 975 | mmpose_coordinates[person_idx]["keypoints"] 976 | [skeleton_idx]) 977 | coordinate_count += 1 978 | 979 | coordinates['subset'].append(subset) 980 | 981 | return coordinates 982 | 983 | def generate_bbox_from_mask(self, mask): 984 | # Find the coordinates of non-zero elements in the mask 985 | y_coords, x_coords = np.where(mask) 986 | 987 | if len(y_coords) == 0 or len(x_coords) == 0: 988 | # No non-zero elements found (empty mask) 989 | return None 990 | 991 | # Compute the bounding box corners 992 | y_min, y_max = np.min(y_coords), np.max(y_coords) 993 | x_min, x_max = np.min(x_coords), np.max(x_coords) 994 | 995 | # Return the bounding box coordinates as (y_min, x_min, y_max, x_max) 996 | return y_min, x_min, y_max, x_max 997 | 998 | def generate_skeletion_mask(self, coordinates, skeleton_map): 999 | skeleton_mask = np.zeros( 1000 | (skeleton_map.shape[0], skeleton_map.shape[1]), dtype=np.uint8) 1001 | 1002 | candidate = coordinates['candidate'] 1003 | subset = coordinates['subset'] 1004 | 1005 | selected_person_idx = random.choice(range(len(subset))) 1006 | 1007 | # left arms 1008 | coordinates_x_list = [] 1009 | coordinates_y_list = [] 1010 | for body_idx in [2, 3, 4]: 1011 | index = int(subset[selected_person_idx][body_idx]) 1012 | if index == -1: 1013 | continue 1014 | coordinates_x, coordinates_y = candidate[index][0:2] 1015 | coordinates_x_list.append(coordinates_x) 1016 | coordinates_y_list.append(coordinates_y) 1017 | 1018 | if len(coordinates_x_list) != 0: 1019 | left_x = int(min(coordinates_x_list)) 1020 | up_y = int(min(coordinates_y_list)) 1021 | 1022 | right_x = int(max(coordinates_x_list)) 1023 | down_y = int(max(coordinates_y_list)) 1024 | 1025 | pad_width = int(max(down_y - up_y, right_x - left_x) * 0.15) 1026 | 1027 | skeleton_mask[max(0, up_y - pad_width):down_y + pad_width, 1028 | max(0, left_x - pad_width):right_x + pad_width] = 1 1029 | 1030 | # right arms 1031 | coordinates_x_list = [] 1032 | coordinates_y_list = [] 1033 | for body_idx in [5, 6, 7]: 1034 | index = int(subset[selected_person_idx][body_idx]) 1035 | if index == -1: 1036 | continue 1037 | coordinates_x, coordinates_y = candidate[index][0:2] 1038 | coordinates_x_list.append(coordinates_x) 1039 | coordinates_y_list.append(coordinates_y) 1040 | 1041 | if len(coordinates_x_list) != 0: 1042 | left_x = int(min(coordinates_x_list)) 1043 | up_y = int(min(coordinates_y_list)) 1044 | 1045 | right_x = int(max(coordinates_x_list)) 1046 | down_y = int(max(coordinates_y_list)) 1047 | 1048 | pad_width = int(max(down_y - up_y, right_x - left_x) * 0.15) 1049 | 1050 | skeleton_mask[max(0, up_y - pad_width):down_y + pad_width, 1051 | max(0, left_x - pad_width):right_x + pad_width] = 1 1052 | 1053 | # selected person bbox 1054 | selected_person_bbox = np.zeros( 1055 | (skeleton_map.shape[0], skeleton_map.shape[1]), dtype=np.uint8) 1056 | x_list = [] 1057 | y_list = [] 1058 | 1059 | for i in range(18): 1060 | index = int(subset[selected_person_idx][i]) 1061 | if index == -1: 1062 | continue 1063 | x, y = candidate[index][0:2] 1064 | x_list.append(x) 1065 | y_list.append(y) 1066 | 1067 | x_min = min(x_list) 1068 | x_max = max(x_list) 1069 | y_min = min(y_list) 1070 | y_max = max(y_list) 1071 | 1072 | x1 = int(max(0, x_min - 0.4 * (x_max - x_min))) 1073 | x2 = int(x_max + 0.4 * (x_max - x_min)) 1074 | y1 = int(max(0, y_min - 0.4 * (y_max - y_min))) 1075 | y2 = int(y_max + 0.4 * (y_max - y_min)) 1076 | 1077 | selected_person_bbox[y1:y2, x1:x2] = 1 1078 | 1079 | return skeleton_mask, selected_person_idx, selected_person_bbox 1080 | 1081 | def expand_identity_feature(self, id_feature_list, selected_idx, 1082 | inpaint_mask): 1083 | 1084 | id_feature_temp = id_feature_list[selected_idx].copy() 1085 | id_feature_temp[inpaint_mask == 1] = 1 1086 | 1087 | if np.sum(id_feature_temp) == (id_feature_temp.shape[0] * 1088 | id_feature_temp.shape[1]): 1089 | id_feature_list[selected_idx] = id_feature_temp 1090 | return id_feature_list 1091 | 1092 | mask_tensor = torch.from_numpy(id_feature_temp).unsqueeze(0) 1093 | 1094 | obj_ids = torch.unique(mask_tensor) 1095 | obj_ids = obj_ids[1:] 1096 | masks = mask_tensor == obj_ids[:, None, None] 1097 | 1098 | boxes = masks_to_boxes(masks) 1099 | 1100 | bbox_y1, bbox_y2 = max(0, int(boxes[0][1])), int(boxes[0][3]) 1101 | bbox_x1, bbox_x2 = max(0, int(boxes[0][0])), int(boxes[0][2]) 1102 | 1103 | id_feature_temp[bbox_y1:bbox_y2, bbox_x1:bbox_x2] = 1 1104 | 1105 | id_feature_list[selected_idx] = id_feature_temp 1106 | 1107 | return id_feature_list 1108 | 1109 | def adjust_coordinates(self, coordinates, original_size): 1110 | ratio = 512. / original_size 1111 | for candidate in coordinates['candidate']: 1112 | candidate[0] = candidate[0] * ratio 1113 | candidate[1] = candidate[1] * ratio 1114 | 1115 | return coordinates 1116 | 1117 | def flip_skeleton_coordinates(self, coordinates): 1118 | 1119 | for subset_index in range(len(coordinates['subset'])): 1120 | new_subset = {} 1121 | for index in range(18): 1122 | if index == 2: 1123 | new_subset[index] = coordinates['subset'][subset_index][5] 1124 | elif index == 3: 1125 | new_subset[index] = coordinates['subset'][subset_index][6] 1126 | elif index == 4: 1127 | new_subset[index] = coordinates['subset'][subset_index][7] 1128 | elif index == 8: 1129 | new_subset[index] = coordinates['subset'][subset_index][11] 1130 | elif index == 9: 1131 | new_subset[index] = coordinates['subset'][subset_index][12] 1132 | elif index == 10: 1133 | new_subset[index] = coordinates['subset'][subset_index][13] 1134 | elif index == 5: 1135 | new_subset[index] = coordinates['subset'][subset_index][2] 1136 | elif index == 6: 1137 | new_subset[index] = coordinates['subset'][subset_index][3] 1138 | elif index == 7: 1139 | new_subset[index] = coordinates['subset'][subset_index][4] 1140 | elif index == 11: 1141 | new_subset[index] = coordinates['subset'][subset_index][8] 1142 | elif index == 12: 1143 | new_subset[index] = coordinates['subset'][subset_index][9] 1144 | elif index == 13: 1145 | new_subset[index] = coordinates['subset'][subset_index][10] 1146 | else: 1147 | new_subset[index] = coordinates['subset'][subset_index][ 1148 | index] 1149 | coordinates['subset'][subset_index] = new_subset 1150 | 1151 | return coordinates 1152 | 1153 | 1154 | 1155 | def draw_bodypose(self, canvas, candidate, subset): 1156 | stickwidth = 4 1157 | limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \ 1158 | [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \ 1159 | [1, 16], [16, 18], [3, 17], [6, 18]] 1160 | 1161 | colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \ 1162 | [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \ 1163 | [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]] 1164 | for i in range(18): 1165 | for n in range(len(subset)): 1166 | index = int(subset[n][i]) 1167 | if index == -1: 1168 | continue 1169 | x, y = candidate[index][0:2] 1170 | cv2.circle( 1171 | canvas, (int(x), int(y)), 4, colors[i], thickness=-1) 1172 | for i in range(17): 1173 | for n in range(len(subset)): 1174 | index = [subset[n][point - 1] for point in limbSeq[i]] 1175 | if -1 in index: 1176 | continue 1177 | cur_canvas = canvas.copy() 1178 | Y = [candidate[int(point)][0] for point in index] 1179 | X = [candidate[int(point)][1] for point in index] 1180 | mX = np.mean(X) 1181 | mY = np.mean(Y) 1182 | length = ((X[0] - X[1])**2 + (Y[0] - Y[1])**2)**0.5 1183 | angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1])) 1184 | polygon = cv2.ellipse2Poly((int(mY), int(mX)), 1185 | (int(length / 2), stickwidth), 1186 | int(angle), 0, 360, 1) 1187 | cv2.fillConvexPoly(cur_canvas, polygon, colors[i]) 1188 | canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0) 1189 | # plt.imsave("preview.jpg", canvas[:, :, [2, 1, 0]]) 1190 | # plt.imshow(canvas[:, :, [2, 1, 0]]) 1191 | return canvas 1192 | 1193 | def crop_img_mask(self, img, human_mask, bbox_mask, bbox_coor, face_mask, 1194 | target_person_face_mask, skeleton_map, skeleton_mask, 1195 | coordinates, instance_parsing_list): 1196 | h, w, _ = img.shape 1197 | 1198 | x1, y1, x2, y2 = bbox_coor 1199 | 1200 | center_x, center_y = (x1 + x2) // 2, (y1 + y2) // 2 1201 | 1202 | bbox_h, bbox_w = y2 - y1, x2 - x1 1203 | 1204 | enlarge_bbox_ratio = 1.1 1205 | 1206 | enlarged_bbox = int(max([bbox_h, bbox_w]) * enlarge_bbox_ratio) 1207 | 1208 | cropped_size = min([h, w, enlarged_bbox]) 1209 | cropped_size = cropped_size // 2 * 2 1210 | 1211 | crop_y1 = center_y - cropped_size // 2 1212 | crop_y2 = center_y + cropped_size // 2 1213 | crop_x1 = center_x - cropped_size // 2 1214 | crop_x2 = center_x + cropped_size // 2 1215 | if crop_y1 < 0: 1216 | crop_y1 = 0 1217 | crop_y2 = cropped_size 1218 | 1219 | if crop_y2 > h: 1220 | crop_y1 = h - cropped_size 1221 | crop_y2 = h 1222 | 1223 | if crop_x1 < 0: 1224 | crop_x1 = 0 1225 | crop_x2 = cropped_size 1226 | 1227 | if crop_x2 > w: 1228 | crop_x1 = w - cropped_size 1229 | crop_x2 = w 1230 | 1231 | img = img[crop_y1:crop_y2, crop_x1:crop_x2] 1232 | human_mask = human_mask[crop_y1:crop_y2, crop_x1:crop_x2] 1233 | bbox_mask = bbox_mask[crop_y1:crop_y2, crop_x1:crop_x2] 1234 | face_mask = face_mask[crop_y1:crop_y2, crop_x1:crop_x2] 1235 | target_person_face_mask = target_person_face_mask[crop_y1:crop_y2, 1236 | crop_x1:crop_x2] 1237 | skeleton_map = skeleton_map[crop_y1:crop_y2, crop_x1:crop_x2] 1238 | skeleton_mask = skeleton_mask[crop_y1:crop_y2, crop_x1:crop_x2] 1239 | 1240 | cropped_instance_parsing_list = [] 1241 | for instance_parsing in instance_parsing_list: 1242 | cropped_instance_parsing_list.append( 1243 | instance_parsing[crop_y1:crop_y2, crop_x1:crop_x2]) 1244 | 1245 | for candidate in coordinates['candidate']: 1246 | candidate[0] = candidate[0] - crop_x1 1247 | candidate[1] = candidate[1] - crop_y1 1248 | 1249 | # import pdb 1250 | # pdb.set_trace() 1251 | current_width = img.shape[0] 1252 | for subset in coordinates['subset']: 1253 | for index in range(17): 1254 | if subset[index] == -1: 1255 | continue 1256 | return img, human_mask, bbox_mask, face_mask, target_person_face_mask, skeleton_map, skeleton_mask, coordinates, cropped_instance_parsing_list 1257 | 1258 | def occlusion_deleting(self, bbox_mask): 1259 | indices = np.where(bbox_mask != 0) 1260 | x_min, y_min = np.min(indices, axis=1) 1261 | x_max, y_max = np.max(indices, axis=1) 1262 | 1263 | inpaint_mask = np.zeros((bbox_mask.shape[0], bbox_mask.shape[1]), 1264 | dtype=np.uint8) 1265 | random_length = int(random.uniform(0.2, 0.4) * (y_max - y_min)) 1266 | 1267 | location = random.choice([0, 1]) 1268 | if location == 0: 1269 | inpaint_mask[:, 1270 | max(0, y_max - random_length // 2):y_max + 1271 | random_length // 2] = 1 1272 | else: 1273 | inpaint_mask[:, 1274 | max(0, y_min - random_length // 2):y_min + 1275 | random_length // 2] = 1 1276 | 1277 | return inpaint_mask 1278 | 1279 | def get_id_color_map(self, id_feature_list): 1280 | random.shuffle(self.random_color_identity_group) 1281 | 1282 | color_list = [] 1283 | identity_map = np.zeros( 1284 | (id_feature_list[0].shape[0], id_feature_list[0].shape[1], 3)) 1285 | count_map = np.zeros( 1286 | (id_feature_list[0].shape[0], id_feature_list[0].shape[1])) 1287 | for idx, mask in enumerate(id_feature_list): 1288 | color_group_idx = idx % 5 1289 | random_color = random.choices( 1290 | self.random_color_identity_group[color_group_idx], k=1)[0] 1291 | temp_mask = np.zeros( 1292 | (id_feature_list[0].shape[0], id_feature_list[0].shape[1], 3), 1293 | dtype=np.uint8) 1294 | 1295 | temp_mask[mask == 1] = random_color 1296 | # import pdb 1297 | # pdb.set_trace() 1298 | # identity_color_indicator.append(random_color) 1299 | identity_map += temp_mask 1300 | # import pdb 1301 | # pdb.set_trace() 1302 | count_map += mask 1303 | color_list.append(random_color) 1304 | 1305 | count_map[count_map == 0] = 1 1306 | count_map = count_map[:, :, np.newaxis] 1307 | # import pdb 1308 | # pdb.set_trace() 1309 | identity_map = identity_map / count_map 1310 | identity_map = identity_map.astype(np.uint8) 1311 | 1312 | return identity_map, color_list 1313 | 1314 | def reposing_add(self, bbox_mask, inpaint_mask): 1315 | indices = np.where(bbox_mask != 0) 1316 | y_min, x_min = np.min(indices, axis=1) 1317 | y_max, x_max = np.max(indices, axis=1) 1318 | 1319 | augmentation = random.uniform(0.4, 1.0) 1320 | random_length = int(augmentation * (y_max - y_min)) 1321 | inpaint_mask[y_min + random_length:y_max, x_min:x_max] = 1 1322 | 1323 | return inpaint_mask 1324 | 1325 | def harmonization_add(self, img, bbox_mask, human_mask, inpaint_mask): 1326 | 1327 | img_augmented = Image.fromarray(img) 1328 | transform = transforms.ColorJitter( 1329 | brightness=(0.7, 1.3), contrast=(0.7, 1.3), saturation=(0.7, 1.5)) 1330 | img_augmented = np.array(transform(img_augmented)) 1331 | 1332 | revised_img = img.copy() 1333 | revised_img[human_mask == 1, :] = img_augmented[human_mask == 1, :] 1334 | inpaint_mask[bbox_mask == 1] = 1 1335 | 1336 | return revised_img, inpaint_mask 1337 | 1338 | def occlusion_add(self, img, human_mask, bbox_mask): 1339 | indices = np.where(bbox_mask != 0) 1340 | y_min, x_min = np.min(indices, axis=1) 1341 | y_max, x_max = np.max(indices, axis=1) 1342 | 1343 | bbox_mask_revised = bbox_mask.copy() 1344 | column_mask = random.uniform(0, 1) 1345 | if column_mask < 0.5: 1346 | bbox_mask_revised[:, x_min:x_max] = 1 1347 | 1348 | inpaint_mask = np.zeros((img.shape[0], img.shape[1]), dtype=np.uint8) 1349 | inpaint_mask[(bbox_mask_revised - human_mask) > 0] = 1 1350 | 1351 | return inpaint_mask, bbox_mask_revised 1352 | 1353 | def read_mask_for_delete(self, selected_parsing_idx, 1354 | instance_parsing_list): 1355 | mask = instance_parsing_list[selected_parsing_idx] 1356 | 1357 | mask_binary = np.zeros((mask.shape[0], mask.shape[1]), dtype=np.uint8) 1358 | mask_binary[mask > 0] = 1 1359 | 1360 | mask_tensor = torch.from_numpy(mask_binary).unsqueeze(0) 1361 | 1362 | obj_ids = torch.unique(mask_tensor) 1363 | obj_ids = obj_ids[1:] 1364 | masks = mask_tensor == obj_ids[:, None, None] 1365 | 1366 | boxes = masks_to_boxes(masks) 1367 | 1368 | bbox_mask = np.zeros((mask.shape[0], mask.shape[1]), dtype=np.uint8) 1369 | 1370 | h, w = mask.shape 1371 | 1372 | bbox_y1, bbox_y2 = max(0, int(boxes[0][1])), min(h, int(boxes[0][3])) 1373 | bbox_x1, bbox_x2 = max(0, int(boxes[0][0])), min(w, int(boxes[0][2])) 1374 | bbox_mask[bbox_y1:bbox_y2, bbox_x1:bbox_x2] = 1 1375 | 1376 | face_mask = np.zeros((mask.shape[0], mask.shape[1]), dtype=np.uint8) 1377 | for idx, parsing in enumerate(instance_parsing_list): 1378 | 1379 | if idx == selected_parsing_idx: 1380 | target_person_face_mask = np.zeros( 1381 | (mask.shape[0], mask.shape[1]), dtype=np.uint8) 1382 | 1383 | for i in range(1, 5): 1384 | face_mask[parsing == i] = 1 1385 | 1386 | if idx == selected_parsing_idx: 1387 | target_person_face_mask[parsing == i] = 1 1388 | 1389 | return mask_binary, bbox_mask, ( 1390 | bbox_x1, bbox_y1, bbox_x2, 1391 | bbox_y2), face_mask, target_person_face_mask 1392 | 1393 | def read_mask(self, selected_parsing_idx, instance_parsing_list): 1394 | mask = instance_parsing_list[selected_parsing_idx] 1395 | 1396 | # mask_name = mask_path.split('/')[-1][:-4] 1397 | # img_id = int(mask_name.split('_')[0]) 1398 | num_persons = len(instance_parsing_list) 1399 | person_id = selected_parsing_idx + 1 1400 | # num_persons = int(mask_name.split('_')[-2]) 1401 | # person_id = int(mask_name.split('_')[-1]) 1402 | 1403 | mask_binary = np.zeros((mask.shape[0], mask.shape[1]), dtype=np.uint8) 1404 | mask_binary[mask > 0] = 1 1405 | 1406 | mask_tensor = torch.from_numpy(mask_binary).unsqueeze(0) 1407 | 1408 | obj_ids = torch.unique(mask_tensor) 1409 | obj_ids = obj_ids[1:] 1410 | masks = mask_tensor == obj_ids[:, None, None] 1411 | 1412 | boxes = masks_to_boxes(masks) 1413 | 1414 | bbox_mask = np.zeros((mask.shape[0], mask.shape[1]), dtype=np.uint8) 1415 | 1416 | h, w = mask.shape 1417 | 1418 | # make the bounding box slightly larger 1419 | enlarge_ratio = 0.05 1420 | enlarge_margin_h = int((boxes[0][3] - boxes[0][1]) * enlarge_ratio) 1421 | enlarge_margin_w = int((boxes[0][2] - boxes[0][0]) * enlarge_ratio) 1422 | 1423 | if person_id > 1: 1424 | # left_person = f'{mask_path[:-4][:-len(mask_name)]}/{img_id}_{num_persons:02d}_{(person_id-1):02d}.png' 1425 | mask_left = instance_parsing_list[selected_parsing_idx - 1] 1426 | 1427 | mask_binary_left = np.zeros( 1428 | (mask_left.shape[0], mask_left.shape[1]), dtype=np.uint8) 1429 | mask_binary_left[mask_left > 0] = 1 1430 | 1431 | mask_tensor_left = torch.from_numpy(mask_binary_left).unsqueeze(0) 1432 | 1433 | obj_ids_left = torch.unique(mask_tensor_left) 1434 | obj_ids_left = obj_ids_left[1:] 1435 | masks_left = mask_tensor_left == obj_ids_left[:, None, None] 1436 | 1437 | boxes_left = masks_to_boxes(masks_left) 1438 | 1439 | enlarge_margin_left = min( 1440 | enlarge_margin_w, 1441 | int((boxes_left[0][2] - boxes_left[0][0]) * 0.05)) 1442 | else: 1443 | enlarge_margin_left = enlarge_margin_w 1444 | 1445 | if person_id < num_persons: 1446 | # right_person = f'{mask_path[:-4][:-len(mask_name)]}/{img_id}_{num_persons:02d}_{(person_id+1):02d}.png' 1447 | mask_right = instance_parsing_list[selected_parsing_idx + 1] 1448 | 1449 | mask_binary_right = np.zeros( 1450 | (mask_right.shape[0], mask_right.shape[1]), dtype=np.uint8) 1451 | mask_binary_right[mask_right > 0] = 1 1452 | 1453 | mask_tensor_right = torch.from_numpy(mask_binary_right).unsqueeze( 1454 | 0) 1455 | 1456 | obj_ids_right = torch.unique(mask_tensor_right) 1457 | obj_ids_right = obj_ids_right[1:] 1458 | masks_right = mask_tensor_right == obj_ids_right[:, None, None] 1459 | 1460 | boxes_right = masks_to_boxes(masks_right) 1461 | 1462 | enlarge_margin_right = min( 1463 | enlarge_margin_w, 1464 | int((boxes_right[0][2] - boxes_right[0][0]) * 0.05)) 1465 | else: 1466 | enlarge_margin_right = enlarge_margin_w 1467 | 1468 | bbox_y1, bbox_y2 = max(0, 1469 | int(boxes[0][1]) - enlarge_margin_h), min( 1470 | h, 1471 | int(boxes[0][3]) + enlarge_margin_h) 1472 | bbox_x1, bbox_x2 = max(0, 1473 | int(boxes[0][0]) - enlarge_margin_left), min( 1474 | w, 1475 | int(boxes[0][2]) + enlarge_margin_right) 1476 | bbox_mask[bbox_y1:bbox_y2, bbox_x1:bbox_x2] = 1 1477 | 1478 | face_mask = np.zeros((mask.shape[0], mask.shape[1]), dtype=np.uint8) 1479 | 1480 | for idx, parsing in enumerate(instance_parsing_list): 1481 | 1482 | if idx == selected_parsing_idx: 1483 | target_person_face_mask = np.zeros( 1484 | (mask.shape[0], mask.shape[1]), dtype=np.uint8) 1485 | 1486 | for i in range(1, 5): 1487 | face_mask[parsing == i] = 1 1488 | 1489 | if idx == selected_parsing_idx: 1490 | target_person_face_mask[parsing == i] = 1 1491 | 1492 | return mask_binary, bbox_mask, ( 1493 | bbox_x1, bbox_y1, bbox_x2, 1494 | bbox_y2), face_mask, target_person_face_mask 1495 | 1496 | def remove_background(self, img, instance_parsing_list): 1497 | 1498 | mask_binary = np.zeros((img.shape[0], img.shape[1]), dtype=np.uint8) 1499 | for instance_parsing in instance_parsing_list: 1500 | mask_binary[instance_parsing > 0] = 1 1501 | 1502 | img[mask_binary == 0] = 255 1503 | 1504 | return img 1505 | 1506 | def load_instance_parsing_maps(self, parsing_path_list): 1507 | 1508 | parsing_list = [] 1509 | for parsing_path in parsing_path_list: 1510 | mask = np.array(Image.open(parsing_path).convert('RGB'))[:, :, 0] 1511 | parsing_list.append(mask) 1512 | 1513 | return parsing_list 1514 | 1515 | 1516 | def __getitem__(self, index): 1517 | 1518 | while True: 1519 | try: 1520 | img_path = self.data_path_list[index] 1521 | img_name = img_path.split('/')[-1][:-4] 1522 | parsing_path_list = glob.glob( 1523 | f'{self.parsing_dir}/{img_name}_*.png') 1524 | 1525 | instance_parsing_list = self.load_instance_parsing_maps( 1526 | parsing_path_list) 1527 | 1528 | coordinates_path = f'{self.pose_estimation_path}/{img_name}.json' 1529 | # a function to load json file from locally 1530 | mmpose_coordinates = json.load(open(coordinates_path, )) 1531 | 1532 | coordinates = self.mmpose_to_openpose(mmpose_coordinates) 1533 | 1534 | img = self.read_img(img_path) 1535 | 1536 | # remove background 1537 | remove_bg_aug = random.uniform(0, 1) 1538 | if remove_bg_aug < 0.2: 1539 | img = self.remove_background(img, instance_parsing_list) 1540 | 1541 | canvas = np.zeros_like(img) 1542 | skeleton_map = self.draw_bodypose(canvas, 1543 | coordinates['candidate'], 1544 | coordinates['subset']) 1545 | 1546 | skeleton_mask, selected_person_idx, selected_person_bbox = self.generate_skeletion_mask( 1547 | coordinates, skeleton_map) 1548 | 1549 | new_coordinates = self.randomly_change_pose( 1550 | coordinates, selected_person_idx) 1551 | 1552 | skeleton_mask = self.compute_diff_mask(coordinates, 1553 | new_coordinates, 1554 | skeleton_mask) 1555 | 1556 | skeleton_mask, selected_parsing_idx = self.load_arm_hand_masks( 1557 | skeleton_mask, selected_person_bbox, instance_parsing_list) 1558 | 1559 | # parsing_path = random.choice(candidate_parsing_path_list) 1560 | # augmentation types: 1561 | # 1) occlusion for adding a person; 1562 | # 2) harmonization when adding a person; 1563 | # 3) reposing when adding a person; 1564 | # 4) occlusion for removing a persons 1565 | augmentation_type = random.uniform(0, 1) 1566 | if augmentation_type < 0.8: 1567 | # add person cases 1568 | human_mask, bbox_mask, bbox_coor, face_mask, target_person_face_mask = self.read_mask( 1569 | selected_parsing_idx, instance_parsing_list) 1570 | 1571 | # define the crop region 1572 | img, human_mask, bbox_mask, face_mask, target_person_face_mask, skeleton_map, skeleton_mask, coordinates, instance_parsing_list = self.crop_img_mask( 1573 | img, human_mask, bbox_mask, bbox_coor, face_mask, 1574 | target_person_face_mask, skeleton_map, skeleton_mask, 1575 | coordinates, instance_parsing_list) 1576 | assert np.sum(bbox_mask) != 0 1577 | 1578 | revised_img = img.copy() 1579 | 1580 | # for each add person type, we need to deal with the occlusion region 1581 | inpaint_mask, bbox_mask_revised = self.occlusion_add( 1582 | img, human_mask, bbox_mask) 1583 | 1584 | inpaint_mask[skeleton_mask == 1] = 1 1585 | 1586 | reposing_aug = random.uniform(0, 1) 1587 | if reposing_aug > 0.4: 1588 | inpaint_mask = self.reposing_add( 1589 | bbox_mask, inpaint_mask) 1590 | 1591 | inpaint_mask[face_mask == 1] = 0 1592 | 1593 | # dilated inpaint mask 1594 | dilate_inpaint_aug = random.uniform(0, 1) 1595 | if dilate_inpaint_aug < 0.4: 1596 | structuring_element = np.ones((5, 5), dtype=bool) 1597 | inpaint_mask = binary_dilation( 1598 | inpaint_mask, 1599 | structure=structuring_element).astype(np.uint8) 1600 | 1601 | inpaint_mask_after_reposing = inpaint_mask.copy() 1602 | 1603 | harmonization_aug = random.uniform(0, 1) 1604 | # add_harmonization = 0.1 1605 | if harmonization_aug < 0.5 and self.add_harmonization: 1606 | revised_img, inpaint_mask = self.harmonization_add( 1607 | img, bbox_mask_revised, human_mask, inpaint_mask) 1608 | 1609 | # exclude the surrounding person from the inpainting regions 1610 | inpaint_mask[(face_mask - 1611 | target_person_face_mask) == 1] = 0 1612 | # else: 1613 | # inpaint_mask[bbox_mask_revised == 1] = 1 1614 | # inpaint_mask[(face_mask - target_person_face_mask) == 1] = 0 1615 | else: 1616 | human_mask, bbox_mask, bbox_coor, face_mask, target_person_face_mask = self.read_mask_for_delete( 1617 | selected_parsing_idx, instance_parsing_list) 1618 | 1619 | # define the crop region 1620 | img, human_mask, bbox_mask, face_mask, target_person_face_mask, skeleton_map, skeleton_mask, coordinates, instance_parsing_list = self.crop_img_mask( 1621 | img, human_mask, bbox_mask, bbox_coor, face_mask, 1622 | target_person_face_mask, skeleton_map, skeleton_mask, 1623 | coordinates, instance_parsing_list) 1624 | assert np.sum(bbox_mask) != 0 1625 | 1626 | inpaint_mask = self.occlusion_deleting(human_mask) 1627 | revised_img = img.copy() 1628 | 1629 | inpaint_mask[skeleton_mask == 1] = 1 1630 | inpaint_mask[face_mask == 1] = 0 1631 | 1632 | inpaint_mask_after_reposing = inpaint_mask.copy() 1633 | 1634 | # load the exemplar image 1635 | candidate_parsing_list, idx_in_candidate_list = self.get_candidate_parsing_list_for_exemplar( 1636 | inpaint_mask, selected_parsing_idx, instance_parsing_list) 1637 | 1638 | if len(candidate_parsing_list) == 0: 1639 | index = random.randint(0, len(self.data_path_list) - 1) 1640 | continue 1641 | 1642 | # get_indicator 1643 | id_feature_list = self.get_id_feature(candidate_parsing_list) 1644 | 1645 | # expand the id feature list using the inpaint mask 1646 | id_feature_list = self.expand_identity_feature( 1647 | id_feature_list, idx_in_candidate_list, inpaint_mask) 1648 | 1649 | id_color_map, color_list = self.get_id_color_map( 1650 | id_feature_list) 1651 | 1652 | img_exemplar_list, parsing_exemplar_list = self.read_img_exemplar_mask( 1653 | img, candidate_parsing_list) 1654 | for idx, (img_exemplar, parsing) in enumerate( 1655 | zip(img_exemplar_list, parsing_exemplar_list)): 1656 | incomplete_exemplar_aug = random.uniform(0, 1) 1657 | if incomplete_exemplar_aug < 0.4: 1658 | length = img_exemplar.shape[0] 1659 | random_portion = random.uniform(0.2, 0.6) 1660 | # the masked part should be directly cropped out, rather than applying the mask 1661 | # img_exemplar[-int(random_portion * length):, :] = 255 1662 | img_exemplar = img_exemplar[:-int(random_portion * 1663 | length), :] 1664 | parsing = parsing[:-int(random_portion * length), :] 1665 | img_exemplar_list[idx] = img_exemplar 1666 | parsing_exemplar_list[idx] = parsing 1667 | 1668 | img = torch.from_numpy(img).permute(2, 0, 1) 1669 | id_color_map = torch.from_numpy(id_color_map).permute(2, 0, 1) 1670 | skeleton_map = torch.from_numpy(skeleton_map).permute(2, 0, 1) 1671 | revised_img = torch.from_numpy(revised_img).permute(2, 0, 1) 1672 | inpaint_mask = torch.from_numpy(inpaint_mask).unsqueeze(0) 1673 | human_mask = torch.from_numpy(human_mask).unsqueeze(0) 1674 | skeleton_mask = torch.from_numpy(skeleton_mask).unsqueeze(0) 1675 | 1676 | exemplar_img_list = [] 1677 | exemplar_skeleton_map_list = [] 1678 | exemplar_skeleton_coordinates_list = [] 1679 | exemplar_color_block_list = [] 1680 | for idx, (img_exemplar, parsing) in enumerate( 1681 | zip(img_exemplar_list, parsing_exemplar_list)): 1682 | 1683 | img_exemplar = torch.from_numpy(img_exemplar).permute( 1684 | 2, 0, 1) 1685 | height, width = img_exemplar.size(1), img_exemplar.size(2) 1686 | 1687 | parsing = torch.from_numpy(parsing).unsqueeze(0) 1688 | 1689 | if height == width: 1690 | pass 1691 | elif height < width: 1692 | diff = width - height 1693 | top_pad = diff // 2 1694 | down_pad = diff - top_pad 1695 | left_pad = 0 1696 | right_pad = 0 1697 | padding_size = [left_pad, top_pad, right_pad, down_pad] 1698 | img_exemplar = F.pad( 1699 | img_exemplar, padding=padding_size, fill=255) 1700 | parsing = F.pad(parsing, padding=padding_size, fill=0) 1701 | else: 1702 | diff = height - width 1703 | left_pad = diff // 2 1704 | right_pad = diff - left_pad 1705 | top_pad = 0 1706 | down_pad = 0 1707 | padding_size = [left_pad, top_pad, right_pad, down_pad] 1708 | img_exemplar = F.pad( 1709 | img_exemplar, padding=padding_size, fill=255) 1710 | parsing = F.pad(parsing, padding=padding_size, fill=0) 1711 | 1712 | exemplar_img, parsing = self.transform_exemplar_and_parsing( 1713 | img_exemplar, parsing) 1714 | exemplar_img = exemplar_img.permute(1, 2, 0) 1715 | parsing = parsing.squeeze(0) 1716 | 1717 | exemplar_img, new_coordinates = self.reposing_exemplar_img( 1718 | exemplar_img.numpy(), parsing.numpy()) 1719 | 1720 | exemplar_skeleton_map = self.draw_bodypose( 1721 | np.zeros_like(exemplar_img), 1722 | new_coordinates['candidate'], 1723 | new_coordinates['subset']) 1724 | 1725 | exemplar_img = self.resize_transform_exemplar( 1726 | torch.from_numpy(exemplar_img).permute( 1727 | 2, 0, 1)).permute(1, 2, 0) / 255. 1728 | exemplar_skeleton_map = torch.from_numpy( 1729 | exemplar_skeleton_map) / 255.0 1730 | 1731 | exemplar_skeleton_coordinates_list.append(new_coordinates) 1732 | # flip_random = random.uniform(0, 1) 1733 | # flip_random = 0.1 1734 | # if flip_random < 0.5: 1735 | # flip image 1736 | # exemplar_img = torch.fliplr(exemplar_img) 1737 | # flip skeleton 1738 | # new_coordinates = self.flip_skeleton_coordinates(new_coordinates) 1739 | # canvas = np.zeros_like(exemplar_skeleton_map) 1740 | # exemplar_skeleton_map = self.draw_bodypose(canvas, new_coordinates['candidate'], new_coordinates['subset']) 1741 | # exemplar_skeleton_map = torch.from_numpy(exemplar_skeleton_map) / 255.0 1742 | # exemplar_skeleton_map = torch.fliplr(exemplar_skeleton_map) 1743 | 1744 | exemplar_skeleton_map_list.append(exemplar_skeleton_map) 1745 | exemplar_img_list.append(exemplar_img) 1746 | 1747 | # generate color block 1748 | # import pdb 1749 | # pdb.set_trace() 1750 | exemplar_color_block = torch.zeros_like( 1751 | exemplar_skeleton_map) 1752 | exemplar_color_block[:, :, 0] = color_list[idx][0] 1753 | exemplar_color_block[:, :, 1] = color_list[idx][1] 1754 | exemplar_color_block[:, :, 2] = color_list[idx][2] 1755 | exemplar_color_block = exemplar_color_block / 255. 1756 | # exemplar_color_block = torch.tensor([[[color_list[idx][0], color_list[idx][1], color_list[idx][2]]] * 224] * 224) / 255. 1757 | exemplar_color_block_list.append(exemplar_color_block) 1758 | 1759 | if len(exemplar_img_list) > 5: 1760 | index = random.randint(0, len(self.data_path_list) - 1) 1761 | continue 1762 | 1763 | if len(exemplar_img_list) < 5: 1764 | add_length = 5 - len(exemplar_img_list) 1765 | for _ in range(add_length): 1766 | exemplar_img_list.append( 1767 | torch.zeros_like(exemplar_img_list[0])) 1768 | exemplar_skeleton_map_list.append( 1769 | torch.zeros_like(exemplar_skeleton_map_list[0])) 1770 | exemplar_skeleton_coordinates_list.append(None) 1771 | exemplar_color_block_list.append( 1772 | torch.zeros_like(exemplar_color_block_list[0])) 1773 | id_feature_list.append( 1774 | np.zeros_like(id_feature_list[0])) 1775 | 1776 | id_feature_channel_list = [] 1777 | for id_feature in id_feature_list: 1778 | id_feature_channel_list.append( 1779 | torch.from_numpy(id_feature.astype( 1780 | np.uint8)).unsqueeze(0)) 1781 | 1782 | # id_feature_channel = torch.from_numpy(np.stack(id_feature_list, axis=0, dtype=np.uint8)) 1783 | 1784 | img = self.resize_transform_img(img).permute(1, 2, 1785 | 0) / 127.5 - 1 1786 | id_color_map = self.resize_transform_mask( 1787 | id_color_map).permute(1, 2, 0) / 255.0 1788 | revised_img = self.resize_transform_img(revised_img).permute( 1789 | 1, 2, 0) / 127.5 - 1 1790 | coordinates = self.adjust_coordinates(coordinates, 1791 | inpaint_mask.size(1)) 1792 | canvas = np.zeros_like(img) 1793 | skeleton_map = self.draw_bodypose(canvas, 1794 | coordinates['candidate'], 1795 | coordinates['subset']) 1796 | skeleton_map = torch.from_numpy(skeleton_map) / 255.0 1797 | inpaint_mask = self.resize_transform_mask( 1798 | inpaint_mask).permute(1, 2, 0) 1799 | human_mask = self.resize_transform_mask(human_mask).permute( 1800 | 1, 2, 0) 1801 | skeleton_mask = self.resize_transform_mask( 1802 | skeleton_mask).permute(1, 2, 0) 1803 | 1804 | for idx, id_feature in enumerate(id_feature_channel_list): 1805 | id_feature_channel_list[idx] = self.resize_transform_mask( 1806 | id_feature).permute(1, 2, 0) 1807 | 1808 | inpaint_mask_after_reposing = torch.from_numpy( 1809 | inpaint_mask_after_reposing).unsqueeze(0) 1810 | inpaint_mask_after_reposing = self.resize_transform_mask( 1811 | inpaint_mask_after_reposing).permute(1, 2, 0).squeeze(2) 1812 | revised_img[inpaint_mask_after_reposing == 1] = 0 1813 | 1814 | flip_img = random.uniform(0, 1) 1815 | # flip_img = 0.1 1816 | if flip_img < 0.5: 1817 | img = torch.fliplr(img) 1818 | id_color_map = torch.fliplr(id_color_map) 1819 | revised_img = torch.fliplr(revised_img) 1820 | inpaint_mask = torch.fliplr(inpaint_mask) 1821 | skeleton_mask = torch.fliplr(skeleton_mask) 1822 | coordinates = self.flip_skeleton_coordinates(coordinates) 1823 | canvas = np.zeros_like(img) 1824 | skeleton_map = self.draw_bodypose(canvas, 1825 | coordinates['candidate'], 1826 | coordinates['subset']) 1827 | skeleton_map = torch.from_numpy(skeleton_map) / 255.0 1828 | skeleton_map = torch.fliplr(skeleton_map) 1829 | 1830 | for idx, id_feature in enumerate(id_feature_channel_list): 1831 | id_feature_channel_list[idx] = torch.fliplr(id_feature) 1832 | 1833 | # flip exemplar, the flip operation for exemplar should be consistent as the original img 1834 | for idx, exemplar_img in enumerate(exemplar_img_list): 1835 | exemplar_coordinate = exemplar_skeleton_coordinates_list[ 1836 | idx] 1837 | if exemplar_coordinate is None: 1838 | break 1839 | exemplar_img_list[idx] = torch.fliplr(exemplar_img) 1840 | coordinates = self.flip_skeleton_coordinates( 1841 | exemplar_coordinate) 1842 | canvas = np.zeros_like(canvas) 1843 | exemplar_skeleton_map = self.draw_bodypose( 1844 | canvas, coordinates['candidate'], 1845 | coordinates['subset']) 1846 | exemplar_skeleton_map = torch.from_numpy( 1847 | exemplar_skeleton_map) / 255.0 1848 | exemplar_skeleton_map = torch.fliplr( 1849 | exemplar_skeleton_map) 1850 | exemplar_skeleton_map_list[idx] = exemplar_skeleton_map 1851 | 1852 | exemplar_img_list = torch.stack(exemplar_img_list, dim=0) 1853 | exemplar_skeleton_map_list = torch.stack( 1854 | exemplar_skeleton_map_list, dim=0) 1855 | exemplar_color_block_list = torch.stack( 1856 | exemplar_color_block_list, dim=0) 1857 | id_feature_channel = torch.stack( 1858 | id_feature_channel_list, dim=0) 1859 | 1860 | assert img.size()[0] == 512 1861 | assert img.size()[1] == 512 1862 | 1863 | break 1864 | except Exception as e: 1865 | print(e) 1866 | index = random.randint(0, len(self.data_path_list) - 1) 1867 | 1868 | return { 1869 | 'GT': img, 1870 | 'masked_image': revised_img, 1871 | 'mask': inpaint_mask, 1872 | 'text': 'A photo of group portrait.', 1873 | 'skeleton_map': skeleton_map, 1874 | 'skeleton_mask': skeleton_mask, 1875 | 'exemplar': exemplar_img_list, 1876 | 'exemplar_skeleton': exemplar_skeleton_map_list, 1877 | 'id_color_map': id_color_map, 1878 | 'exemplar_color_block': exemplar_color_block_list, 1879 | 'id_feature_channel': id_feature_channel 1880 | } 1881 | 1882 | -------------------------------------------------------------------------------- /data/openpose/__init__.py: -------------------------------------------------------------------------------- 1 | # Openpose 2 | # Original from CMU https://github.com/CMU-Perceptual-Computing-Lab/openpose 3 | # 2nd Edited by https://github.com/Hzzone/pytorch-openpose 4 | # 3rd Edited by ControlNet 5 | 6 | import os 7 | os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" 8 | 9 | import torch 10 | import numpy as np 11 | from . import util 12 | from .body import Body 13 | from .hand import Hand 14 | 15 | 16 | 17 | class OpenposeDetector: 18 | def __init__(self, device=None): 19 | body_modelpath = './pretrained_models/body_pose_model.pth' 20 | hand_modelpath = './pretrained_models/hand_pose_model.pth' 21 | 22 | self.body_estimation = Body(body_modelpath, device) 23 | self.hand_estimation = Hand(hand_modelpath, device) 24 | 25 | def __call__(self, oriImg, hand=False): 26 | oriImg = oriImg[:, :, ::-1].copy() 27 | with torch.no_grad(): 28 | candidate, subset = self.body_estimation(oriImg) 29 | canvas = np.zeros_like(oriImg) 30 | canvas = util.draw_bodypose(canvas, candidate, subset) 31 | if hand: 32 | hands_list = util.handDetect(candidate, subset, oriImg) 33 | all_hand_peaks = [] 34 | for x, y, w, is_left in hands_list: 35 | peaks = self.hand_estimation(oriImg[y:y+w, x:x+w, :]) 36 | peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + x) 37 | peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + y) 38 | all_hand_peaks.append(peaks) 39 | canvas = util.draw_handpose(canvas, all_hand_peaks) 40 | all_hand_peaks = [peak.tolist() for peak in all_hand_peaks] 41 | return canvas, dict(candidate=candidate.tolist(), subset=subset.tolist(), all_hand_peaks = all_hand_peaks) 42 | else: 43 | return canvas, dict(candidate=candidate.tolist(), subset=subset.tolist()) 44 | -------------------------------------------------------------------------------- /data/openpose/body.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import math 4 | import time 5 | from scipy.ndimage.filters import gaussian_filter 6 | import matplotlib.pyplot as plt 7 | import matplotlib 8 | import torch 9 | from torchvision import transforms 10 | 11 | from . import util 12 | from .model import bodypose_model 13 | 14 | class Body(object): 15 | def __init__(self, model_path, device=None): 16 | self.model = bodypose_model() 17 | self.device = device 18 | if device is not None: 19 | self.model = self.model.cuda() 20 | # if torch.cuda.is_available(): 21 | # self.model = self.model.cuda() 22 | # print('cuda') 23 | model_dict = util.transfer(self.model, torch.load(model_path)) 24 | self.model.load_state_dict(model_dict) 25 | self.model.eval() 26 | 27 | def __call__(self, oriImg): 28 | # scale_search = [0.5, 1.0, 1.5, 2.0] 29 | scale_search = [0.5] 30 | boxsize = 368 31 | stride = 8 32 | padValue = 128 33 | thre1 = 0.1 34 | thre2 = 0.05 35 | multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search] 36 | heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 19)) 37 | paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38)) 38 | 39 | for m in range(len(multiplier)): 40 | scale = multiplier[m] 41 | imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) 42 | imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue) 43 | im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5 44 | im = np.ascontiguousarray(im) 45 | 46 | data = torch.from_numpy(im).float() 47 | if self.device is not None: 48 | # if torch.cuda.is_available(): 49 | data = data.cuda() 50 | # data = data.permute([2, 0, 1]).unsqueeze(0).float() 51 | with torch.no_grad(): 52 | Mconv7_stage6_L1, Mconv7_stage6_L2 = self.model(data) 53 | Mconv7_stage6_L1 = Mconv7_stage6_L1.cpu().numpy() 54 | Mconv7_stage6_L2 = Mconv7_stage6_L2.cpu().numpy() 55 | 56 | # extract outputs, resize, and remove padding 57 | # heatmap = np.transpose(np.squeeze(net.blobs[output_blobs.keys()[1]].data), (1, 2, 0)) # output 1 is heatmaps 58 | heatmap = np.transpose(np.squeeze(Mconv7_stage6_L2), (1, 2, 0)) # output 1 is heatmaps 59 | heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) 60 | heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :] 61 | heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC) 62 | 63 | # paf = np.transpose(np.squeeze(net.blobs[output_blobs.keys()[0]].data), (1, 2, 0)) # output 0 is PAFs 64 | paf = np.transpose(np.squeeze(Mconv7_stage6_L1), (1, 2, 0)) # output 0 is PAFs 65 | paf = cv2.resize(paf, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) 66 | paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :] 67 | paf = cv2.resize(paf, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC) 68 | 69 | heatmap_avg += heatmap_avg + heatmap / len(multiplier) 70 | paf_avg += + paf / len(multiplier) 71 | 72 | all_peaks = [] 73 | peak_counter = 0 74 | 75 | for part in range(18): 76 | map_ori = heatmap_avg[:, :, part] 77 | one_heatmap = gaussian_filter(map_ori, sigma=3) 78 | 79 | map_left = np.zeros(one_heatmap.shape) 80 | map_left[1:, :] = one_heatmap[:-1, :] 81 | map_right = np.zeros(one_heatmap.shape) 82 | map_right[:-1, :] = one_heatmap[1:, :] 83 | map_up = np.zeros(one_heatmap.shape) 84 | map_up[:, 1:] = one_heatmap[:, :-1] 85 | map_down = np.zeros(one_heatmap.shape) 86 | map_down[:, :-1] = one_heatmap[:, 1:] 87 | 88 | peaks_binary = np.logical_and.reduce( 89 | (one_heatmap >= map_left, one_heatmap >= map_right, one_heatmap >= map_up, one_heatmap >= map_down, one_heatmap > thre1)) 90 | peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0])) # note reverse 91 | peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks] 92 | peak_id = range(peak_counter, peak_counter + len(peaks)) 93 | peaks_with_score_and_id = [peaks_with_score[i] + (peak_id[i],) for i in range(len(peak_id))] 94 | 95 | all_peaks.append(peaks_with_score_and_id) 96 | peak_counter += len(peaks) 97 | 98 | # find connection in the specified sequence, center 29 is in the position 15 99 | limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \ 100 | [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \ 101 | [1, 16], [16, 18], [3, 17], [6, 18]] 102 | # the middle joints heatmap correpondence 103 | mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22], \ 104 | [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52], \ 105 | [55, 56], [37, 38], [45, 46]] 106 | 107 | connection_all = [] 108 | special_k = [] 109 | mid_num = 10 110 | 111 | for k in range(len(mapIdx)): 112 | score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]] 113 | candA = all_peaks[limbSeq[k][0] - 1] 114 | candB = all_peaks[limbSeq[k][1] - 1] 115 | nA = len(candA) 116 | nB = len(candB) 117 | indexA, indexB = limbSeq[k] 118 | if (nA != 0 and nB != 0): 119 | connection_candidate = [] 120 | for i in range(nA): 121 | for j in range(nB): 122 | vec = np.subtract(candB[j][:2], candA[i][:2]) 123 | norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1]) 124 | norm = max(0.001, norm) 125 | vec = np.divide(vec, norm) 126 | 127 | startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), \ 128 | np.linspace(candA[i][1], candB[j][1], num=mid_num))) 129 | 130 | vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] \ 131 | for I in range(len(startend))]) 132 | vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] \ 133 | for I in range(len(startend))]) 134 | 135 | score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1]) 136 | score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min( 137 | 0.5 * oriImg.shape[0] / norm - 1, 0) 138 | criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts) 139 | criterion2 = score_with_dist_prior > 0 140 | if criterion1 and criterion2: 141 | connection_candidate.append( 142 | [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]]) 143 | 144 | connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True) 145 | connection = np.zeros((0, 5)) 146 | for c in range(len(connection_candidate)): 147 | i, j, s = connection_candidate[c][0:3] 148 | if (i not in connection[:, 3] and j not in connection[:, 4]): 149 | connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]]) 150 | if (len(connection) >= min(nA, nB)): 151 | break 152 | 153 | connection_all.append(connection) 154 | else: 155 | special_k.append(k) 156 | connection_all.append([]) 157 | 158 | # last number in each row is the total parts number of that person 159 | # the second last number in each row is the score of the overall configuration 160 | subset = -1 * np.ones((0, 20)) 161 | candidate = np.array([item for sublist in all_peaks for item in sublist]) 162 | 163 | for k in range(len(mapIdx)): 164 | if k not in special_k: 165 | partAs = connection_all[k][:, 0] 166 | partBs = connection_all[k][:, 1] 167 | indexA, indexB = np.array(limbSeq[k]) - 1 168 | 169 | for i in range(len(connection_all[k])): # = 1:size(temp,1) 170 | found = 0 171 | subset_idx = [-1, -1] 172 | for j in range(len(subset)): # 1:size(subset,1): 173 | if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]: 174 | subset_idx[found] = j 175 | found += 1 176 | 177 | if found == 1: 178 | j = subset_idx[0] 179 | if subset[j][indexB] != partBs[i]: 180 | subset[j][indexB] = partBs[i] 181 | subset[j][-1] += 1 182 | subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2] 183 | elif found == 2: # if found 2 and disjoint, merge them 184 | j1, j2 = subset_idx 185 | membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2] 186 | if len(np.nonzero(membership == 2)[0]) == 0: # merge 187 | subset[j1][:-2] += (subset[j2][:-2] + 1) 188 | subset[j1][-2:] += subset[j2][-2:] 189 | subset[j1][-2] += connection_all[k][i][2] 190 | subset = np.delete(subset, j2, 0) 191 | else: # as like found == 1 192 | subset[j1][indexB] = partBs[i] 193 | subset[j1][-1] += 1 194 | subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2] 195 | 196 | # if find no partA in the subset, create a new subset 197 | elif not found and k < 17: 198 | row = -1 * np.ones(20) 199 | row[indexA] = partAs[i] 200 | row[indexB] = partBs[i] 201 | row[-1] = 2 202 | row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2] 203 | subset = np.vstack([subset, row]) 204 | # delete some rows of subset which has few parts occur 205 | deleteIdx = [] 206 | for i in range(len(subset)): 207 | if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4: 208 | deleteIdx.append(i) 209 | subset = np.delete(subset, deleteIdx, axis=0) 210 | 211 | # subset: n*20 array, 0-17 is the index in candidate, 18 is the total score, 19 is the total parts 212 | # candidate: x, y, score, id 213 | return candidate, subset 214 | 215 | if __name__ == "__main__": 216 | body_estimation = Body('../model/body_pose_model.pth') 217 | 218 | test_image = '../images/ski.jpg' 219 | oriImg = cv2.imread(test_image) # B,G,R order 220 | candidate, subset = body_estimation(oriImg) 221 | canvas = util.draw_bodypose(oriImg, candidate, subset) 222 | plt.imshow(canvas[:, :, [2, 1, 0]]) 223 | plt.show() 224 | -------------------------------------------------------------------------------- /data/openpose/hand.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import json 3 | import numpy as np 4 | import math 5 | import time 6 | from scipy.ndimage.filters import gaussian_filter 7 | import matplotlib.pyplot as plt 8 | import matplotlib 9 | import torch 10 | from skimage.measure import label 11 | 12 | from .model import handpose_model 13 | from . import util 14 | 15 | class Hand(object): 16 | def __init__(self, model_path, device=None): 17 | self.model = handpose_model() 18 | self.device = device 19 | if device is not None: 20 | self.model = self.model.cuda() 21 | # if torch.cuda.is_available(): 22 | # self.model = self.model.cuda() 23 | # print('cuda') 24 | model_dict = util.transfer(self.model, torch.load(model_path)) 25 | self.model.load_state_dict(model_dict) 26 | self.model.eval() 27 | 28 | def __call__(self, oriImg): 29 | scale_search = [0.5, 1.0, 1.5, 2.0] 30 | # scale_search = [0.5] 31 | boxsize = 368 32 | stride = 8 33 | padValue = 128 34 | thre = 0.05 35 | multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search] 36 | heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 22)) 37 | # paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38)) 38 | 39 | for m in range(len(multiplier)): 40 | scale = multiplier[m] 41 | imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) 42 | imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue) 43 | im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5 44 | im = np.ascontiguousarray(im) 45 | 46 | data = torch.from_numpy(im).float() 47 | if self.device is not None: 48 | # if torch.cuda.is_available(): 49 | data = data.cuda() 50 | # data = data.permute([2, 0, 1]).unsqueeze(0).float() 51 | with torch.no_grad(): 52 | output = self.model(data).cpu().numpy() 53 | # output = self.model(data).numpy()q 54 | 55 | # extract outputs, resize, and remove padding 56 | heatmap = np.transpose(np.squeeze(output), (1, 2, 0)) # output 1 is heatmaps 57 | heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) 58 | heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :] 59 | heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC) 60 | 61 | heatmap_avg += heatmap / len(multiplier) 62 | 63 | all_peaks = [] 64 | for part in range(21): 65 | map_ori = heatmap_avg[:, :, part] 66 | one_heatmap = gaussian_filter(map_ori, sigma=3) 67 | binary = np.ascontiguousarray(one_heatmap > thre, dtype=np.uint8) 68 | # 全部小于阈值 69 | if np.sum(binary) == 0: 70 | all_peaks.append([0, 0]) 71 | continue 72 | label_img, label_numbers = label(binary, return_num=True, connectivity=binary.ndim) 73 | max_index = np.argmax([np.sum(map_ori[label_img == i]) for i in range(1, label_numbers + 1)]) + 1 74 | label_img[label_img != max_index] = 0 75 | map_ori[label_img == 0] = 0 76 | 77 | y, x = util.npmax(map_ori) 78 | all_peaks.append([x, y]) 79 | return np.array(all_peaks) 80 | 81 | if __name__ == "__main__": 82 | hand_estimation = Hand('../model/hand_pose_model.pth') 83 | 84 | # test_image = '../images/hand.jpg' 85 | test_image = '../images/hand.jpg' 86 | oriImg = cv2.imread(test_image) # B,G,R order 87 | peaks = hand_estimation(oriImg) 88 | canvas = util.draw_handpose(oriImg, peaks, True) 89 | cv2.imshow('', canvas) 90 | cv2.waitKey(0) 91 | -------------------------------------------------------------------------------- /data/openpose/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from collections import OrderedDict 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | def make_layers(block, no_relu_layers): 8 | layers = [] 9 | for layer_name, v in block.items(): 10 | if 'pool' in layer_name: 11 | layer = nn.MaxPool2d(kernel_size=v[0], stride=v[1], 12 | padding=v[2]) 13 | layers.append((layer_name, layer)) 14 | else: 15 | conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], 16 | kernel_size=v[2], stride=v[3], 17 | padding=v[4]) 18 | layers.append((layer_name, conv2d)) 19 | if layer_name not in no_relu_layers: 20 | layers.append(('relu_'+layer_name, nn.ReLU(inplace=True))) 21 | 22 | return nn.Sequential(OrderedDict(layers)) 23 | 24 | class bodypose_model(nn.Module): 25 | def __init__(self): 26 | super(bodypose_model, self).__init__() 27 | 28 | # these layers have no relu layer 29 | no_relu_layers = ['conv5_5_CPM_L1', 'conv5_5_CPM_L2', 'Mconv7_stage2_L1',\ 30 | 'Mconv7_stage2_L2', 'Mconv7_stage3_L1', 'Mconv7_stage3_L2',\ 31 | 'Mconv7_stage4_L1', 'Mconv7_stage4_L2', 'Mconv7_stage5_L1',\ 32 | 'Mconv7_stage5_L2', 'Mconv7_stage6_L1', 'Mconv7_stage6_L1'] 33 | blocks = {} 34 | block0 = OrderedDict([ 35 | ('conv1_1', [3, 64, 3, 1, 1]), 36 | ('conv1_2', [64, 64, 3, 1, 1]), 37 | ('pool1_stage1', [2, 2, 0]), 38 | ('conv2_1', [64, 128, 3, 1, 1]), 39 | ('conv2_2', [128, 128, 3, 1, 1]), 40 | ('pool2_stage1', [2, 2, 0]), 41 | ('conv3_1', [128, 256, 3, 1, 1]), 42 | ('conv3_2', [256, 256, 3, 1, 1]), 43 | ('conv3_3', [256, 256, 3, 1, 1]), 44 | ('conv3_4', [256, 256, 3, 1, 1]), 45 | ('pool3_stage1', [2, 2, 0]), 46 | ('conv4_1', [256, 512, 3, 1, 1]), 47 | ('conv4_2', [512, 512, 3, 1, 1]), 48 | ('conv4_3_CPM', [512, 256, 3, 1, 1]), 49 | ('conv4_4_CPM', [256, 128, 3, 1, 1]) 50 | ]) 51 | 52 | 53 | # Stage 1 54 | block1_1 = OrderedDict([ 55 | ('conv5_1_CPM_L1', [128, 128, 3, 1, 1]), 56 | ('conv5_2_CPM_L1', [128, 128, 3, 1, 1]), 57 | ('conv5_3_CPM_L1', [128, 128, 3, 1, 1]), 58 | ('conv5_4_CPM_L1', [128, 512, 1, 1, 0]), 59 | ('conv5_5_CPM_L1', [512, 38, 1, 1, 0]) 60 | ]) 61 | 62 | block1_2 = OrderedDict([ 63 | ('conv5_1_CPM_L2', [128, 128, 3, 1, 1]), 64 | ('conv5_2_CPM_L2', [128, 128, 3, 1, 1]), 65 | ('conv5_3_CPM_L2', [128, 128, 3, 1, 1]), 66 | ('conv5_4_CPM_L2', [128, 512, 1, 1, 0]), 67 | ('conv5_5_CPM_L2', [512, 19, 1, 1, 0]) 68 | ]) 69 | blocks['block1_1'] = block1_1 70 | blocks['block1_2'] = block1_2 71 | 72 | self.model0 = make_layers(block0, no_relu_layers) 73 | 74 | # Stages 2 - 6 75 | for i in range(2, 7): 76 | blocks['block%d_1' % i] = OrderedDict([ 77 | ('Mconv1_stage%d_L1' % i, [185, 128, 7, 1, 3]), 78 | ('Mconv2_stage%d_L1' % i, [128, 128, 7, 1, 3]), 79 | ('Mconv3_stage%d_L1' % i, [128, 128, 7, 1, 3]), 80 | ('Mconv4_stage%d_L1' % i, [128, 128, 7, 1, 3]), 81 | ('Mconv5_stage%d_L1' % i, [128, 128, 7, 1, 3]), 82 | ('Mconv6_stage%d_L1' % i, [128, 128, 1, 1, 0]), 83 | ('Mconv7_stage%d_L1' % i, [128, 38, 1, 1, 0]) 84 | ]) 85 | 86 | blocks['block%d_2' % i] = OrderedDict([ 87 | ('Mconv1_stage%d_L2' % i, [185, 128, 7, 1, 3]), 88 | ('Mconv2_stage%d_L2' % i, [128, 128, 7, 1, 3]), 89 | ('Mconv3_stage%d_L2' % i, [128, 128, 7, 1, 3]), 90 | ('Mconv4_stage%d_L2' % i, [128, 128, 7, 1, 3]), 91 | ('Mconv5_stage%d_L2' % i, [128, 128, 7, 1, 3]), 92 | ('Mconv6_stage%d_L2' % i, [128, 128, 1, 1, 0]), 93 | ('Mconv7_stage%d_L2' % i, [128, 19, 1, 1, 0]) 94 | ]) 95 | 96 | for k in blocks.keys(): 97 | blocks[k] = make_layers(blocks[k], no_relu_layers) 98 | 99 | self.model1_1 = blocks['block1_1'] 100 | self.model2_1 = blocks['block2_1'] 101 | self.model3_1 = blocks['block3_1'] 102 | self.model4_1 = blocks['block4_1'] 103 | self.model5_1 = blocks['block5_1'] 104 | self.model6_1 = blocks['block6_1'] 105 | 106 | self.model1_2 = blocks['block1_2'] 107 | self.model2_2 = blocks['block2_2'] 108 | self.model3_2 = blocks['block3_2'] 109 | self.model4_2 = blocks['block4_2'] 110 | self.model5_2 = blocks['block5_2'] 111 | self.model6_2 = blocks['block6_2'] 112 | 113 | 114 | def forward(self, x): 115 | 116 | out1 = self.model0(x) 117 | 118 | out1_1 = self.model1_1(out1) 119 | out1_2 = self.model1_2(out1) 120 | out2 = torch.cat([out1_1, out1_2, out1], 1) 121 | 122 | out2_1 = self.model2_1(out2) 123 | out2_2 = self.model2_2(out2) 124 | out3 = torch.cat([out2_1, out2_2, out1], 1) 125 | 126 | out3_1 = self.model3_1(out3) 127 | out3_2 = self.model3_2(out3) 128 | out4 = torch.cat([out3_1, out3_2, out1], 1) 129 | 130 | out4_1 = self.model4_1(out4) 131 | out4_2 = self.model4_2(out4) 132 | out5 = torch.cat([out4_1, out4_2, out1], 1) 133 | 134 | out5_1 = self.model5_1(out5) 135 | out5_2 = self.model5_2(out5) 136 | out6 = torch.cat([out5_1, out5_2, out1], 1) 137 | 138 | out6_1 = self.model6_1(out6) 139 | out6_2 = self.model6_2(out6) 140 | 141 | return out6_1, out6_2 142 | 143 | class handpose_model(nn.Module): 144 | def __init__(self): 145 | super(handpose_model, self).__init__() 146 | 147 | # these layers have no relu layer 148 | no_relu_layers = ['conv6_2_CPM', 'Mconv7_stage2', 'Mconv7_stage3',\ 149 | 'Mconv7_stage4', 'Mconv7_stage5', 'Mconv7_stage6'] 150 | # stage 1 151 | block1_0 = OrderedDict([ 152 | ('conv1_1', [3, 64, 3, 1, 1]), 153 | ('conv1_2', [64, 64, 3, 1, 1]), 154 | ('pool1_stage1', [2, 2, 0]), 155 | ('conv2_1', [64, 128, 3, 1, 1]), 156 | ('conv2_2', [128, 128, 3, 1, 1]), 157 | ('pool2_stage1', [2, 2, 0]), 158 | ('conv3_1', [128, 256, 3, 1, 1]), 159 | ('conv3_2', [256, 256, 3, 1, 1]), 160 | ('conv3_3', [256, 256, 3, 1, 1]), 161 | ('conv3_4', [256, 256, 3, 1, 1]), 162 | ('pool3_stage1', [2, 2, 0]), 163 | ('conv4_1', [256, 512, 3, 1, 1]), 164 | ('conv4_2', [512, 512, 3, 1, 1]), 165 | ('conv4_3', [512, 512, 3, 1, 1]), 166 | ('conv4_4', [512, 512, 3, 1, 1]), 167 | ('conv5_1', [512, 512, 3, 1, 1]), 168 | ('conv5_2', [512, 512, 3, 1, 1]), 169 | ('conv5_3_CPM', [512, 128, 3, 1, 1]) 170 | ]) 171 | 172 | block1_1 = OrderedDict([ 173 | ('conv6_1_CPM', [128, 512, 1, 1, 0]), 174 | ('conv6_2_CPM', [512, 22, 1, 1, 0]) 175 | ]) 176 | 177 | blocks = {} 178 | blocks['block1_0'] = block1_0 179 | blocks['block1_1'] = block1_1 180 | 181 | # stage 2-6 182 | for i in range(2, 7): 183 | blocks['block%d' % i] = OrderedDict([ 184 | ('Mconv1_stage%d' % i, [150, 128, 7, 1, 3]), 185 | ('Mconv2_stage%d' % i, [128, 128, 7, 1, 3]), 186 | ('Mconv3_stage%d' % i, [128, 128, 7, 1, 3]), 187 | ('Mconv4_stage%d' % i, [128, 128, 7, 1, 3]), 188 | ('Mconv5_stage%d' % i, [128, 128, 7, 1, 3]), 189 | ('Mconv6_stage%d' % i, [128, 128, 1, 1, 0]), 190 | ('Mconv7_stage%d' % i, [128, 22, 1, 1, 0]) 191 | ]) 192 | 193 | for k in blocks.keys(): 194 | blocks[k] = make_layers(blocks[k], no_relu_layers) 195 | 196 | self.model1_0 = blocks['block1_0'] 197 | self.model1_1 = blocks['block1_1'] 198 | self.model2 = blocks['block2'] 199 | self.model3 = blocks['block3'] 200 | self.model4 = blocks['block4'] 201 | self.model5 = blocks['block5'] 202 | self.model6 = blocks['block6'] 203 | 204 | def forward(self, x): 205 | out1_0 = self.model1_0(x) 206 | out1_1 = self.model1_1(out1_0) 207 | concat_stage2 = torch.cat([out1_1, out1_0], 1) 208 | out_stage2 = self.model2(concat_stage2) 209 | concat_stage3 = torch.cat([out_stage2, out1_0], 1) 210 | out_stage3 = self.model3(concat_stage3) 211 | concat_stage4 = torch.cat([out_stage3, out1_0], 1) 212 | out_stage4 = self.model4(concat_stage4) 213 | concat_stage5 = torch.cat([out_stage4, out1_0], 1) 214 | out_stage5 = self.model5(concat_stage5) 215 | concat_stage6 = torch.cat([out_stage5, out1_0], 1) 216 | out_stage6 = self.model6(concat_stage6) 217 | return out_stage6 218 | 219 | 220 | -------------------------------------------------------------------------------- /data/openpose/util.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import matplotlib 4 | import cv2 5 | 6 | 7 | def padRightDownCorner(img, stride, padValue): 8 | h = img.shape[0] 9 | w = img.shape[1] 10 | 11 | pad = 4 * [None] 12 | pad[0] = 0 # up 13 | pad[1] = 0 # left 14 | pad[2] = 0 if (h % stride == 0) else stride - (h % stride) # down 15 | pad[3] = 0 if (w % stride == 0) else stride - (w % stride) # right 16 | 17 | img_padded = img 18 | pad_up = np.tile(img_padded[0:1, :, :]*0 + padValue, (pad[0], 1, 1)) 19 | img_padded = np.concatenate((pad_up, img_padded), axis=0) 20 | pad_left = np.tile(img_padded[:, 0:1, :]*0 + padValue, (1, pad[1], 1)) 21 | img_padded = np.concatenate((pad_left, img_padded), axis=1) 22 | pad_down = np.tile(img_padded[-2:-1, :, :]*0 + padValue, (pad[2], 1, 1)) 23 | img_padded = np.concatenate((img_padded, pad_down), axis=0) 24 | pad_right = np.tile(img_padded[:, -2:-1, :]*0 + padValue, (1, pad[3], 1)) 25 | img_padded = np.concatenate((img_padded, pad_right), axis=1) 26 | 27 | return img_padded, pad 28 | 29 | # transfer caffe model to pytorch which will match the layer name 30 | def transfer(model, model_weights): 31 | transfered_model_weights = {} 32 | for weights_name in model.state_dict().keys(): 33 | transfered_model_weights[weights_name] = model_weights['.'.join(weights_name.split('.')[1:])] 34 | return transfered_model_weights 35 | 36 | # draw the body keypoint and lims 37 | def draw_bodypose(canvas, candidate, subset): 38 | stickwidth = 4 39 | limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \ 40 | [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \ 41 | [1, 16], [16, 18], [3, 17], [6, 18]] 42 | 43 | colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \ 44 | [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \ 45 | [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]] 46 | for i in range(18): 47 | for n in range(len(subset)): 48 | index = int(subset[n][i]) 49 | if index == -1: 50 | continue 51 | x, y = candidate[index][0:2] 52 | cv2.circle(canvas, (int(x), int(y)), 4, colors[i], thickness=-1) 53 | for i in range(17): 54 | for n in range(len(subset)): 55 | index = subset[n][np.array(limbSeq[i]) - 1] 56 | if -1 in index: 57 | continue 58 | cur_canvas = canvas.copy() 59 | Y = candidate[index.astype(int), 0] 60 | X = candidate[index.astype(int), 1] 61 | mX = np.mean(X) 62 | mY = np.mean(Y) 63 | length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5 64 | angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1])) 65 | polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1) 66 | cv2.fillConvexPoly(cur_canvas, polygon, colors[i]) 67 | canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0) 68 | # plt.imsave("preview.jpg", canvas[:, :, [2, 1, 0]]) 69 | # plt.imshow(canvas[:, :, [2, 1, 0]]) 70 | return canvas 71 | 72 | 73 | # image drawed by opencv is not good. 74 | def draw_handpose(canvas, all_hand_peaks, show_number=False): 75 | edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], \ 76 | [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]] 77 | 78 | for peaks in all_hand_peaks: 79 | for ie, e in enumerate(edges): 80 | if np.sum(np.all(peaks[e], axis=1)==0)==0: 81 | x1, y1 = peaks[e[0]] 82 | x2, y2 = peaks[e[1]] 83 | cv2.line(canvas, (x1, y1), (x2, y2), matplotlib.colors.hsv_to_rgb([ie/float(len(edges)), 1.0, 1.0])*255, thickness=2) 84 | 85 | for i, keyponit in enumerate(peaks): 86 | x, y = keyponit 87 | cv2.circle(canvas, (x, y), 4, (0, 0, 255), thickness=-1) 88 | if show_number: 89 | cv2.putText(canvas, str(i), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), lineType=cv2.LINE_AA) 90 | return canvas 91 | 92 | # detect hand according to body pose keypoints 93 | # please refer to https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/hand/handDetector.cpp 94 | def handDetect(candidate, subset, oriImg): 95 | # right hand: wrist 4, elbow 3, shoulder 2 96 | # left hand: wrist 7, elbow 6, shoulder 5 97 | ratioWristElbow = 0.33 98 | detect_result = [] 99 | image_height, image_width = oriImg.shape[0:2] 100 | for person in subset.astype(int): 101 | # if any of three not detected 102 | has_left = np.sum(person[[5, 6, 7]] == -1) == 0 103 | has_right = np.sum(person[[2, 3, 4]] == -1) == 0 104 | if not (has_left or has_right): 105 | continue 106 | hands = [] 107 | #left hand 108 | if has_left: 109 | left_shoulder_index, left_elbow_index, left_wrist_index = person[[5, 6, 7]] 110 | x1, y1 = candidate[left_shoulder_index][:2] 111 | x2, y2 = candidate[left_elbow_index][:2] 112 | x3, y3 = candidate[left_wrist_index][:2] 113 | hands.append([x1, y1, x2, y2, x3, y3, True]) 114 | # right hand 115 | if has_right: 116 | right_shoulder_index, right_elbow_index, right_wrist_index = person[[2, 3, 4]] 117 | x1, y1 = candidate[right_shoulder_index][:2] 118 | x2, y2 = candidate[right_elbow_index][:2] 119 | x3, y3 = candidate[right_wrist_index][:2] 120 | hands.append([x1, y1, x2, y2, x3, y3, False]) 121 | 122 | for x1, y1, x2, y2, x3, y3, is_left in hands: 123 | # pos_hand = pos_wrist + ratio * (pos_wrist - pos_elbox) = (1 + ratio) * pos_wrist - ratio * pos_elbox 124 | # handRectangle.x = posePtr[wrist*3] + ratioWristElbow * (posePtr[wrist*3] - posePtr[elbow*3]); 125 | # handRectangle.y = posePtr[wrist*3+1] + ratioWristElbow * (posePtr[wrist*3+1] - posePtr[elbow*3+1]); 126 | # const auto distanceWristElbow = getDistance(poseKeypoints, person, wrist, elbow); 127 | # const auto distanceElbowShoulder = getDistance(poseKeypoints, person, elbow, shoulder); 128 | # handRectangle.width = 1.5f * fastMax(distanceWristElbow, 0.9f * distanceElbowShoulder); 129 | x = x3 + ratioWristElbow * (x3 - x2) 130 | y = y3 + ratioWristElbow * (y3 - y2) 131 | distanceWristElbow = math.sqrt((x3 - x2) ** 2 + (y3 - y2) ** 2) 132 | distanceElbowShoulder = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2) 133 | width = 1.5 * max(distanceWristElbow, 0.9 * distanceElbowShoulder) 134 | # x-y refers to the center --> offset to topLeft point 135 | # handRectangle.x -= handRectangle.width / 2.f; 136 | # handRectangle.y -= handRectangle.height / 2.f; 137 | x -= width / 2 138 | y -= width / 2 # width = height 139 | # overflow the image 140 | if x < 0: x = 0 141 | if y < 0: y = 0 142 | width1 = width 143 | width2 = width 144 | if x + width > image_width: width1 = image_width - x 145 | if y + width > image_height: width2 = image_height - y 146 | width = min(width1, width2) 147 | # the max hand box value is 20 pixels 148 | if width >= 20: 149 | detect_result.append([int(x), int(y), int(width), is_left]) 150 | 151 | ''' 152 | return value: [[x, y, w, True if left hand else False]]. 153 | width=height since the network require squared input. 154 | x, y is the coordinate of top left 155 | ''' 156 | return detect_result 157 | 158 | # get max index of 2d array 159 | def npmax(array): 160 | arrayindex = array.argmax(1) 161 | arrayvalue = array.max(1) 162 | i = arrayvalue.argmax() 163 | j = arrayindex[i] 164 | return i, j 165 | --------------------------------------------------------------------------------