├── README.md
└── data
    ├── group_diff_data_gen.py
    └── openpose
        ├── __init__.py
        ├── body.py
        ├── hand.py
        ├── model.py
        └── util.py
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # GroupDiff: Diffusion-based Group Portrait Editing
4 |
5 |
6 |
7 |
8 | ## Dataset
9 |
10 | Our dataset is built on the NUS LV Multiple-Human Parsing Dataset v2.0 (LV-MHP-v2). Please download the source data from this [link](https://drive.google.com/file/d/1YVBGMru0dlwB8zu1OoErOazZoc8ISSJn/view?usp=sharing).
11 |
12 | We use [MMPose](https://github.com/open-mmlab/mmpose) with the "vitpose_h" model to estimate human poses. You can download the pose estimation results from this [link](https://drive.google.com/file/d/1_ivJ5jTv0p-gdcZ8XLvTix_ymg7KOJTL/view?usp=sharing).
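
If you want to regenerate the pose files yourself, the snippet below is a minimal sketch using MMPose's high-level inferencer; the `vitpose-h` alias and the `pred_out_dir` argument are assumptions and may differ across MMPose versions:

```python
# Sketch: dump per-image keypoint JSONs with MMPose (API details vary by version).
from mmpose.apis import MMPoseInferencer

inferencer = MMPoseInferencer(pose2d='vitpose-h')  # assumed alias for the ViTPose-H model
# Consume the generator so every image in the folder is processed;
# predictions are written as one JSON file per image.
for _ in inferencer('./LV-MHP-v2/train/images',
                    pred_out_dir='./LV-MHP-v2/train/pose_estimation'):
    pass
```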
13 |
14 | After downloading the dataset, unzip the files and place them under the dataset folder with the following structure:
15 | ```
16 | ./LV-MHP-v2
17 | ├── train
18 | │   ├── images
19 | │   ├── parsing_annos
20 | │   └── pose_estimation
21 | └── shhq_dataset
22 |     ├── images
23 |     ├── parsing_annos
24 |     └── pose_estimation
25 | ```
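
A quick check that the layout matches what the data-generation code expects (per-image parsing maps named `{img_name}_*.png` and a pose file `{img_name}.json`) is sketched below; the paths are assumptions based on the structure above:

```python
# Sanity check (sketch): every training image should have parsing maps and a pose JSON.
import glob
import os

root = './LV-MHP-v2/train'
for img_path in sorted(glob.glob(f'{root}/images/*.jpg'))[:10]:
    name = os.path.splitext(os.path.basename(img_path))[0]
    assert glob.glob(f'{root}/parsing_annos/{name}_*.png'), f'missing parsing for {name}'
    assert os.path.exists(f'{root}/pose_estimation/{name}.json'), f'missing pose for {name}'
```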
26 |
27 | Based on the preprocessed data, we propose a comprehensive training data generation engine to synthesize paired data. The data generation code can be found [here](https://github.com/yumingj/GroupDiff/blob/main/data/group_diff_data_gen.py).
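
As a rough usage sketch (argument values are assumptions; see `__init__` in `group_diff_data_gen.py` for the full options), the engine behaves like a standard PyTorch dataset:

```python
# Sketch: draw one synthesized training pair from the data engine.
from data.group_diff_data_gen import GroupDiffDataGen

dataset = GroupDiffDataGen(
    state='train',
    skeleton_path_prefix='./LV-MHP-v2/train/pose_estimation',  # assumed value
    add_harmonization=False,
    reposing_exemplar=True)
sample = dataset[0]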
28 |
--------------------------------------------------------------------------------
/data/group_diff_data_gen.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import glob
3 | import json
4 | import math
5 | import random
6 |
7 | import cv2
8 | import numpy as np
9 | import torch
10 | import torch.utils.data as data
11 | import torchvision.transforms as transforms
12 | import torchvision.transforms.functional as F
13 | from PIL import Image, ImageDraw
14 | from scipy.ndimage import binary_dilation
15 | from torch import Tensor
16 | from torchvision.ops import masks_to_boxes
17 |
18 | from openpose import OpenposeDetector
19 |
20 |
21 | class GroupDiffDataGen(data.Dataset):
22 |
23 | def __init__(self,
24 | state,
25 | skeleton_path_prefix,
26 | add_harmonization=False,
27 | reposing_exemplar=True,
28 | use_localssd=False):
29 | self.state = state
30 | self.add_harmonization = add_harmonization
31 | self.use_localssd = use_localssd
32 |
33 | if state == 'train':
34 | data_dir = './LV-MHP-v2/train'
35 | self.data_path_list = glob.glob(f'{data_dir}/images/*.jpg')
36 | self.parsing_dir = f'{data_dir}/parsing_annos'
37 | self.pose_estimation_path = f'{data_dir}/pose_estimation'
38 | self.data_path_list.sort()
39 | else:
40 | data_dir = './LV-MHP-v2/val'
41 | self.data_path_list = glob.glob(f'{data_dir}/images/*.jpg')
42 | self.parsing_dir = f'{data_dir}/parsing_annos'
43 | self.data_path_list.sort()
44 |
45 | self.skeleton_path_prefix = skeleton_path_prefix
46 |
47 | self.resize_transform_img = transforms.Resize(size=512)
48 | self.resize_transform_mask = transforms.Resize(
49 | size=512, interpolation=transforms.InterpolationMode.NEAREST)
50 |
51 | self.resize_transform_exemplar = transforms.Resize(size=224)
52 |
53 | self.apply_openpose = OpenposeDetector()
54 |
55 | self.reposing_exemplar = reposing_exemplar
56 |
57 | self.random_color_identity_group = [[(0, 0, 255), (0, 0, 200),
58 | (0, 0, 150)],
59 | [(255, 0, 0), (200, 0, 0),
60 | (150, 0, 0)],
61 | [(0, 255, 0), (0, 200, 0),
62 |                                              (0, 150, 0)],
63 | [(255, 0, 255), (200, 0, 200),
64 | (150, 0, 150)],
65 | [(0, 255, 255), (0, 200, 200),
66 | (0, 150, 150)]]
67 |
68 | def transform_exemplar(self):
69 | transform_list = []
70 | transform_list += [
71 | transforms.RandomAffine(
72 | degrees=20,
73 | translate=(0.1, 0.1),
74 | scale=(0.9, 1.10),
75 | fill=255,
76 | interpolation=transforms.InterpolationMode.BILINEAR)
77 | ]
78 | if self.add_harmonization:
79 | transform_list += [
80 | transforms.ColorJitter(
81 | brightness=(0.9, 1.1),
82 | contrast=(0.9, 1.1),
83 | saturation=(0.8, 1.3))
84 | ]
85 | transform_list += [transforms.Resize(size=512)]
86 | # transform_list += [transforms.Normalize((0.48145466, 0.4578275, 0.40821073),
87 | # (0.26862954, 0.26130258, 0.27577711))]
88 |
89 | return transforms.Compose(transform_list)
90 |
91 | def get_candidate_parsing_list_for_exemplar(self, inpaint_mask,
92 |                                                 selected_idx,
93 | instance_parsing_list):
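        """Filter instance parsings to those overlapping the inpainting mask.

        Returns the filtered list together with the position of the selected
        person's parsing inside that filtered list.
        """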
94 | candidate_parsing_list = []
95 | idx_in_candidate_list = 0
96 | count = 0
97 | for idx, instance_parsing in enumerate(instance_parsing_list):
98 | mask_binary = np.zeros(
99 | (inpaint_mask.shape[0], inpaint_mask.shape[1]), dtype=np.uint8)
100 | mask_binary[instance_parsing > 0] = 1
101 |
102 | if np.sum(mask_binary * inpaint_mask) == 0:
103 | continue
104 |
105 | candidate_parsing_list.append(instance_parsing)
106 |
107 |             if idx == selected_idx:
108 | idx_in_candidate_list = count
109 |
110 | count += 1
111 |
112 | return candidate_parsing_list, idx_in_candidate_list
113 |
114 |
115 |
116 | def warp_parsing(self, parsing, rect1, rect2):
117 | shape1 = parsing.shape
118 | h = shape1[0]
119 | w = shape1[1]
120 |
121 | rect1 = np.array(rect1, dtype=np.float32)
122 | rect2 = np.array(rect2, dtype=np.float32)
123 |
124 |         # homography that maps the quadrilateral rect1 onto rect2
125 |         H = cv2.getPerspectiveTransform(src=rect1, dst=rect2)
126 |
130 | parsing_warped = cv2.warpPerspective(
131 | src=parsing, M=H, dsize=(w, h), flags=cv2.INTER_NEAREST)
132 |
133 | return parsing_warped
134 |
135 | def rotate_whole_arms(self, ori_point, point_a, point_b, alpha):
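        """Rigidly rotate a whole arm about its root joint.

        point_a (e.g. the elbow) is rotated by alpha around ori_point (e.g.
        the shoulder); point_b (e.g. the wrist) keeps its offset relative to
        point_a and is rotated by the same angle, so the limb turns as one
        rigid piece. Returns the new positions of point_a and point_b.
        """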
136 | x_0, y_0 = ori_point
137 | x_a, y_a = point_a
138 | x_b, y_b = point_b
139 |
140 | x_a = x_a - x_0
141 | y_a = y_a - y_0
142 |
143 | x_b = x_b - x_0
144 | y_b = y_b - y_0
145 |
146 | x_a_prime = x_a * math.cos(alpha) - y_a * math.sin(alpha)
147 | y_a_prime = x_a * math.sin(alpha) + y_a * math.cos(alpha)
148 |
149 | x_b_dif = x_b - x_a
150 | y_b_dif = y_b - y_a
151 |
152 | x_b_prime = x_b_dif * math.cos(alpha) - y_b_dif * math.sin(
153 | alpha) + x_a_prime + x_0
154 | y_b_prime = x_b_dif * math.sin(alpha) + y_b_dif * math.cos(
155 | alpha) + y_a_prime + y_0
156 |
157 | return [x_a_prime + x_0, y_a_prime + y_0], [x_b_prime, y_b_prime]
158 |
159 | def rotate_part_arms(self, ori_point, point_a, alpha):
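        """Rotate a single joint point_a by alpha around ori_point."""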
160 | x_0, y_0 = ori_point
161 | x_a, y_a = point_a
162 |
163 | x_a = x_a - x_0
164 | y_a = y_a - y_0
165 |
166 | x_a_prime = x_a * math.cos(alpha) - y_a * math.sin(alpha)
167 | y_a_prime = x_a * math.sin(alpha) + y_a * math.cos(alpha)
168 |
169 | return [x_a_prime + x_0, y_a_prime + y_0]
170 |
171 | def randomly_change_pose(self, ori_coordinates, selected_person_idx):
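        """Randomly re-pose one arm of the selected person.

        With equal probability, rotate the whole left/right arm about the
        shoulder, or only the forearm about the elbow, by a random angle.
        Joints that were not detected (-1) are skipped, retrying up to five
        times. Returns a deep-copied coordinate dict with the new pose.
        """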
172 | new_coordinates = copy.deepcopy(ori_coordinates)
173 | candidate = ori_coordinates['candidate']
174 | subset = ori_coordinates['subset']
175 |
176 | augmentation_type = random.uniform(0, 1)
177 | try:
178 | index_2 = int(subset[selected_person_idx][2])
179 | index_3 = int(subset[selected_person_idx][3])
180 | index_4 = int(subset[selected_person_idx][4])
181 | index_5 = int(subset[selected_person_idx][5])
182 | index_6 = int(subset[selected_person_idx][6])
183 | index_7 = int(subset[selected_person_idx][7])
184 |         except Exception:
185 | return new_coordinates
186 |
187 | if (index_2 == -1 or index_3 == -1
188 | or index_4 == -1) and (index_3 == -1 or index_4 == -1) and (
189 | index_5 == -1 or index_6 == -1
190 | or index_7 == -1) and (index_6 == -1 or index_7 == -1):
191 | return new_coordinates
192 |
193 | augmentation_type = random.uniform(0, 1)
194 | trial_num = 0
195 | while (trial_num < 5):
196 | if augmentation_type < 0.25:
197 | if index_2 == -1 or index_3 == -1 or index_4 == -1:
198 | trial_num += 1
199 | augmentation_type = random.uniform(0, 1)
200 | continue
201 | # left arms
202 | # change from the body_idx 2
203 | changed_x3, changed_x4 = self.rotate_whole_arms(
204 | candidate[int(subset[selected_person_idx][2])][0:2],
205 | candidate[int(subset[selected_person_idx][3])][0:2],
206 | candidate[int(subset[selected_person_idx][4])][0:2],
207 | 2 * math.pi * random.random())
208 |
209 | new_coordinates['candidate'][int(
210 | subset[selected_person_idx][3])][0:2] = changed_x3
211 | new_coordinates['candidate'][int(
212 | subset[selected_person_idx][4])][0:2] = changed_x4
213 | elif augmentation_type < 0.5:
214 | # left arms
215 | # change from the body_idx 3
216 | if index_3 == -1 or index_4 == -1:
217 | trial_num += 1
218 | augmentation_type = random.uniform(0, 1)
219 | continue
220 | changed_x4 = self.rotate_part_arms(
221 | candidate[int(subset[selected_person_idx][3])][0:2],
222 | candidate[int(subset[selected_person_idx][4])][0:2],
223 | 2 * math.pi * random.random())
224 | new_coordinates['candidate'][int(
225 | subset[selected_person_idx][4])][0:2] = changed_x4
226 | elif augmentation_type < 0.75:
227 | # right arms
228 | # change from the body_idx 5
229 | if index_5 == -1 or index_6 == -1 or index_7 == -1:
230 | trial_num += 1
231 | augmentation_type = random.uniform(0, 1)
232 | continue
233 | changed_x6, changed_x7 = self.rotate_whole_arms(
234 | candidate[int(subset[selected_person_idx][5])][0:2],
235 | candidate[int(subset[selected_person_idx][6])][0:2],
236 | candidate[int(subset[selected_person_idx][7])][0:2],
237 | 2 * math.pi * random.random())
238 |
239 | new_coordinates['candidate'][int(
240 | subset[selected_person_idx][6])][0:2] = changed_x6
241 | new_coordinates['candidate'][int(
242 | subset[selected_person_idx][7])][0:2] = changed_x7
243 | else:
244 | # right arms
245 | # change from the body_idx 5
246 | if index_6 == -1 or index_7 == -1:
247 | trial_num += 1
248 | augmentation_type = random.uniform(0, 1)
249 | continue
250 | changed_x7 = self.rotate_part_arms(
251 | candidate[int(subset[selected_person_idx][6])][0:2],
252 | candidate[int(subset[selected_person_idx][7])][0:2],
253 | 2 * math.pi * random.random())
254 | new_coordinates['candidate'][int(
255 | subset[selected_person_idx][7])][0:2] = changed_x7
256 |
257 | break
258 |
259 | return new_coordinates
260 |
261 |
262 | def reposing_exemplar_img(self, exemplar_img, parsing_map):
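        """Warp the exemplar so its arms match a randomly perturbed pose.

        The pose is re-estimated with OpenPose, one arm is randomly rotated,
        and each moved limb segment is re-rendered by a perspective warp of
        an oriented rectangle around the old segment onto the new one.
        Returns the edited exemplar and the new pose coordinates.
        """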
263 | _, ori_coordinates = self.apply_openpose(exemplar_img)
264 |
265 | if self.reposing_exemplar:
266 | selected_person_idx = 0
267 | new_coordinates = self.randomly_change_pose(
268 | ori_coordinates, selected_person_idx)
269 |
270 | connected_line_list = [[2, 3], [3, 4], [5, 6], [6, 7]]
271 |
272 | new_exemplar_img = exemplar_img.copy()
273 | for connected_line in connected_line_list:
274 | try:
275 | index = int(
276 | ori_coordinates['subset'][0][connected_line[0]])
277 |                 except Exception:
278 | continue
279 | if index == -1:
280 | continue
281 | point1 = ori_coordinates['candidate'][index][0:2]
282 |
283 | try:
284 | index = int(
285 | ori_coordinates['subset'][0][connected_line[1]])
286 |                 except Exception:
287 | continue
288 | if index == -1:
289 | continue
290 | point2 = ori_coordinates['candidate'][index][0:2]
291 |
292 | try:
293 | index = int(
294 | new_coordinates['subset'][0][connected_line[0]])
295 |                 except Exception:
296 | continue
297 | if index == -1:
298 | continue
299 | new_point1 = new_coordinates['candidate'][index][0:2]
300 |
301 | try:
302 | index = int(
303 | new_coordinates['subset'][0][connected_line[1]])
304 |                 except Exception:
305 | continue
306 |
307 | if index == -1:
308 | continue
309 | new_point2 = new_coordinates['candidate'][index][0:2]
310 |
311 | if (point1 == new_point1) and (point2 == new_point2):
312 | continue
313 |
314 | # if the arm, extend the point2
315 | if (connected_line == [3, 4]) or (connected_line == [6, 7]):
318 | point2[0] = point2[0] + 0.6 * (point2[0] - point1[0])
319 | point2[1] = point2[1] + 0.6 * (point2[1] - point1[1])
320 |
321 | length = ((point1[0] - point2[0])**2 +
322 | (point1[1] - point2[1])**2)**0.5
323 |
324 | ori_rec_points = self.find_parallel_points(
325 | point1, point2, 0.25 * length)
326 |
327 | # if the arm, extend the point2
328 | if (connected_line == [3, 4]) or (connected_line == [6, 7]):
331 | new_point2[0] = new_point2[0] + 0.6 * (
332 | new_point2[0] - new_point1[0])
333 | new_point2[1] = new_point2[1] + 0.6 * (
334 | new_point2[1] - new_point1[1])
335 |
336 | length = ((new_point1[0] - new_point2[0])**2 +
337 | (new_point1[1] - new_point2[1])**2)**0.5
338 | new_rec_points = self.find_parallel_points(
339 | new_point1, new_point2, 0.25 * length)
340 |
341 | warped_exemplar = self.warp_img(exemplar_img, ori_rec_points,
342 | new_rec_points)
343 |
344 | masked_area = np.zeros_like(exemplar_img[:, :, 0])
345 | cv2.fillPoly(masked_area, [np.array(ori_rec_points)], 255)
346 | masked_area = masked_area * (parsing_map > 0)
347 |
348 | new_exemplar_img[masked_area == 255] = 255
349 |
350 | warped_parsing = self.warp_parsing(parsing_map, ori_rec_points,
351 | new_rec_points)
352 |
353 | masked_area = np.zeros_like(exemplar_img[:, :, 0])
354 | cv2.fillPoly(masked_area, [np.array(new_rec_points)], 255)
355 | masked_area = masked_area * (warped_parsing > 0)
356 |
357 | new_exemplar_img[masked_area == 255] = warped_exemplar[
358 | masked_area == 255]
359 |
360 | return new_exemplar_img, new_coordinates
361 | else:
362 | return exemplar_img, ori_coordinates
363 |
364 | def warp_img(self, img, rect1, rect2):
365 | shape1 = img.shape
366 | h = shape1[0]
367 | w = shape1[1]
368 |
369 | rect1 = np.array(rect1, dtype=np.float32)
370 | rect2 = np.array(rect2, dtype=np.float32)
371 |
372 |         # homography that maps the quadrilateral rect1 onto rect2
373 |         H = cv2.getPerspectiveTransform(src=rect1, dst=rect2)
374 |
378 | img_warped = cv2.warpPerspective(src=img, M=H, dsize=(w, h))
379 |
380 | return img_warped
381 |
382 | def find_parallel_points(self, point1, point2, distance):
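        """Return the four corners of an oriented rectangle around a segment.

        The rectangle runs along point1 -> point2 with half-width `distance`;
        two corners sit on each side of the line, ordered so the result is a
        valid polygon for cv2.fillPoly.
        """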
386 |
387 | # Calculate the angle of the line
388 | angle = np.arctan2(point2[1] - point1[1], point2[0] - point1[0])
389 |
390 | # Calculate new points parallel to the line
391 | parallel_points = []
392 | for direction in [-1,
393 | 1]: # Two directions (left and right of the line)
394 | new_x = point1[0] + direction * distance * np.sin(angle)
395 | new_y = point1[1] - direction * distance * np.cos(angle)
396 | parallel_points.append((int(new_x), int(new_y)))
397 |
398 | for direction in [1,
399 | -1]: # Two directions (left and right of the line)
400 | new_x = point2[0] + direction * distance * np.sin(angle)
401 | new_y = point2[1] - direction * distance * np.cos(angle)
402 | parallel_points.append((int(new_x), int(new_y)))
403 |
404 | return parallel_points
405 |
406 | def read_img(self, img_path):
407 |
408 | img = np.array(Image.open(img_path).convert('RGB'))
409 |
410 | return img
411 |
412 | def read_img_exemplar_mask(self, img, candidate_parsing_list):
413 |
414 | img_exemplar_list = []
415 | parsing_exemplar_list = []
416 | for parsing in candidate_parsing_list:
417 | mask_binary = np.zeros((img.shape[0], img.shape[1]),
418 | dtype=np.uint8)
419 | mask_binary[parsing > 0] = 1
420 |
421 | img_exemplar = img.copy()
422 |
423 | img_exemplar[mask_binary == 0] = 255.
424 | inner_dilated_aug = random.uniform(0, 1)
425 | if inner_dilated_aug < 0.2:
426 | structuring_element = np.ones((5, 5), dtype=bool)
427 | dilated_mask_binary = binary_dilation(
428 | 1 - mask_binary, structure=structuring_element)
429 | img_exemplar[dilated_mask_binary == 1] = 255.
430 |
431 | mask_tensor = torch.from_numpy(mask_binary).unsqueeze(0)
432 |
433 | obj_ids = torch.unique(mask_tensor)
434 | obj_ids = obj_ids[1:]
435 | masks = mask_tensor == obj_ids[:, None, None]
436 |
437 | boxes = masks_to_boxes(masks)
438 |
439 | h, w = mask_binary.shape
440 |
441 | # make the bounding box slightly larger
442 | enlarge_ratio = 0.1
443 | enlarge_margin_h = int((boxes[0][3] - boxes[0][1]) * enlarge_ratio)
444 | enlarge_margin_w = int((boxes[0][2] - boxes[0][0]) * enlarge_ratio)
445 |
446 | bbox_y1, bbox_y2 = max(0,
447 | int(boxes[0][1]) - enlarge_margin_h), min(
448 | h,
449 | int(boxes[0][3]) + enlarge_margin_h)
450 | bbox_x1, bbox_x2 = max(0,
451 | int(boxes[0][0]) - enlarge_margin_w), min(
452 | w,
453 | int(boxes[0][2]) + enlarge_margin_w)
454 | img_exemplar = img_exemplar[bbox_y1:bbox_y2, bbox_x1:bbox_x2]
455 | img_exemplar_list.append(img_exemplar)
456 | parsing_exemplar_list.append(parsing[bbox_y1:bbox_y2,
457 | bbox_x1:bbox_x2])
458 |
459 | return img_exemplar_list, parsing_exemplar_list
460 |
461 | def transform_exemplar_and_parsing(self, exemplar_img, parsing):
462 |
463 | random_affine_transformation = transforms.RandomAffine(
464 | degrees=20,
465 | translate=(0.1, 0.1),
466 | scale=(0.9, 1.10),
467 | fill=255,
468 | interpolation=transforms.InterpolationMode.BILINEAR)
469 | resize_transform_img = transforms.Resize(size=512)
470 | resize_transform_parsing = transforms.Resize(
471 | size=512, interpolation=transforms.InterpolationMode.NEAREST)
472 |
473 | channels, height, width = exemplar_img.size()
474 |
475 | ret = random_affine_transformation.get_params(
476 | random_affine_transformation.degrees,
477 | random_affine_transformation.translate,
478 | random_affine_transformation.scale,
479 | random_affine_transformation.shear, [width, height])
480 |
481 | fill = 255
482 | if isinstance(exemplar_img, Tensor):
483 | if isinstance(fill, (int, float)):
484 | fill = [float(fill)] * channels
485 | else:
486 | fill = [float(f) for f in fill]
487 |
488 | exemplar_img = F.affine(
489 | exemplar_img,
490 | *ret,
491 | interpolation=transforms.InterpolationMode.BILINEAR,
492 | fill=fill,
493 | center=random_affine_transformation.center)
494 |
495 | channels, _, _ = parsing.size()
496 | fill = 0
497 | if isinstance(parsing, Tensor):
498 | if isinstance(fill, (int, float)):
499 | fill = [float(fill)] * channels
500 | else:
501 | fill = [float(f) for f in fill]
502 |
503 | parsing = F.affine(
504 | parsing,
505 | *ret,
506 | interpolation=transforms.InterpolationMode.NEAREST,
507 | fill=fill,
508 | center=random_affine_transformation.center)
509 |
510 | exemplar_img = resize_transform_img(exemplar_img)
511 | parsing = resize_transform_parsing(parsing)
512 |
513 | return exemplar_img, parsing
514 |
515 | def random_brush_top_down(self, skeleton_mask, ori_rec_points):
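        """Synthesize an irregular brush stroke sweeping across the rectangle.

        Jittered points are sampled along the rectangle's top and bottom
        edges, connected as a zig-zag polyline of random width with rounded
        joints, and returned as a {0, 255} uint8 mask.
        """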
516 | mask = Image.new('L', (skeleton_mask.shape[1], skeleton_mask.shape[0]), 0)
517 |
518 | num_points = int(np.random.uniform(8, 15))
519 |
520 | sampled_points_top = np.linspace(ori_rec_points[0], ori_rec_points[1], num_points)
521 | sampled_points_top = [(int(x), int(y)) for x, y in sampled_points_top]
522 |
523 | sampled_points_down = np.linspace(ori_rec_points[3], ori_rec_points[2], num_points)
524 | sampled_points_down = [(int(x), int(y)) for x, y in sampled_points_down]
525 |
526 | vertex = []
527 | for top_point, down_point in zip(sampled_points_top, sampled_points_down):
528 | random_move = np.random.uniform(-0.6, 0.6)
529 | sampled_x, sampled_y = top_point
530 | sampled_x = sampled_x + int(random_move * (sampled_points_top[1][0] - sampled_points_top[0][0]))
531 | sampled_y = sampled_y - int(np.random.uniform(0, 1.0) * (sampled_points_down[1][1] - sampled_points_down[0][1]))
532 | vertex.append((sampled_x, sampled_y))
533 |
534 | sampled_x, sampled_y = down_point
535 | random_move = np.random.uniform(-0.6, 0.6)
536 | sampled_x = sampled_x + int(random_move * (sampled_points_top[1][0] - sampled_points_top[0][0]))
537 | sampled_y = sampled_y + int(np.random.uniform(0, 1.0) * (sampled_points_down[1][1] - sampled_points_down[0][1]))
538 | vertex.append((sampled_x, sampled_y))
539 |
540 | draw = ImageDraw.Draw(mask)
541 | min_width = 12
542 | max_width = 48
543 | width = int(np.random.uniform(min_width, max_width))
544 | draw.line(vertex, fill=1, width=width)
545 | for v in vertex:
546 | draw.ellipse((v[0] - width//2,
547 | v[1] - width//2,
548 | v[0] + width//2,
549 | v[1] + width//2),
550 | fill=1)
551 |
552 | mask = np.asarray(mask, np.uint8) * 255
553 |
554 | return mask
555 |
556 | def load_arm_hand_masks(self, skeleton_mask, selected_person_bbox,
557 | instance_parsing_list):
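        """Merge dilated arm/hand parsing regions into the skeleton mask.

        The instance whose parsing overlaps the selected person's bbox the
        most is chosen; its arm/hand parsing labels (5/7 and 6/8) are dilated
        with an elliptical kernel and merged into skeleton_mask. Returns the
        updated mask and the chosen instance index.
        """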
558 | area_list = []
559 | for instance_parsing in instance_parsing_list:
560 | mask_binary = np.zeros((instance_parsing.shape[0], instance_parsing.shape[1]), dtype=np.uint8)
561 | mask_binary[instance_parsing > 0] = 1
562 | area = np.sum(selected_person_bbox * mask_binary)
563 | area_list.append(area)
564 |
565 |         selected_idx = np.argmax(area_list)
566 |
567 |         selected_parsing = instance_parsing_list[selected_idx]
568 |
569 | temp_mask = np.zeros_like(selected_parsing)
570 | for value in [5, 7]:
571 | temp_mask[selected_parsing == value] = 1
572 | if np.sum(temp_mask) != 0:
573 | kernel_width = 28
574 | kernel_height = 45
575 |
576 | kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_width, kernel_height))
577 | dilated_mask = cv2.dilate(temp_mask, kernel)
578 |
579 | skeleton_mask[skeleton_mask == 0] = dilated_mask[skeleton_mask == 0]
580 |
581 | temp_mask = np.zeros_like(selected_parsing)
582 | for value in [6, 8]:
583 | temp_mask[selected_parsing == value] = 1
584 | if np.sum(temp_mask) != 0:
585 | kernel_width = 28
586 | kernel_height = 45
587 |
588 | kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_width, kernel_height))
589 | dilated_mask = cv2.dilate(temp_mask, kernel)
590 |
591 | skeleton_mask[skeleton_mask == 0] = dilated_mask[skeleton_mask == 0]
592 |
593 |         return skeleton_mask, selected_idx
594 |
595 | def random_brush_down_top(self, skeleton_mask, ori_rec_points):
596 | mask = Image.new('L', (skeleton_mask.shape[1], skeleton_mask.shape[0]), 0)
597 |
598 | num_points = int(np.random.uniform(8, 15))
599 |
600 | sampled_points_top = np.linspace(ori_rec_points[0], ori_rec_points[1], num_points)
601 | sampled_points_top = [(int(x), int(y)) for x, y in sampled_points_top]
602 |
603 | sampled_points_down = np.linspace(ori_rec_points[3], ori_rec_points[2], num_points)
604 | sampled_points_down = [(int(x), int(y)) for x, y in sampled_points_down]
605 |
606 | vertex = []
607 | for top_point, down_point in zip(sampled_points_down, sampled_points_top):
608 | random_move = np.random.uniform(-0.6, 0.6)
609 | sampled_x, sampled_y = top_point
610 | sampled_x = sampled_x + int(random_move * (sampled_points_top[1][0] - sampled_points_top[0][0]))
611 | sampled_y = sampled_y - int(np.random.uniform(0, 1.0) * (sampled_points_down[1][1] - sampled_points_down[0][1]))
612 | vertex.append((sampled_x, sampled_y))
613 |
614 | sampled_x, sampled_y = down_point
615 | random_move = np.random.uniform(-0.6, 0.6)
616 | sampled_x = sampled_x + int(random_move * (sampled_points_top[1][0] - sampled_points_top[0][0]))
617 | sampled_y = sampled_y + int(np.random.uniform(0, 1.0) * (sampled_points_down[1][1] - sampled_points_down[0][1]))
618 | vertex.append((sampled_x, sampled_y))
619 |
620 | draw = ImageDraw.Draw(mask)
621 | min_width = 12
622 | max_width = 48
623 | width = int(np.random.uniform(min_width, max_width))
624 | draw.line(vertex, fill=1, width=width)
625 | for v in vertex:
626 | draw.ellipse((v[0] - width//2,
627 | v[1] - width//2,
628 | v[0] + width//2,
629 | v[1] + width//2),
630 | fill=1)
631 |
632 | mask = np.asarray(mask, np.uint8) * 255
633 |
637 | return mask
638 |
639 | def random_brush_left_right(self, skeleton_mask, ori_rec_points):
640 | mask = Image.new('L', (skeleton_mask.shape[1], skeleton_mask.shape[0]), 0)
641 |
642 | num_points = int(np.random.uniform(8, 15))
643 |
644 | sampled_points_top = np.linspace(ori_rec_points[3], ori_rec_points[0], num_points)
645 | sampled_points_top = [(int(x), int(y)) for x, y in sampled_points_top]
646 |
647 | sampled_points_down = np.linspace(ori_rec_points[2], ori_rec_points[1], num_points)
648 | sampled_points_down = [(int(x), int(y)) for x, y in sampled_points_down]
649 |
650 | vertex = []
651 | for top_point, down_point in zip(sampled_points_down, sampled_points_top):
652 | random_move = np.random.uniform(-0.6, 0.6)
653 | sampled_x, sampled_y = top_point
654 | sampled_x = sampled_x - int(np.random.uniform(0, 1.0) * (sampled_points_top[1][0] - sampled_points_top[0][0]))
655 | sampled_y = sampled_y + int(random_move * (sampled_points_down[1][1] - sampled_points_down[0][1]))
656 | vertex.append((sampled_x, sampled_y))
657 |
658 | sampled_x, sampled_y = down_point
659 | random_move = np.random.uniform(-0.6, 0.6)
660 | sampled_x = sampled_x + int(np.random.uniform(0, 1.0) * (sampled_points_top[1][0] - sampled_points_top[0][0]))
661 | sampled_y = sampled_y + int(random_move * (sampled_points_down[1][1] - sampled_points_down[0][1]))
662 | vertex.append((sampled_x, sampled_y))
663 |
664 | draw = ImageDraw.Draw(mask)
665 | min_width = 12
666 | max_width = 48
667 | width = int(np.random.uniform(min_width, max_width))
668 | draw.line(vertex, fill=1, width=width)
669 | for v in vertex:
670 | draw.ellipse((v[0] - width//2,
671 | v[1] - width//2,
672 | v[0] + width//2,
673 | v[1] + width//2),
674 | fill=1)
675 |
676 | mask = np.asarray(mask, np.uint8) * 255
677 |
681 | return mask
682 |
683 | def random_brush_right_left(self, skeleton_mask, ori_rec_points):
684 | mask = Image.new('L', (skeleton_mask.shape[1], skeleton_mask.shape[0]), 0)
685 |
686 | num_points = int(np.random.uniform(8, 15))
687 |
688 | sampled_points_top = np.linspace(ori_rec_points[3], ori_rec_points[0], num_points)
689 | sampled_points_top = [(int(x), int(y)) for x, y in sampled_points_top]
690 |
691 | sampled_points_down = np.linspace(ori_rec_points[2], ori_rec_points[1], num_points)
692 | sampled_points_down = [(int(x), int(y)) for x, y in sampled_points_down]
693 |
694 | vertex = []
695 | for top_point, down_point in zip(sampled_points_top, sampled_points_down):
696 | random_move = np.random.uniform(-0.6, 0.6)
697 | sampled_x, sampled_y = top_point
698 | sampled_x = sampled_x + int(np.random.uniform(0, 1.0) * (sampled_points_top[1][0] - sampled_points_top[0][0]))
699 | sampled_y = sampled_y + int(random_move * (sampled_points_down[1][1] - sampled_points_down[0][1]))
700 | vertex.append((sampled_x, sampled_y))
701 |
702 | sampled_x, sampled_y = down_point
703 | random_move = np.random.uniform(-0.6, 0.6)
704 | sampled_x = sampled_x - int(np.random.uniform(0, 1.0) * (sampled_points_top[1][0] - sampled_points_top[0][0]))
705 | sampled_y = sampled_y + int(random_move * (sampled_points_down[1][1] - sampled_points_down[0][1]))
706 | vertex.append((sampled_x, sampled_y))
707 |
708 | draw = ImageDraw.Draw(mask)
709 | min_width = 12
710 | max_width = 48
711 | width = int(np.random.uniform(min_width, max_width))
712 | draw.line(vertex, fill=1, width=width)
713 | for v in vertex:
714 | draw.ellipse((v[0] - width//2,
715 | v[1] - width//2,
716 | v[0] + width//2,
717 | v[1] + width//2),
718 | fill=1)
719 |
720 | mask = np.asarray(mask, np.uint8) * 255
721 |
722 | return mask
723 |
724 | def random_brush_augment(self, skeleton_mask, ori_rec_points):
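        """Pick a random sweep direction and merge the brush stroke into skeleton_mask."""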
725 |
726 | brush_direction_type = random.uniform(0, 1)
727 | if brush_direction_type < 0.25:
728 | brush_mask = self.random_brush_top_down(skeleton_mask, ori_rec_points)
729 | elif brush_direction_type < 0.5:
730 | brush_mask = self.random_brush_down_top(skeleton_mask, ori_rec_points)
731 | elif brush_direction_type < 0.75:
732 | brush_mask = self.random_brush_left_right(skeleton_mask, ori_rec_points)
733 | else:
734 | brush_mask = self.random_brush_right_left(skeleton_mask, ori_rec_points)
735 |
736 | skeleton_mask[skeleton_mask == 0] = brush_mask[skeleton_mask == 0]
737 | return skeleton_mask
738 |
739 | def compute_diff_mask(self, ori_coordinates, new_coordinates,
740 | skeleton_mask):
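        """Grow the inpainting mask to cover limbs that moved during reposing.

        Every joint whose position differs between ori_coordinates and
        new_coordinates is located, and an augmented oriented rectangle is
        painted over the affected limb at both its old and its new position,
        so the mask covers the arm before and after the pose change.
        """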
741 |
742 | skeleton_mask = skeleton_mask * 255
743 |
744 | diff_skeleton_list = []
745 | for subset_idx, subset in enumerate(ori_coordinates['subset']):
746 | for skeleton_idx in range(18):
747 | if ori_coordinates['candidate'][
748 | ori_coordinates['subset'][subset_idx]
749 | [skeleton_idx]] != new_coordinates['candidate'][
750 | new_coordinates['subset'][subset_idx][skeleton_idx]]:
751 | diff_skeleton_list.append(f'{subset_idx}_{skeleton_idx}')
752 |
753 | limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \
754 | [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \
755 | [1, 16], [16, 18], [3, 17], [6, 18]]
756 |
757 | for diff_skeleton in diff_skeleton_list:
758 | subset_idx, skeleton_idx = diff_skeleton.split('_')
759 | subset_idx = int(subset_idx)
760 | for limb in limbSeq:
761 | if int(skeleton_idx) + 1 in limb:
762 | index_point_1 = int(
763 | ori_coordinates['subset'][subset_idx][limb[0] - 1])
764 | index_point_2 = int(
765 | ori_coordinates['subset'][subset_idx][limb[1] - 1])
766 |
767 | if index_point_1 != -1 and index_point_2 != -1:
768 | point1 = ori_coordinates['candidate'][index_point_1][
769 | 0:2]
770 | point2 = ori_coordinates['candidate'][index_point_2][
771 | 0:2]
772 |
773 | point2[0] = point2[0] + 0.7 * (point2[0] - point1[0])
774 | point2[1] = point2[1] + 0.7 * (point2[1] - point1[1])
775 |
776 | length = ((point1[0] - point2[0])**2 +
777 | (point1[1] - point2[1])**2)**0.5
778 |
779 | length_ratio = random.uniform(0.20, 0.40)
780 | ori_rec_points = self.find_parallel_points(
781 | point1, point2, length_ratio * length)
782 |
783 | cv2.fillPoly(skeleton_mask, [np.array(ori_rec_points)],
784 | 255)
785 | skeleton_mask = self.random_brush_augment(skeleton_mask, ori_rec_points)
786 |
787 | index_point_1 = int(
788 | new_coordinates['subset'][subset_idx][limb[0] - 1])
789 | index_point_2 = int(
790 | new_coordinates['subset'][subset_idx][limb[1] - 1])
791 |
792 | if index_point_1 != -1 and index_point_2 != -1:
793 | point1 = new_coordinates['candidate'][index_point_1][
794 | 0:2]
795 | point2 = new_coordinates['candidate'][index_point_2][
796 | 0:2]
797 |
798 | point2[0] = point2[0] + 0.7 * (point2[0] - point1[0])
799 | point2[1] = point2[1] + 0.7 * (point2[1] - point1[1])
800 |
801 | length = ((point1[0] - point2[0])**2 +
802 | (point1[1] - point2[1])**2)**0.5
803 |
804 | length_ratio = random.uniform(0.20, 0.40)
805 | ori_rec_points = self.find_parallel_points(
806 | point1, point2, length_ratio * length)
807 |
808 | cv2.fillPoly(skeleton_mask, [np.array(ori_rec_points)],
809 | 255)
810 | skeleton_mask = self.random_brush_augment(skeleton_mask, ori_rec_points)
811 | skeleton_mask = skeleton_mask / 255
812 |
813 | return skeleton_mask
814 |
815 | def get_id_feature(self, candidate_parsing_list):
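        """Return one slightly enlarged bounding-box mask per candidate instance."""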
816 |
817 | id_feature_list = []
818 | for instance_parsing in candidate_parsing_list:
819 | bbox_mask = np.zeros(
820 | (instance_parsing.shape[0], instance_parsing.shape[1]),
821 | dtype=np.uint8)
822 | mask_binary = np.zeros(
823 | (instance_parsing.shape[0], instance_parsing.shape[1]),
824 | dtype=np.uint8)
825 | mask_binary[instance_parsing > 0] = 1
826 |
827 | mask_tensor = torch.from_numpy(mask_binary).unsqueeze(0)
828 |
829 | obj_ids = torch.unique(mask_tensor)
830 | obj_ids = obj_ids[1:]
831 | masks = mask_tensor == obj_ids[:, None, None]
832 |
833 | boxes = masks_to_boxes(masks)
834 |
835 | h, w = mask_binary.shape
836 |
837 | enlarge_ratio = 0.1
838 | enlarge_margin_h = int((boxes[0][3] - boxes[0][1]) * enlarge_ratio)
839 | enlarge_margin_w = int((boxes[0][2] - boxes[0][0]) * enlarge_ratio)
840 |
841 | bbox_y1, bbox_y2 = max(0,
842 | int(boxes[0][1]) - enlarge_margin_h), min(
843 | h,
844 | int(boxes[0][3]) + enlarge_margin_h)
845 | bbox_x1, bbox_x2 = max(0,
846 | int(boxes[0][0]) - enlarge_margin_w), min(
847 | w,
848 | int(boxes[0][2]) + enlarge_margin_w)
849 | bbox_mask[bbox_y1:bbox_y2, bbox_x1:bbox_x2] = 1
850 | id_feature_list.append(bbox_mask)
851 |
852 | return id_feature_list
853 |
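    # NOTE: generate_skeletion_mask is defined twice in this class; Python keeps
    # the second (bbox-based) definition further below, so this rectangle-brush
    # variant is effectively shadowed.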
854 | def generate_skeletion_mask(self, coordinates, skeleton_map):
855 | skeleton_mask = np.zeros(
856 | (skeleton_map.shape[0], skeleton_map.shape[1]), dtype=np.uint8)
857 |
858 | candidate = coordinates['candidate']
859 | subset = coordinates['subset']
860 |
861 | selected_person_idx = random.choice(range(len(subset)))
862 |
863 | skeleton_joint_list = []
864 | random_type = random.uniform(0, 1)
865 | if random_type < 0.35:
866 | skeleton_joint_list.append([2, 3])
867 | skeleton_joint_list.append([3, 4])
868 | elif random_type < 0.7:
869 | skeleton_joint_list.append([5, 6])
870 | skeleton_joint_list.append([6, 7])
871 | else:
872 | skeleton_joint_list.append([2, 3])
873 | skeleton_joint_list.append([3, 4])
874 | skeleton_joint_list.append([5, 6])
875 | skeleton_joint_list.append([6, 7])
876 |
877 | # left and right arms
878 | for skeleton_joint in skeleton_joint_list:
879 | index_point_1 = int(subset[selected_person_idx][skeleton_joint[0]])
880 | index_point_2 = int(subset[selected_person_idx][skeleton_joint[1]])
881 |
882 | if index_point_1 != -1 and index_point_2 != -1:
883 | point1 = candidate[index_point_1][0:2]
884 | point2 = candidate[index_point_2][0:2]
885 |
886 | point2[0] = point2[0] + 0.7 * (point2[0] - point1[0])
887 | point2[1] = point2[1] + 0.7 * (point2[1] - point1[1])
888 |
889 | length = ((point1[0] - point2[0])**2 +
890 | (point1[1] - point2[1])**2)**0.5
891 |
892 | length_ratio = random.uniform(0.20, 0.40)
893 | ori_rec_points = self.find_parallel_points(
894 | point1, point2, length_ratio * length)
895 |
896 | cv2.fillPoly(skeleton_mask, [np.array(ori_rec_points)], 255)
897 |
901 |                 skeleton_mask = self.random_brush_augment(skeleton_mask, ori_rec_points)
905 |
906 | skeleton_mask = skeleton_mask / 255
907 |
908 | # selected person bbox
909 | selected_person_bbox = np.zeros(
910 | (skeleton_map.shape[0], skeleton_map.shape[1]), dtype=np.uint8)
911 | x_list = []
912 | y_list = []
913 |
914 | for i in range(18):
915 | index = int(subset[selected_person_idx][i])
916 | if index == -1:
917 | continue
918 | x, y = candidate[index][0:2]
919 | x_list.append(x)
920 | y_list.append(y)
921 |
922 | x_min = min(x_list)
923 | x_max = max(x_list)
924 | y_min = min(y_list)
925 | y_max = max(y_list)
926 |
927 | x1 = int(max(0, x_min - 0.4 * (x_max - x_min)))
928 | x2 = int(x_max + 0.4 * (x_max - x_min))
929 | y1 = int(max(0, y_min - 0.4 * (y_max - y_min)))
930 | y2 = int(y_max + 0.4 * (y_max - y_min))
931 |
932 | selected_person_bbox[y1:y2, x1:x2] = 1
933 |
934 | return skeleton_mask, selected_person_idx, selected_person_bbox
935 |
936 | def mmpose_to_openpose(self, mmpose_coordinates, bbox_threshold=0.2):
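        """Convert MMPose COCO-17 predictions into the OpenPose 18-joint format.

        Low-confidence person and keypoint detections are dropped (-1 in the
        subset). The OpenPose neck joint has no COCO counterpart, so it is
        synthesized as the midpoint of the two shoulders (COCO keypoints 5
        and 6) when both are confidently detected.
        """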
937 | num_persons = len(mmpose_coordinates)
938 | coordinates = {}
939 | coordinates['subset'] = []
940 | coordinates['candidate'] = []
941 |
942 | coordinate_count = 0
943 | for person_idx in range(num_persons):
944 | if mmpose_coordinates[person_idx]["bbox_score"] < bbox_threshold:
945 | continue
946 | subset = {}
947 | for subset_idx in range(18):
948 | subset[subset_idx] = -1
949 | for subset_idx, skeleton_idx in enumerate(
950 | [0, 17, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4,
951 | 3]):
952 | if skeleton_idx == 17:
953 | if mmpose_coordinates[person_idx]["keypoint_scores"][
954 | 6] < 0.1:
955 | continue
956 | if mmpose_coordinates[person_idx]["keypoint_scores"][
957 | 5] < 0.1:
958 | continue
959 | subset[subset_idx] = coordinate_count
960 | coordinates_6 = mmpose_coordinates[person_idx][
961 | "keypoints"][6]
962 | coordinates_5 = mmpose_coordinates[person_idx][
963 | "keypoints"][5]
964 | coordinates['candidate'].append([
965 | (coordinates_6[0] + coordinates_5[0]) / 2.0,
966 | (coordinates_6[1] + coordinates_5[1]) / 2.0
967 | ])
968 | coordinate_count += 1
969 | else:
970 | if mmpose_coordinates[person_idx]["keypoint_scores"][
971 | skeleton_idx] < 0.5:
972 | continue
973 | subset[subset_idx] = coordinate_count
974 | coordinates['candidate'].append(
975 | mmpose_coordinates[person_idx]["keypoints"]
976 | [skeleton_idx])
977 | coordinate_count += 1
978 |
979 | coordinates['subset'].append(subset)
980 |
981 | return coordinates
982 |
983 | def generate_bbox_from_mask(self, mask):
984 | # Find the coordinates of non-zero elements in the mask
985 | y_coords, x_coords = np.where(mask)
986 |
987 | if len(y_coords) == 0 or len(x_coords) == 0:
988 | # No non-zero elements found (empty mask)
989 | return None
990 |
991 | # Compute the bounding box corners
992 | y_min, y_max = np.min(y_coords), np.max(y_coords)
993 | x_min, x_max = np.min(x_coords), np.max(x_coords)
994 |
995 | # Return the bounding box coordinates as (y_min, x_min, y_max, x_max)
996 | return y_min, x_min, y_max, x_max
997 |
998 | def generate_skeletion_mask(self, coordinates, skeleton_map):
999 | skeleton_mask = np.zeros(
1000 | (skeleton_map.shape[0], skeleton_map.shape[1]), dtype=np.uint8)
1001 |
1002 | candidate = coordinates['candidate']
1003 | subset = coordinates['subset']
1004 |
1005 | selected_person_idx = random.choice(range(len(subset)))
1006 |
1007 | # left arms
1008 | coordinates_x_list = []
1009 | coordinates_y_list = []
1010 | for body_idx in [2, 3, 4]:
1011 | index = int(subset[selected_person_idx][body_idx])
1012 | if index == -1:
1013 | continue
1014 | coordinates_x, coordinates_y = candidate[index][0:2]
1015 | coordinates_x_list.append(coordinates_x)
1016 | coordinates_y_list.append(coordinates_y)
1017 |
1018 | if len(coordinates_x_list) != 0:
1019 | left_x = int(min(coordinates_x_list))
1020 | up_y = int(min(coordinates_y_list))
1021 |
1022 | right_x = int(max(coordinates_x_list))
1023 | down_y = int(max(coordinates_y_list))
1024 |
1025 | pad_width = int(max(down_y - up_y, right_x - left_x) * 0.15)
1026 |
1027 | skeleton_mask[max(0, up_y - pad_width):down_y + pad_width,
1028 | max(0, left_x - pad_width):right_x + pad_width] = 1
1029 |
1030 | # right arms
1031 | coordinates_x_list = []
1032 | coordinates_y_list = []
1033 | for body_idx in [5, 6, 7]:
1034 | index = int(subset[selected_person_idx][body_idx])
1035 | if index == -1:
1036 | continue
1037 | coordinates_x, coordinates_y = candidate[index][0:2]
1038 | coordinates_x_list.append(coordinates_x)
1039 | coordinates_y_list.append(coordinates_y)
1040 |
1041 | if len(coordinates_x_list) != 0:
1042 | left_x = int(min(coordinates_x_list))
1043 | up_y = int(min(coordinates_y_list))
1044 |
1045 | right_x = int(max(coordinates_x_list))
1046 | down_y = int(max(coordinates_y_list))
1047 |
1048 | pad_width = int(max(down_y - up_y, right_x - left_x) * 0.15)
1049 |
1050 | skeleton_mask[max(0, up_y - pad_width):down_y + pad_width,
1051 | max(0, left_x - pad_width):right_x + pad_width] = 1
1052 |
1053 | # selected person bbox
1054 | selected_person_bbox = np.zeros(
1055 | (skeleton_map.shape[0], skeleton_map.shape[1]), dtype=np.uint8)
1056 | x_list = []
1057 | y_list = []
1058 |
1059 | for i in range(18):
1060 | index = int(subset[selected_person_idx][i])
1061 | if index == -1:
1062 | continue
1063 | x, y = candidate[index][0:2]
1064 | x_list.append(x)
1065 | y_list.append(y)
1066 |
1067 | x_min = min(x_list)
1068 | x_max = max(x_list)
1069 | y_min = min(y_list)
1070 | y_max = max(y_list)
1071 |
1072 | x1 = int(max(0, x_min - 0.4 * (x_max - x_min)))
1073 | x2 = int(x_max + 0.4 * (x_max - x_min))
1074 | y1 = int(max(0, y_min - 0.4 * (y_max - y_min)))
1075 | y2 = int(y_max + 0.4 * (y_max - y_min))
1076 |
1077 | selected_person_bbox[y1:y2, x1:x2] = 1
1078 |
1079 | return skeleton_mask, selected_person_idx, selected_person_bbox
1080 |
1081 | def expand_identity_feature(self, id_feature_list, selected_idx,
1082 | inpaint_mask):
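        """Expand the selected identity mask to the bounding box of its union with the inpainting mask."""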
1083 |
1084 | id_feature_temp = id_feature_list[selected_idx].copy()
1085 | id_feature_temp[inpaint_mask == 1] = 1
1086 |
1087 | if np.sum(id_feature_temp) == (id_feature_temp.shape[0] *
1088 | id_feature_temp.shape[1]):
1089 | id_feature_list[selected_idx] = id_feature_temp
1090 | return id_feature_list
1091 |
1092 | mask_tensor = torch.from_numpy(id_feature_temp).unsqueeze(0)
1093 |
1094 | obj_ids = torch.unique(mask_tensor)
1095 | obj_ids = obj_ids[1:]
1096 | masks = mask_tensor == obj_ids[:, None, None]
1097 |
1098 | boxes = masks_to_boxes(masks)
1099 |
1100 | bbox_y1, bbox_y2 = max(0, int(boxes[0][1])), int(boxes[0][3])
1101 | bbox_x1, bbox_x2 = max(0, int(boxes[0][0])), int(boxes[0][2])
1102 |
1103 | id_feature_temp[bbox_y1:bbox_y2, bbox_x1:bbox_x2] = 1
1104 |
1105 | id_feature_list[selected_idx] = id_feature_temp
1106 |
1107 | return id_feature_list
1108 |
1109 | def adjust_coordinates(self, coordinates, original_size):
1110 | ratio = 512. / original_size
1111 | for candidate in coordinates['candidate']:
1112 | candidate[0] = candidate[0] * ratio
1113 | candidate[1] = candidate[1] * ratio
1114 |
1115 | return coordinates
1116 |
1117 | def flip_skeleton_coordinates(self, coordinates):
1118 |
1119 |         # left/right joint index pairs in the OpenPose-18 layout
1120 |         flip_pairs = {2: 5, 3: 6, 4: 7, 5: 2, 6: 3, 7: 4,
1121 |                       8: 11, 9: 12, 10: 13, 11: 8, 12: 9, 13: 10}
1122 |         for subset_index in range(len(coordinates['subset'])):
1123 |             new_subset = {}
1124 |             for index in range(18):
1125 |                 new_subset[index] = coordinates['subset'][subset_index][
1126 |                     flip_pairs.get(index, index)]
1127 |             coordinates['subset'][subset_index] = new_subset
1150 |
1151 | return coordinates
1152 |
1153 |
1154 |
1155 | def draw_bodypose(self, canvas, candidate, subset):
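        """Render an OpenPose-style skeleton onto canvas.

        Joints are drawn as small colored circles and each limb as a
        semi-transparent rotated ellipse, following the standard OpenPose
        limb sequence and color palette.
        """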
1156 | stickwidth = 4
1157 | limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \
1158 | [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \
1159 | [1, 16], [16, 18], [3, 17], [6, 18]]
1160 |
1161 | colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \
1162 | [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \
1163 | [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]
1164 | for i in range(18):
1165 | for n in range(len(subset)):
1166 | index = int(subset[n][i])
1167 | if index == -1:
1168 | continue
1169 | x, y = candidate[index][0:2]
1170 | cv2.circle(
1171 | canvas, (int(x), int(y)), 4, colors[i], thickness=-1)
1172 | for i in range(17):
1173 | for n in range(len(subset)):
1174 | index = [subset[n][point - 1] for point in limbSeq[i]]
1175 | if -1 in index:
1176 | continue
1177 | cur_canvas = canvas.copy()
1178 | Y = [candidate[int(point)][0] for point in index]
1179 | X = [candidate[int(point)][1] for point in index]
1180 | mX = np.mean(X)
1181 | mY = np.mean(Y)
1182 | length = ((X[0] - X[1])**2 + (Y[0] - Y[1])**2)**0.5
1183 | angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
1184 | polygon = cv2.ellipse2Poly((int(mY), int(mX)),
1185 | (int(length / 2), stickwidth),
1186 | int(angle), 0, 360, 1)
1187 | cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
1188 | canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
1191 | return canvas
1192 |
1193 | def crop_img_mask(self, img, human_mask, bbox_mask, bbox_coor, face_mask,
1194 | target_person_face_mask, skeleton_map, skeleton_mask,
1195 | coordinates, instance_parsing_list):
1196 | h, w, _ = img.shape
1197 |
1198 | x1, y1, x2, y2 = bbox_coor
1199 |
1200 | center_x, center_y = (x1 + x2) // 2, (y1 + y2) // 2
1201 |
1202 | bbox_h, bbox_w = y2 - y1, x2 - x1
1203 |
1204 | enlarge_bbox_ratio = 1.1
1205 |
1206 | enlarged_bbox = int(max([bbox_h, bbox_w]) * enlarge_bbox_ratio)
1207 |
1208 | cropped_size = min([h, w, enlarged_bbox])
1209 | cropped_size = cropped_size // 2 * 2
1210 |
1211 | crop_y1 = center_y - cropped_size // 2
1212 | crop_y2 = center_y + cropped_size // 2
1213 | crop_x1 = center_x - cropped_size // 2
1214 | crop_x2 = center_x + cropped_size // 2
1215 | if crop_y1 < 0:
1216 | crop_y1 = 0
1217 | crop_y2 = cropped_size
1218 |
1219 | if crop_y2 > h:
1220 | crop_y1 = h - cropped_size
1221 | crop_y2 = h
1222 |
1223 | if crop_x1 < 0:
1224 | crop_x1 = 0
1225 | crop_x2 = cropped_size
1226 |
1227 | if crop_x2 > w:
1228 | crop_x1 = w - cropped_size
1229 | crop_x2 = w
1230 |
1231 | img = img[crop_y1:crop_y2, crop_x1:crop_x2]
1232 | human_mask = human_mask[crop_y1:crop_y2, crop_x1:crop_x2]
1233 | bbox_mask = bbox_mask[crop_y1:crop_y2, crop_x1:crop_x2]
1234 | face_mask = face_mask[crop_y1:crop_y2, crop_x1:crop_x2]
1235 | target_person_face_mask = target_person_face_mask[crop_y1:crop_y2,
1236 | crop_x1:crop_x2]
1237 | skeleton_map = skeleton_map[crop_y1:crop_y2, crop_x1:crop_x2]
1238 | skeleton_mask = skeleton_mask[crop_y1:crop_y2, crop_x1:crop_x2]
1239 |
1240 | cropped_instance_parsing_list = []
1241 | for instance_parsing in instance_parsing_list:
1242 | cropped_instance_parsing_list.append(
1243 | instance_parsing[crop_y1:crop_y2, crop_x1:crop_x2])
1244 |
1245 | for candidate in coordinates['candidate']:
1246 | candidate[0] = candidate[0] - crop_x1
1247 | candidate[1] = candidate[1] - crop_y1
1248 |
1256 | return img, human_mask, bbox_mask, face_mask, target_person_face_mask, skeleton_map, skeleton_mask, coordinates, cropped_instance_parsing_list
1257 |
1258 | def occlusion_deleting(self, bbox_mask):
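        """Inpainting mask for the person-removal case.

        Masks a full-height vertical band, 20-40% of the person's bbox
        width, centered on either the left or the right edge of the bbox.
        """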
1259 |         indices = np.where(bbox_mask != 0)  # (rows, cols), i.e. (y, x)
1260 |         y_min, x_min = np.min(indices, axis=1)
1261 |         y_max, x_max = np.max(indices, axis=1)
1262 |
1263 |         inpaint_mask = np.zeros((bbox_mask.shape[0], bbox_mask.shape[1]),
1264 |                                 dtype=np.uint8)
1265 |         random_length = int(random.uniform(0.2, 0.4) * (x_max - x_min))
1266 |
1267 |         location = random.choice([0, 1])
1268 |         if location == 0:
1269 |             inpaint_mask[:,
1270 |                          max(0, x_max - random_length // 2):x_max +
1271 |                          random_length // 2] = 1
1272 |         else:
1273 |             inpaint_mask[:,
1274 |                          max(0, x_min - random_length // 2):x_min +
1275 |                          random_length // 2] = 1
1276 |
1277 | return inpaint_mask
1278 |
1279 | def get_id_color_map(self, id_feature_list):
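        """Paint each identity's bbox mask with a random color from its group.

        Overlapping masks are averaged via a per-pixel count map; returns the
        uint8 identity color map and the list of colors used.
        """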
1280 | random.shuffle(self.random_color_identity_group)
1281 |
1282 | color_list = []
1283 | identity_map = np.zeros(
1284 | (id_feature_list[0].shape[0], id_feature_list[0].shape[1], 3))
1285 | count_map = np.zeros(
1286 | (id_feature_list[0].shape[0], id_feature_list[0].shape[1]))
1287 | for idx, mask in enumerate(id_feature_list):
1288 | color_group_idx = idx % 5
1289 | random_color = random.choices(
1290 | self.random_color_identity_group[color_group_idx], k=1)[0]
1291 | temp_mask = np.zeros(
1292 | (id_feature_list[0].shape[0], id_feature_list[0].shape[1], 3),
1293 | dtype=np.uint8)
1294 |
1295 | temp_mask[mask == 1] = random_color
1299 | identity_map += temp_mask
1302 | count_map += mask
1303 | color_list.append(random_color)
1304 |
1305 | count_map[count_map == 0] = 1
1306 | count_map = count_map[:, :, np.newaxis]
1309 | identity_map = identity_map / count_map
1310 | identity_map = identity_map.astype(np.uint8)
1311 |
1312 | return identity_map, color_list
1313 |
1314 | def reposing_add(self, bbox_mask, inpaint_mask):
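        """Extend the inpainting mask over the lower part of the person.

        A random bottom fraction (up to 60%) of the person's bbox is marked
        for inpainting so that part of the body can be re-synthesized in a
        new pose.
        """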
1315 | indices = np.where(bbox_mask != 0)
1316 | y_min, x_min = np.min(indices, axis=1)
1317 | y_max, x_max = np.max(indices, axis=1)
1318 |
1319 | augmentation = random.uniform(0.4, 1.0)
1320 | random_length = int(augmentation * (y_max - y_min))
1321 | inpaint_mask[y_min + random_length:y_max, x_min:x_max] = 1
1322 |
1323 | return inpaint_mask
1324 |
1325 | def harmonization_add(self, img, bbox_mask, human_mask, inpaint_mask):
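        """Color-jitter the person so the model must re-harmonize it.

        A jittered copy of the person's pixels is pasted back into the image
        and the whole bbox is added to the inpainting mask.
        """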
1326 |
1327 | img_augmented = Image.fromarray(img)
1328 | transform = transforms.ColorJitter(
1329 | brightness=(0.7, 1.3), contrast=(0.7, 1.3), saturation=(0.7, 1.5))
1330 | img_augmented = np.array(transform(img_augmented))
1331 |
1332 | revised_img = img.copy()
1333 | revised_img[human_mask == 1, :] = img_augmented[human_mask == 1, :]
1334 | inpaint_mask[bbox_mask == 1] = 1
1335 |
1336 | return revised_img, inpaint_mask
1337 |
1338 | def occlusion_add(self, img, human_mask, bbox_mask):
1339 | indices = np.where(bbox_mask != 0)
1340 | y_min, x_min = np.min(indices, axis=1)
1341 | y_max, x_max = np.max(indices, axis=1)
1342 |
1343 | bbox_mask_revised = bbox_mask.copy()
1344 | column_mask = random.uniform(0, 1)
1345 | if column_mask < 0.5:
1346 | bbox_mask_revised[:, x_min:x_max] = 1
1347 |
1348 | inpaint_mask = np.zeros((img.shape[0], img.shape[1]), dtype=np.uint8)
1349 | inpaint_mask[(bbox_mask_revised - human_mask) > 0] = 1
1350 |
1351 | return inpaint_mask, bbox_mask_revised
1352 |
1353 | def read_mask_for_delete(self, selected_parsing_idx,
1354 | instance_parsing_list):
1355 | mask = instance_parsing_list[selected_parsing_idx]
1356 |
1357 | mask_binary = np.zeros((mask.shape[0], mask.shape[1]), dtype=np.uint8)
1358 | mask_binary[mask > 0] = 1
1359 |
1360 | mask_tensor = torch.from_numpy(mask_binary).unsqueeze(0)
1361 |
1362 | obj_ids = torch.unique(mask_tensor)
1363 | obj_ids = obj_ids[1:]
1364 | masks = mask_tensor == obj_ids[:, None, None]
1365 |
1366 | boxes = masks_to_boxes(masks)
1367 |
1368 | bbox_mask = np.zeros((mask.shape[0], mask.shape[1]), dtype=np.uint8)
1369 |
1370 | h, w = mask.shape
1371 |
1372 | bbox_y1, bbox_y2 = max(0, int(boxes[0][1])), min(h, int(boxes[0][3]))
1373 | bbox_x1, bbox_x2 = max(0, int(boxes[0][0])), min(w, int(boxes[0][2]))
1374 | bbox_mask[bbox_y1:bbox_y2, bbox_x1:bbox_x2] = 1
1375 |
1376 | face_mask = np.zeros((mask.shape[0], mask.shape[1]), dtype=np.uint8)
1377 | for idx, parsing in enumerate(instance_parsing_list):
1378 |
1379 | if idx == selected_parsing_idx:
1380 | target_person_face_mask = np.zeros(
1381 | (mask.shape[0], mask.shape[1]), dtype=np.uint8)
1382 |
1383 | for i in range(1, 5):
1384 | face_mask[parsing == i] = 1
1385 |
1386 | if idx == selected_parsing_idx:
1387 | target_person_face_mask[parsing == i] = 1
1388 |
1389 | return mask_binary, bbox_mask, (
1390 | bbox_x1, bbox_y1, bbox_x2,
1391 | bbox_y2), face_mask, target_person_face_mask
1392 |
1393 | def read_mask(self, selected_parsing_idx, instance_parsing_list):
1394 | mask = instance_parsing_list[selected_parsing_idx]
1395 |
1398 |         num_persons = len(instance_parsing_list)
1399 |         person_id = selected_parsing_idx + 1
1402 |
1403 | mask_binary = np.zeros((mask.shape[0], mask.shape[1]), dtype=np.uint8)
1404 | mask_binary[mask > 0] = 1
1405 |
1406 | mask_tensor = torch.from_numpy(mask_binary).unsqueeze(0)
1407 |
1408 | obj_ids = torch.unique(mask_tensor)
1409 | obj_ids = obj_ids[1:]
1410 | masks = mask_tensor == obj_ids[:, None, None]
1411 |
1412 | boxes = masks_to_boxes(masks)
1413 |
1414 | bbox_mask = np.zeros((mask.shape[0], mask.shape[1]), dtype=np.uint8)
1415 |
1416 | h, w = mask.shape
1417 |
1418 | # make the bounding box slightly larger
1419 | enlarge_ratio = 0.05
1420 | enlarge_margin_h = int((boxes[0][3] - boxes[0][1]) * enlarge_ratio)
1421 | enlarge_margin_w = int((boxes[0][2] - boxes[0][0]) * enlarge_ratio)
1422 |
1423 | if person_id > 1:
1425 | mask_left = instance_parsing_list[selected_parsing_idx - 1]
1426 |
1427 | mask_binary_left = np.zeros(
1428 | (mask_left.shape[0], mask_left.shape[1]), dtype=np.uint8)
1429 | mask_binary_left[mask_left > 0] = 1
1430 |
1431 | mask_tensor_left = torch.from_numpy(mask_binary_left).unsqueeze(0)
1432 |
1433 | obj_ids_left = torch.unique(mask_tensor_left)
1434 | obj_ids_left = obj_ids_left[1:]
1435 | masks_left = mask_tensor_left == obj_ids_left[:, None, None]
1436 |
1437 | boxes_left = masks_to_boxes(masks_left)
1438 |
1439 | enlarge_margin_left = min(
1440 | enlarge_margin_w,
1441 | int((boxes_left[0][2] - boxes_left[0][0]) * 0.05))
1442 | else:
1443 | enlarge_margin_left = enlarge_margin_w
1444 |
1445 | if person_id < num_persons:
1447 | mask_right = instance_parsing_list[selected_parsing_idx + 1]
1448 |
1449 | mask_binary_right = np.zeros(
1450 | (mask_right.shape[0], mask_right.shape[1]), dtype=np.uint8)
1451 | mask_binary_right[mask_right > 0] = 1
1452 |
1453 | mask_tensor_right = torch.from_numpy(mask_binary_right).unsqueeze(
1454 | 0)
1455 |
1456 | obj_ids_right = torch.unique(mask_tensor_right)
1457 | obj_ids_right = obj_ids_right[1:]
1458 | masks_right = mask_tensor_right == obj_ids_right[:, None, None]
1459 |
1460 | boxes_right = masks_to_boxes(masks_right)
1461 |
1462 | enlarge_margin_right = min(
1463 | enlarge_margin_w,
1464 | int((boxes_right[0][2] - boxes_right[0][0]) * 0.05))
1465 | else:
1466 | enlarge_margin_right = enlarge_margin_w
1467 |
1468 | bbox_y1, bbox_y2 = max(0,
1469 | int(boxes[0][1]) - enlarge_margin_h), min(
1470 | h,
1471 | int(boxes[0][3]) + enlarge_margin_h)
1472 | bbox_x1, bbox_x2 = max(0,
1473 | int(boxes[0][0]) - enlarge_margin_left), min(
1474 | w,
1475 | int(boxes[0][2]) + enlarge_margin_right)
1476 | bbox_mask[bbox_y1:bbox_y2, bbox_x1:bbox_x2] = 1
1477 |
1478 | face_mask = np.zeros((mask.shape[0], mask.shape[1]), dtype=np.uint8)
1479 |
1480 | for idx, parsing in enumerate(instance_parsing_list):
1481 |
1482 | if idx == selected_parsing_idx:
1483 | target_person_face_mask = np.zeros(
1484 | (mask.shape[0], mask.shape[1]), dtype=np.uint8)
1485 |
1486 | for i in range(1, 5):
1487 | face_mask[parsing == i] = 1
1488 |
1489 | if idx == selected_parsing_idx:
1490 | target_person_face_mask[parsing == i] = 1
1491 |
1492 | return mask_binary, bbox_mask, (
1493 | bbox_x1, bbox_y1, bbox_x2,
1494 | bbox_y2), face_mask, target_person_face_mask
1495 |
1496 | def remove_background(self, img, instance_parsing_list):
1497 |
1498 | mask_binary = np.zeros((img.shape[0], img.shape[1]), dtype=np.uint8)
1499 | for instance_parsing in instance_parsing_list:
1500 | mask_binary[instance_parsing > 0] = 1
1501 |
1502 | img[mask_binary == 0] = 255
1503 |
1504 | return img
1505 |
1506 | def load_instance_parsing_maps(self, parsing_path_list):
1507 |
1508 | parsing_list = []
1509 | for parsing_path in parsing_path_list:
1510 | mask = np.array(Image.open(parsing_path).convert('RGB'))[:, :, 0]
1511 | parsing_list.append(mask)
1512 |
1513 | return parsing_list
1514 |
1515 |
1516 | def __getitem__(self, index):
1517 |
1518 | while True:
1519 | try:
1520 | img_path = self.data_path_list[index]
1521 | img_name = img_path.split('/')[-1][:-4]
1522 | parsing_path_list = glob.glob(
1523 | f'{self.parsing_dir}/{img_name}_*.png')
1524 |
1525 | instance_parsing_list = self.load_instance_parsing_maps(
1526 | parsing_path_list)
1527 |
1528 | coordinates_path = f'{self.pose_estimation_path}/{img_name}.json'
1529 |                 # load the MMPose keypoint predictions for this image
1530 |                 mmpose_coordinates = json.load(open(coordinates_path))
1531 |
1532 | coordinates = self.mmpose_to_openpose(mmpose_coordinates)
1533 |
1534 | img = self.read_img(img_path)
1535 |
1536 | # remove background
1537 | remove_bg_aug = random.uniform(0, 1)
1538 | if remove_bg_aug < 0.2:
1539 | img = self.remove_background(img, instance_parsing_list)
1540 |
1541 | canvas = np.zeros_like(img)
1542 | skeleton_map = self.draw_bodypose(canvas,
1543 | coordinates['candidate'],
1544 | coordinates['subset'])
1545 |
1546 | skeleton_mask, selected_person_idx, selected_person_bbox = self.generate_skeletion_mask(
1547 | coordinates, skeleton_map)
1548 |
1549 | new_coordinates = self.randomly_change_pose(
1550 | coordinates, selected_person_idx)
1551 |
1552 | skeleton_mask = self.compute_diff_mask(coordinates,
1553 | new_coordinates,
1554 | skeleton_mask)
1555 |
1556 | skeleton_mask, selected_parsing_idx = self.load_arm_hand_masks(
1557 | skeleton_mask, selected_person_bbox, instance_parsing_list)
1558 |
1559 | # parsing_path = random.choice(candidate_parsing_path_list)
1560 | # augmentation types:
1561 | # 1) occlusion for adding a person;
1562 | # 2) harmonization when adding a person;
1563 | # 3) reposing when adding a person;
1564 |                 # 4) occlusion for removing a person
1565 | augmentation_type = random.uniform(0, 1)
1566 | if augmentation_type < 0.8:
1567 | # add person cases
1568 | human_mask, bbox_mask, bbox_coor, face_mask, target_person_face_mask = self.read_mask(
1569 | selected_parsing_idx, instance_parsing_list)
1570 |
1571 | # define the crop region
1572 | img, human_mask, bbox_mask, face_mask, target_person_face_mask, skeleton_map, skeleton_mask, coordinates, instance_parsing_list = self.crop_img_mask(
1573 | img, human_mask, bbox_mask, bbox_coor, face_mask,
1574 | target_person_face_mask, skeleton_map, skeleton_mask,
1575 | coordinates, instance_parsing_list)
1576 | assert np.sum(bbox_mask) != 0
1577 |
1578 | revised_img = img.copy()
1579 |
1580 |                     # for each add-person case, we need to handle the occluded region
1581 | inpaint_mask, bbox_mask_revised = self.occlusion_add(
1582 | img, human_mask, bbox_mask)
1583 |
1584 | inpaint_mask[skeleton_mask == 1] = 1
1585 |
1586 | reposing_aug = random.uniform(0, 1)
1587 | if reposing_aug > 0.4:
1588 | inpaint_mask = self.reposing_add(
1589 | bbox_mask, inpaint_mask)
1590 |
1591 | inpaint_mask[face_mask == 1] = 0
1592 |
1593 |                     # optionally dilate the inpaint mask
1594 | dilate_inpaint_aug = random.uniform(0, 1)
1595 | if dilate_inpaint_aug < 0.4:
1596 | structuring_element = np.ones((5, 5), dtype=bool)
1597 | inpaint_mask = binary_dilation(
1598 | inpaint_mask,
1599 | structure=structuring_element).astype(np.uint8)
1600 |
1601 | inpaint_mask_after_reposing = inpaint_mask.copy()
1602 |
1603 | harmonization_aug = random.uniform(0, 1)
1604 | # add_harmonization = 0.1
1605 | if harmonization_aug < 0.5 and self.add_harmonization:
1606 | revised_img, inpaint_mask = self.harmonization_add(
1607 | img, bbox_mask_revised, human_mask, inpaint_mask)
1608 |
1609 | # exclude the surrounding person from the inpainting regions
1610 | inpaint_mask[(face_mask -
1611 | target_person_face_mask) == 1] = 0
1612 | # else:
1613 | # inpaint_mask[bbox_mask_revised == 1] = 1
1614 | # inpaint_mask[(face_mask - target_person_face_mask) == 1] = 0
1615 | else:
1616 | human_mask, bbox_mask, bbox_coor, face_mask, target_person_face_mask = self.read_mask_for_delete(
1617 | selected_parsing_idx, instance_parsing_list)
1618 |
1619 | # define the crop region
1620 | img, human_mask, bbox_mask, face_mask, target_person_face_mask, skeleton_map, skeleton_mask, coordinates, instance_parsing_list = self.crop_img_mask(
1621 | img, human_mask, bbox_mask, bbox_coor, face_mask,
1622 | target_person_face_mask, skeleton_map, skeleton_mask,
1623 | coordinates, instance_parsing_list)
1624 | assert np.sum(bbox_mask) != 0
1625 |
1626 | inpaint_mask = self.occlusion_deleting(human_mask)
1627 | revised_img = img.copy()
1628 |
1629 | inpaint_mask[skeleton_mask == 1] = 1
1630 | inpaint_mask[face_mask == 1] = 0
1631 |
1632 | inpaint_mask_after_reposing = inpaint_mask.copy()
1633 |
1634 | # load the exemplar image
1635 | candidate_parsing_list, idx_in_candidate_list = self.get_candidate_parsing_list_for_exemplar(
1636 | inpaint_mask, selected_parsing_idx, instance_parsing_list)
1637 |
1638 | if len(candidate_parsing_list) == 0:
1639 | index = random.randint(0, len(self.data_path_list) - 1)
1640 | continue
1641 |
1642 |                 # get the identity indicator features
1643 | id_feature_list = self.get_id_feature(candidate_parsing_list)
1644 |
1645 | # expand the id feature list using the inpaint mask
1646 | id_feature_list = self.expand_identity_feature(
1647 | id_feature_list, idx_in_candidate_list, inpaint_mask)
1648 |
1649 | id_color_map, color_list = self.get_id_color_map(
1650 | id_feature_list)
1651 |
1652 | img_exemplar_list, parsing_exemplar_list = self.read_img_exemplar_mask(
1653 | img, candidate_parsing_list)
1654 | for idx, (img_exemplar, parsing) in enumerate(
1655 | zip(img_exemplar_list, parsing_exemplar_list)):
1656 | incomplete_exemplar_aug = random.uniform(0, 1)
1657 | if incomplete_exemplar_aug < 0.4:
1658 | length = img_exemplar.shape[0]
1659 | random_portion = random.uniform(0.2, 0.6)
1660 | # the masked part should be directly cropped out, rather than applying the mask
1661 | # img_exemplar[-int(random_portion * length):, :] = 255
1662 | img_exemplar = img_exemplar[:-int(random_portion *
1663 | length), :]
1664 | parsing = parsing[:-int(random_portion * length), :]
1665 | img_exemplar_list[idx] = img_exemplar
1666 | parsing_exemplar_list[idx] = parsing
1667 |
1668 | img = torch.from_numpy(img).permute(2, 0, 1)
1669 | id_color_map = torch.from_numpy(id_color_map).permute(2, 0, 1)
1670 | skeleton_map = torch.from_numpy(skeleton_map).permute(2, 0, 1)
1671 | revised_img = torch.from_numpy(revised_img).permute(2, 0, 1)
1672 | inpaint_mask = torch.from_numpy(inpaint_mask).unsqueeze(0)
1673 | human_mask = torch.from_numpy(human_mask).unsqueeze(0)
1674 | skeleton_mask = torch.from_numpy(skeleton_mask).unsqueeze(0)
1675 |
1676 | exemplar_img_list = []
1677 | exemplar_skeleton_map_list = []
1678 | exemplar_skeleton_coordinates_list = []
1679 | exemplar_color_block_list = []
1680 | for idx, (img_exemplar, parsing) in enumerate(
1681 | zip(img_exemplar_list, parsing_exemplar_list)):
1682 |
1683 | img_exemplar = torch.from_numpy(img_exemplar).permute(
1684 | 2, 0, 1)
1685 | height, width = img_exemplar.size(1), img_exemplar.size(2)
1686 |
1687 | parsing = torch.from_numpy(parsing).unsqueeze(0)
1688 |
1689 | if height == width:
1690 | pass
1691 | elif height < width:
1692 | diff = width - height
1693 | top_pad = diff // 2
1694 | down_pad = diff - top_pad
1695 | left_pad = 0
1696 | right_pad = 0
1697 | padding_size = [left_pad, top_pad, right_pad, down_pad]
1698 | img_exemplar = F.pad(
1699 | img_exemplar, padding=padding_size, fill=255)
1700 | parsing = F.pad(parsing, padding=padding_size, fill=0)
1701 | else:
1702 | diff = height - width
1703 | left_pad = diff // 2
1704 | right_pad = diff - left_pad
1705 | top_pad = 0
1706 | down_pad = 0
1707 | padding_size = [left_pad, top_pad, right_pad, down_pad]
1708 | img_exemplar = F.pad(
1709 | img_exemplar, padding=padding_size, fill=255)
1710 | parsing = F.pad(parsing, padding=padding_size, fill=0)
1711 |
1712 | exemplar_img, parsing = self.transform_exemplar_and_parsing(
1713 | img_exemplar, parsing)
1714 | exemplar_img = exemplar_img.permute(1, 2, 0)
1715 | parsing = parsing.squeeze(0)
1716 |
1717 | exemplar_img, new_coordinates = self.reposing_exemplar_img(
1718 | exemplar_img.numpy(), parsing.numpy())
1719 |
1720 | exemplar_skeleton_map = self.draw_bodypose(
1721 | np.zeros_like(exemplar_img),
1722 | new_coordinates['candidate'],
1723 | new_coordinates['subset'])
1724 |
1725 | exemplar_img = self.resize_transform_exemplar(
1726 | torch.from_numpy(exemplar_img).permute(
1727 | 2, 0, 1)).permute(1, 2, 0) / 255.
1728 | exemplar_skeleton_map = torch.from_numpy(
1729 | exemplar_skeleton_map) / 255.0
1730 |
1731 | exemplar_skeleton_coordinates_list.append(new_coordinates)
1732 | # flip_random = random.uniform(0, 1)
1733 | # flip_random = 0.1
1734 | # if flip_random < 0.5:
1735 | # flip image
1736 | # exemplar_img = torch.fliplr(exemplar_img)
1737 | # flip skeleton
1738 | # new_coordinates = self.flip_skeleton_coordinates(new_coordinates)
1739 | # canvas = np.zeros_like(exemplar_skeleton_map)
1740 | # exemplar_skeleton_map = self.draw_bodypose(canvas, new_coordinates['candidate'], new_coordinates['subset'])
1741 | # exemplar_skeleton_map = torch.from_numpy(exemplar_skeleton_map) / 255.0
1742 | # exemplar_skeleton_map = torch.fliplr(exemplar_skeleton_map)
1743 |
1744 | exemplar_skeleton_map_list.append(exemplar_skeleton_map)
1745 | exemplar_img_list.append(exemplar_img)
1746 |
1747 | # generate color block
1750 | exemplar_color_block = torch.zeros_like(
1751 | exemplar_skeleton_map)
1752 | exemplar_color_block[:, :, 0] = color_list[idx][0]
1753 | exemplar_color_block[:, :, 1] = color_list[idx][1]
1754 | exemplar_color_block[:, :, 2] = color_list[idx][2]
1755 | exemplar_color_block = exemplar_color_block / 255.
1756 | # exemplar_color_block = torch.tensor([[[color_list[idx][0], color_list[idx][1], color_list[idx][2]]] * 224] * 224) / 255.
1757 | exemplar_color_block_list.append(exemplar_color_block)
1758 |
1759 | if len(exemplar_img_list) > 5:
1760 | index = random.randint(0, len(self.data_path_list) - 1)
1761 | continue
1762 |
1763 | if len(exemplar_img_list) < 5:
1764 | add_length = 5 - len(exemplar_img_list)
1765 | for _ in range(add_length):
1766 | exemplar_img_list.append(
1767 | torch.zeros_like(exemplar_img_list[0]))
1768 | exemplar_skeleton_map_list.append(
1769 | torch.zeros_like(exemplar_skeleton_map_list[0]))
1770 | exemplar_skeleton_coordinates_list.append(None)
1771 | exemplar_color_block_list.append(
1772 | torch.zeros_like(exemplar_color_block_list[0]))
1773 | id_feature_list.append(
1774 | np.zeros_like(id_feature_list[0]))
1775 |
1776 | id_feature_channel_list = []
1777 | for id_feature in id_feature_list:
1778 | id_feature_channel_list.append(
1779 | torch.from_numpy(id_feature.astype(
1780 | np.uint8)).unsqueeze(0))
1781 |
1782 | # id_feature_channel = torch.from_numpy(np.stack(id_feature_list, axis=0, dtype=np.uint8))
1783 |
1784 | img = self.resize_transform_img(img).permute(1, 2,
1785 | 0) / 127.5 - 1
1786 | id_color_map = self.resize_transform_mask(
1787 | id_color_map).permute(1, 2, 0) / 255.0
1788 | revised_img = self.resize_transform_img(revised_img).permute(
1789 | 1, 2, 0) / 127.5 - 1
1790 | coordinates = self.adjust_coordinates(coordinates,
1791 | inpaint_mask.size(1))
1792 | canvas = np.zeros_like(img)
1793 | skeleton_map = self.draw_bodypose(canvas,
1794 | coordinates['candidate'],
1795 | coordinates['subset'])
1796 | skeleton_map = torch.from_numpy(skeleton_map) / 255.0
1797 | inpaint_mask = self.resize_transform_mask(
1798 | inpaint_mask).permute(1, 2, 0)
1799 | human_mask = self.resize_transform_mask(human_mask).permute(
1800 | 1, 2, 0)
1801 | skeleton_mask = self.resize_transform_mask(
1802 | skeleton_mask).permute(1, 2, 0)
1803 |
1804 | for idx, id_feature in enumerate(id_feature_channel_list):
1805 | id_feature_channel_list[idx] = self.resize_transform_mask(
1806 | id_feature).permute(1, 2, 0)
1807 |
1808 | inpaint_mask_after_reposing = torch.from_numpy(
1809 | inpaint_mask_after_reposing).unsqueeze(0)
1810 | inpaint_mask_after_reposing = self.resize_transform_mask(
1811 | inpaint_mask_after_reposing).permute(1, 2, 0).squeeze(2)
1812 | revised_img[inpaint_mask_after_reposing == 1] = 0
1813 |
1814 | flip_img = random.uniform(0, 1)
1815 | # flip_img = 0.1
1816 | if flip_img < 0.5:
1817 | img = torch.fliplr(img)
1818 | id_color_map = torch.fliplr(id_color_map)
1819 | revised_img = torch.fliplr(revised_img)
1820 | inpaint_mask = torch.fliplr(inpaint_mask)
1821 | skeleton_mask = torch.fliplr(skeleton_mask)
1822 | coordinates = self.flip_skeleton_coordinates(coordinates)
1823 | canvas = np.zeros_like(img)
1824 | skeleton_map = self.draw_bodypose(canvas,
1825 | coordinates['candidate'],
1826 | coordinates['subset'])
1827 | skeleton_map = torch.from_numpy(skeleton_map) / 255.0
1828 | skeleton_map = torch.fliplr(skeleton_map)
1829 |
1830 | for idx, id_feature in enumerate(id_feature_channel_list):
1831 | id_feature_channel_list[idx] = torch.fliplr(id_feature)
1832 |
1833 |                     # flip the exemplars; their flip must stay consistent with the original image
1834 | for idx, exemplar_img in enumerate(exemplar_img_list):
1835 | exemplar_coordinate = exemplar_skeleton_coordinates_list[
1836 | idx]
1837 | if exemplar_coordinate is None:
1838 | break
1839 | exemplar_img_list[idx] = torch.fliplr(exemplar_img)
1840 |                         flipped_coordinates = self.flip_skeleton_coordinates(
1841 |                             exemplar_coordinate)
1842 |                         canvas = np.zeros_like(canvas)
1843 |                         exemplar_skeleton_map = self.draw_bodypose(
1844 |                             canvas, flipped_coordinates['candidate'],
1845 |                             flipped_coordinates['subset'])
1846 | exemplar_skeleton_map = torch.from_numpy(
1847 | exemplar_skeleton_map) / 255.0
1848 | exemplar_skeleton_map = torch.fliplr(
1849 | exemplar_skeleton_map)
1850 | exemplar_skeleton_map_list[idx] = exemplar_skeleton_map
1851 |
1852 | exemplar_img_list = torch.stack(exemplar_img_list, dim=0)
1853 | exemplar_skeleton_map_list = torch.stack(
1854 | exemplar_skeleton_map_list, dim=0)
1855 | exemplar_color_block_list = torch.stack(
1856 | exemplar_color_block_list, dim=0)
1857 | id_feature_channel = torch.stack(
1858 | id_feature_channel_list, dim=0)
1859 |
1860 | assert img.size()[0] == 512
1861 | assert img.size()[1] == 512
1862 |
1863 | break
1864 | except Exception as e:
1865 | print(e)
1866 | index = random.randint(0, len(self.data_path_list) - 1)
1867 |
1868 | return {
1869 | 'GT': img,
1870 | 'masked_image': revised_img,
1871 | 'mask': inpaint_mask,
1872 | 'text': 'A photo of group portrait.',
1873 | 'skeleton_map': skeleton_map,
1874 | 'skeleton_mask': skeleton_mask,
1875 | 'exemplar': exemplar_img_list,
1876 | 'exemplar_skeleton': exemplar_skeleton_map_list,
1877 | 'id_color_map': id_color_map,
1878 | 'exemplar_color_block': exemplar_color_block_list,
1879 | 'id_feature_channel': id_feature_channel
1880 | }
1881 |
1882 |
--------------------------------------------------------------------------------
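
For reference, a minimal sketch of how this dataset class might be driven during training. The batch size, worker count, import path, and `skeleton_path_prefix` value here are placeholders rather than values taken from this repo; the printed shapes follow from the resize/permute logic in `__getitem__` above.

```python
# Hypothetical usage sketch: wrap GroupDiffDataGen in a DataLoader and
# inspect one synthesized training sample. All paths are placeholders.
import torch.utils.data as data

from group_diff_data_gen import GroupDiffDataGen

dataset = GroupDiffDataGen(
    state='train',
    skeleton_path_prefix='./LV-MHP-v2/train/pose_estimation')  # assumed path
loader = data.DataLoader(dataset, batch_size=2, shuffle=True, num_workers=4)

batch = next(iter(loader))
print(batch['GT'].shape)        # (2, 512, 512, 3), images scaled to [-1, 1]
print(batch['mask'].shape)      # (2, 512, 512, 1), binary inpainting mask
print(batch['exemplar'].shape)  # (2, 5, 224, 224, 3), padded to 5 exemplars
print(batch['text'][0])         # 'A photo of group portrait.'
```
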
/data/openpose/__init__.py:
--------------------------------------------------------------------------------
1 | # Openpose
2 | # Original from CMU https://github.com/CMU-Perceptual-Computing-Lab/openpose
3 | # 2nd Edited by https://github.com/Hzzone/pytorch-openpose
4 | # 3rd Edited by ControlNet
5 |
6 | import os
7 | os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
8 |
9 | import torch
10 | import numpy as np
11 | from . import util
12 | from .body import Body
13 | from .hand import Hand
14 |
15 |
16 |
17 | class OpenposeDetector:
18 | def __init__(self, device=None):
19 | body_modelpath = './pretrained_models/body_pose_model.pth'
20 | hand_modelpath = './pretrained_models/hand_pose_model.pth'
21 |
22 | self.body_estimation = Body(body_modelpath, device)
23 | self.hand_estimation = Hand(hand_modelpath, device)
24 |
25 | def __call__(self, oriImg, hand=False):
26 | oriImg = oriImg[:, :, ::-1].copy()
27 | with torch.no_grad():
28 | candidate, subset = self.body_estimation(oriImg)
29 | canvas = np.zeros_like(oriImg)
30 | canvas = util.draw_bodypose(canvas, candidate, subset)
31 | if hand:
32 | hands_list = util.handDetect(candidate, subset, oriImg)
33 | all_hand_peaks = []
34 | for x, y, w, is_left in hands_list:
35 | peaks = self.hand_estimation(oriImg[y:y+w, x:x+w, :])
36 | peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + x)
37 | peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + y)
38 | all_hand_peaks.append(peaks)
39 | canvas = util.draw_handpose(canvas, all_hand_peaks)
40 | all_hand_peaks = [peak.tolist() for peak in all_hand_peaks]
41 |                 return canvas, dict(candidate=candidate.tolist(), subset=subset.tolist(), all_hand_peaks=all_hand_peaks)
42 | else:
43 | return canvas, dict(candidate=candidate.tolist(), subset=subset.tolist())
44 |
--------------------------------------------------------------------------------
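
A quick sketch of running the wrapper above on a single image (the image path is a placeholder; the two checkpoints are expected under `./pretrained_models`, as hard-coded in `__init__`):

```python
# Minimal usage sketch for OpenposeDetector; not part of the repo.
import numpy as np
from PIL import Image

from openpose import OpenposeDetector

detector = OpenposeDetector()        # pass device='cuda' to move the models to GPU
img = np.array(Image.open('group_photo.jpg'))  # H x W x 3 uint8, placeholder path
canvas, pose = detector(img)         # hand=True additionally estimates hand peaks

print(len(pose['subset']))           # one entry per detected person
Image.fromarray(canvas).save('skeleton_preview.png')
```
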
/data/openpose/body.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import math
4 | import time
5 | from scipy.ndimage import gaussian_filter  # scipy.ndimage.filters is removed in newer scipy
6 | import matplotlib.pyplot as plt
7 | import matplotlib
8 | import torch
9 | from torchvision import transforms
10 |
11 | from . import util
12 | from .model import bodypose_model
13 |
14 | class Body(object):
15 | def __init__(self, model_path, device=None):
16 | self.model = bodypose_model()
17 | self.device = device
18 | if device is not None:
19 | self.model = self.model.cuda()
20 | # if torch.cuda.is_available():
21 | # self.model = self.model.cuda()
22 | # print('cuda')
23 | model_dict = util.transfer(self.model, torch.load(model_path))
24 | self.model.load_state_dict(model_dict)
25 | self.model.eval()
26 |
27 | def __call__(self, oriImg):
28 | # scale_search = [0.5, 1.0, 1.5, 2.0]
29 | scale_search = [0.5]
30 | boxsize = 368
31 | stride = 8
32 | padValue = 128
33 | thre1 = 0.1
34 | thre2 = 0.05
35 | multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
36 | heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 19))
37 | paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))
38 |
39 | for m in range(len(multiplier)):
40 | scale = multiplier[m]
41 | imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
42 | imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
43 | im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
44 | im = np.ascontiguousarray(im)
45 |
46 | data = torch.from_numpy(im).float()
47 | if self.device is not None:
48 | # if torch.cuda.is_available():
49 | data = data.cuda()
50 | # data = data.permute([2, 0, 1]).unsqueeze(0).float()
51 | with torch.no_grad():
52 | Mconv7_stage6_L1, Mconv7_stage6_L2 = self.model(data)
53 | Mconv7_stage6_L1 = Mconv7_stage6_L1.cpu().numpy()
54 | Mconv7_stage6_L2 = Mconv7_stage6_L2.cpu().numpy()
55 |
56 | # extract outputs, resize, and remove padding
57 | # heatmap = np.transpose(np.squeeze(net.blobs[output_blobs.keys()[1]].data), (1, 2, 0)) # output 1 is heatmaps
58 | heatmap = np.transpose(np.squeeze(Mconv7_stage6_L2), (1, 2, 0)) # output 1 is heatmaps
59 | heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
60 | heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
61 | heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
62 |
63 | # paf = np.transpose(np.squeeze(net.blobs[output_blobs.keys()[0]].data), (1, 2, 0)) # output 0 is PAFs
64 | paf = np.transpose(np.squeeze(Mconv7_stage6_L1), (1, 2, 0)) # output 0 is PAFs
65 | paf = cv2.resize(paf, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
66 | paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
67 | paf = cv2.resize(paf, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
68 |
69 |             heatmap_avg += heatmap / len(multiplier)  # average over scales
70 |             paf_avg += paf / len(multiplier)
71 |
72 | all_peaks = []
73 | peak_counter = 0
74 |
75 | for part in range(18):
76 | map_ori = heatmap_avg[:, :, part]
77 | one_heatmap = gaussian_filter(map_ori, sigma=3)
78 |
79 | map_left = np.zeros(one_heatmap.shape)
80 | map_left[1:, :] = one_heatmap[:-1, :]
81 | map_right = np.zeros(one_heatmap.shape)
82 | map_right[:-1, :] = one_heatmap[1:, :]
83 | map_up = np.zeros(one_heatmap.shape)
84 | map_up[:, 1:] = one_heatmap[:, :-1]
85 | map_down = np.zeros(one_heatmap.shape)
86 | map_down[:, :-1] = one_heatmap[:, 1:]
87 |
88 | peaks_binary = np.logical_and.reduce(
89 | (one_heatmap >= map_left, one_heatmap >= map_right, one_heatmap >= map_up, one_heatmap >= map_down, one_heatmap > thre1))
90 | peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0])) # note reverse
91 | peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
92 | peak_id = range(peak_counter, peak_counter + len(peaks))
93 | peaks_with_score_and_id = [peaks_with_score[i] + (peak_id[i],) for i in range(len(peak_id))]
94 |
95 | all_peaks.append(peaks_with_score_and_id)
96 | peak_counter += len(peaks)
97 |
98 |     # find connections in the specified limb sequence
99 | limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \
100 | [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \
101 | [1, 16], [16, 18], [3, 17], [6, 18]]
102 |     # the middle joints heatmap correspondence
103 | mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22], \
104 | [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52], \
105 | [55, 56], [37, 38], [45, 46]]
106 |
107 | connection_all = []
108 | special_k = []
109 | mid_num = 10
110 |
111 | for k in range(len(mapIdx)):
112 | score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
113 | candA = all_peaks[limbSeq[k][0] - 1]
114 | candB = all_peaks[limbSeq[k][1] - 1]
115 | nA = len(candA)
116 | nB = len(candB)
117 | indexA, indexB = limbSeq[k]
118 | if (nA != 0 and nB != 0):
119 | connection_candidate = []
120 | for i in range(nA):
121 | for j in range(nB):
122 | vec = np.subtract(candB[j][:2], candA[i][:2])
123 | norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
124 | norm = max(0.001, norm)
125 | vec = np.divide(vec, norm)
126 |
127 | startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), \
128 | np.linspace(candA[i][1], candB[j][1], num=mid_num)))
129 |
130 | vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] \
131 | for I in range(len(startend))])
132 | vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] \
133 | for I in range(len(startend))])
134 |
135 | score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
136 | score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(
137 | 0.5 * oriImg.shape[0] / norm - 1, 0)
138 | criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts)
139 | criterion2 = score_with_dist_prior > 0
140 | if criterion1 and criterion2:
141 | connection_candidate.append(
142 | [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]])
143 |
144 | connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
145 | connection = np.zeros((0, 5))
146 | for c in range(len(connection_candidate)):
147 | i, j, s = connection_candidate[c][0:3]
148 | if (i not in connection[:, 3] and j not in connection[:, 4]):
149 | connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
150 | if (len(connection) >= min(nA, nB)):
151 | break
152 |
153 | connection_all.append(connection)
154 | else:
155 | special_k.append(k)
156 | connection_all.append([])
157 |
158 | # last number in each row is the total parts number of that person
159 | # the second last number in each row is the score of the overall configuration
160 | subset = -1 * np.ones((0, 20))
161 | candidate = np.array([item for sublist in all_peaks for item in sublist])
162 |
163 | for k in range(len(mapIdx)):
164 | if k not in special_k:
165 | partAs = connection_all[k][:, 0]
166 | partBs = connection_all[k][:, 1]
167 | indexA, indexB = np.array(limbSeq[k]) - 1
168 |
169 | for i in range(len(connection_all[k])): # = 1:size(temp,1)
170 | found = 0
171 | subset_idx = [-1, -1]
172 | for j in range(len(subset)): # 1:size(subset,1):
173 | if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
174 | subset_idx[found] = j
175 | found += 1
176 |
177 | if found == 1:
178 | j = subset_idx[0]
179 | if subset[j][indexB] != partBs[i]:
180 | subset[j][indexB] = partBs[i]
181 | subset[j][-1] += 1
182 | subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
183 | elif found == 2: # if found 2 and disjoint, merge them
184 | j1, j2 = subset_idx
185 | membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
186 | if len(np.nonzero(membership == 2)[0]) == 0: # merge
187 | subset[j1][:-2] += (subset[j2][:-2] + 1)
188 | subset[j1][-2:] += subset[j2][-2:]
189 | subset[j1][-2] += connection_all[k][i][2]
190 | subset = np.delete(subset, j2, 0)
191 |                     else:  # same as the found == 1 case
192 | subset[j1][indexB] = partBs[i]
193 | subset[j1][-1] += 1
194 | subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
195 |
196 |                 # if partA is not in any existing subset yet, create a new one
197 | elif not found and k < 17:
198 | row = -1 * np.ones(20)
199 | row[indexA] = partAs[i]
200 | row[indexB] = partBs[i]
201 | row[-1] = 2
202 | row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
203 | subset = np.vstack([subset, row])
204 |         # delete rows of subset that have too few detected parts or a low mean score
205 | deleteIdx = []
206 | for i in range(len(subset)):
207 | if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
208 | deleteIdx.append(i)
209 | subset = np.delete(subset, deleteIdx, axis=0)
210 |
211 | # subset: n*20 array, 0-17 is the index in candidate, 18 is the total score, 19 is the total parts
212 | # candidate: x, y, score, id
213 | return candidate, subset
214 |
215 | if __name__ == "__main__":
216 | body_estimation = Body('../model/body_pose_model.pth')
217 |
218 | test_image = '../images/ski.jpg'
219 | oriImg = cv2.imread(test_image) # B,G,R order
220 | candidate, subset = body_estimation(oriImg)
221 | canvas = util.draw_bodypose(oriImg, candidate, subset)
222 | plt.imshow(canvas[:, :, [2, 1, 0]])
223 | plt.show()
224 |
--------------------------------------------------------------------------------
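
The `(candidate, subset)` encoding returned by `Body.__call__` is easy to misread, so here is a small helper sketch (not in the repo) that unpacks it into per-person keypoints, following the comment at the end of the method:

```python
# Sketch: convert Body's (candidate, subset) output into per-person joints.
# subset is n_person x 20: columns 0-17 index into candidate (-1 = missing),
# column 18 is the configuration score, column 19 the number of parts found.
def unpack_people(candidate, subset):
    people = []
    for person in subset:
        joints = {}
        for part in range(18):
            idx = int(person[part])
            if idx == -1:
                continue                      # joint not detected for this person
            x, y, score = candidate[idx][:3]  # candidate rows are (x, y, score, id)
            joints[part] = (float(x), float(y), float(score))
        people.append(joints)
    return people
```
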
/data/openpose/hand.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import json
3 | import numpy as np
4 | import math
5 | import time
6 | from scipy.ndimage import gaussian_filter  # scipy.ndimage.filters is removed in newer scipy
7 | import matplotlib.pyplot as plt
8 | import matplotlib
9 | import torch
10 | from skimage.measure import label
11 |
12 | from .model import handpose_model
13 | from . import util
14 |
15 | class Hand(object):
16 | def __init__(self, model_path, device=None):
17 | self.model = handpose_model()
18 | self.device = device
19 | if device is not None:
20 | self.model = self.model.cuda()
21 | # if torch.cuda.is_available():
22 | # self.model = self.model.cuda()
23 | # print('cuda')
24 | model_dict = util.transfer(self.model, torch.load(model_path))
25 | self.model.load_state_dict(model_dict)
26 | self.model.eval()
27 |
28 | def __call__(self, oriImg):
29 | scale_search = [0.5, 1.0, 1.5, 2.0]
30 | # scale_search = [0.5]
31 | boxsize = 368
32 | stride = 8
33 | padValue = 128
34 | thre = 0.05
35 | multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
36 | heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 22))
37 | # paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))
38 |
39 | for m in range(len(multiplier)):
40 | scale = multiplier[m]
41 | imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
42 | imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
43 | im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
44 | im = np.ascontiguousarray(im)
45 |
46 | data = torch.from_numpy(im).float()
47 | if self.device is not None:
48 | # if torch.cuda.is_available():
49 | data = data.cuda()
50 | # data = data.permute([2, 0, 1]).unsqueeze(0).float()
51 | with torch.no_grad():
52 | output = self.model(data).cpu().numpy()
53 |             # output = self.model(data).numpy()
54 |
55 | # extract outputs, resize, and remove padding
56 | heatmap = np.transpose(np.squeeze(output), (1, 2, 0)) # output 1 is heatmaps
57 | heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
58 | heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
59 | heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
60 |
61 | heatmap_avg += heatmap / len(multiplier)
62 |
63 | all_peaks = []
64 | for part in range(21):
65 | map_ori = heatmap_avg[:, :, part]
66 | one_heatmap = gaussian_filter(map_ori, sigma=3)
67 | binary = np.ascontiguousarray(one_heatmap > thre, dtype=np.uint8)
68 |             # all values are below the threshold
69 | if np.sum(binary) == 0:
70 | all_peaks.append([0, 0])
71 | continue
72 | label_img, label_numbers = label(binary, return_num=True, connectivity=binary.ndim)
73 | max_index = np.argmax([np.sum(map_ori[label_img == i]) for i in range(1, label_numbers + 1)]) + 1
74 | label_img[label_img != max_index] = 0
75 | map_ori[label_img == 0] = 0
76 |
77 | y, x = util.npmax(map_ori)
78 | all_peaks.append([x, y])
79 | return np.array(all_peaks)
80 |
81 | if __name__ == "__main__":
82 | hand_estimation = Hand('../model/hand_pose_model.pth')
83 |
85 | test_image = '../images/hand.jpg'
86 | oriImg = cv2.imread(test_image) # B,G,R order
87 | peaks = hand_estimation(oriImg)
88 | canvas = util.draw_handpose(oriImg, peaks, True)
89 | cv2.imshow('', canvas)
90 | cv2.waitKey(0)
91 |
--------------------------------------------------------------------------------
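
Since `Hand.__call__` returns `(0, 0)` for every joint whose heatmap never clears the threshold, callers need to filter those out. A small sketch (paths are placeholders):

```python
# Sketch: run the hand estimator on a square crop and count detected joints.
import cv2

from openpose.hand import Hand

hand_estimation = Hand('./pretrained_models/hand_pose_model.pth')
crop = cv2.imread('hand_crop.jpg')   # square hand crop, placeholder path
peaks = hand_estimation(crop)        # (21, 2) array of (x, y) peaks

found = [(x, y) for x, y in peaks if not (x == 0 and y == 0)]
print(f'{len(found)} of 21 hand keypoints detected')
```
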
/data/openpose/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from collections import OrderedDict
3 |
4 | import torch
5 | import torch.nn as nn
6 |
7 | def make_layers(block, no_relu_layers):
8 | layers = []
9 | for layer_name, v in block.items():
10 | if 'pool' in layer_name:
11 | layer = nn.MaxPool2d(kernel_size=v[0], stride=v[1],
12 | padding=v[2])
13 | layers.append((layer_name, layer))
14 | else:
15 | conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1],
16 | kernel_size=v[2], stride=v[3],
17 | padding=v[4])
18 | layers.append((layer_name, conv2d))
19 | if layer_name not in no_relu_layers:
20 | layers.append(('relu_'+layer_name, nn.ReLU(inplace=True)))
21 |
22 | return nn.Sequential(OrderedDict(layers))
23 |
24 | class bodypose_model(nn.Module):
25 | def __init__(self):
26 | super(bodypose_model, self).__init__()
27 |
28 | # these layers have no relu layer
29 | no_relu_layers = ['conv5_5_CPM_L1', 'conv5_5_CPM_L2', 'Mconv7_stage2_L1',\
30 | 'Mconv7_stage2_L2', 'Mconv7_stage3_L1', 'Mconv7_stage3_L2',\
31 | 'Mconv7_stage4_L1', 'Mconv7_stage4_L2', 'Mconv7_stage5_L1',\
32 |                           'Mconv7_stage5_L2', 'Mconv7_stage6_L1', 'Mconv7_stage6_L2']  # was a duplicated L1 entry
33 | blocks = {}
34 | block0 = OrderedDict([
35 | ('conv1_1', [3, 64, 3, 1, 1]),
36 | ('conv1_2', [64, 64, 3, 1, 1]),
37 | ('pool1_stage1', [2, 2, 0]),
38 | ('conv2_1', [64, 128, 3, 1, 1]),
39 | ('conv2_2', [128, 128, 3, 1, 1]),
40 | ('pool2_stage1', [2, 2, 0]),
41 | ('conv3_1', [128, 256, 3, 1, 1]),
42 | ('conv3_2', [256, 256, 3, 1, 1]),
43 | ('conv3_3', [256, 256, 3, 1, 1]),
44 | ('conv3_4', [256, 256, 3, 1, 1]),
45 | ('pool3_stage1', [2, 2, 0]),
46 | ('conv4_1', [256, 512, 3, 1, 1]),
47 | ('conv4_2', [512, 512, 3, 1, 1]),
48 | ('conv4_3_CPM', [512, 256, 3, 1, 1]),
49 | ('conv4_4_CPM', [256, 128, 3, 1, 1])
50 | ])
51 |
52 |
53 | # Stage 1
54 | block1_1 = OrderedDict([
55 | ('conv5_1_CPM_L1', [128, 128, 3, 1, 1]),
56 | ('conv5_2_CPM_L1', [128, 128, 3, 1, 1]),
57 | ('conv5_3_CPM_L1', [128, 128, 3, 1, 1]),
58 | ('conv5_4_CPM_L1', [128, 512, 1, 1, 0]),
59 | ('conv5_5_CPM_L1', [512, 38, 1, 1, 0])
60 | ])
61 |
62 | block1_2 = OrderedDict([
63 | ('conv5_1_CPM_L2', [128, 128, 3, 1, 1]),
64 | ('conv5_2_CPM_L2', [128, 128, 3, 1, 1]),
65 | ('conv5_3_CPM_L2', [128, 128, 3, 1, 1]),
66 | ('conv5_4_CPM_L2', [128, 512, 1, 1, 0]),
67 | ('conv5_5_CPM_L2', [512, 19, 1, 1, 0])
68 | ])
69 | blocks['block1_1'] = block1_1
70 | blocks['block1_2'] = block1_2
71 |
72 | self.model0 = make_layers(block0, no_relu_layers)
73 |
74 | # Stages 2 - 6
75 | for i in range(2, 7):
76 | blocks['block%d_1' % i] = OrderedDict([
77 | ('Mconv1_stage%d_L1' % i, [185, 128, 7, 1, 3]),
78 | ('Mconv2_stage%d_L1' % i, [128, 128, 7, 1, 3]),
79 | ('Mconv3_stage%d_L1' % i, [128, 128, 7, 1, 3]),
80 | ('Mconv4_stage%d_L1' % i, [128, 128, 7, 1, 3]),
81 | ('Mconv5_stage%d_L1' % i, [128, 128, 7, 1, 3]),
82 | ('Mconv6_stage%d_L1' % i, [128, 128, 1, 1, 0]),
83 | ('Mconv7_stage%d_L1' % i, [128, 38, 1, 1, 0])
84 | ])
85 |
86 | blocks['block%d_2' % i] = OrderedDict([
87 | ('Mconv1_stage%d_L2' % i, [185, 128, 7, 1, 3]),
88 | ('Mconv2_stage%d_L2' % i, [128, 128, 7, 1, 3]),
89 | ('Mconv3_stage%d_L2' % i, [128, 128, 7, 1, 3]),
90 | ('Mconv4_stage%d_L2' % i, [128, 128, 7, 1, 3]),
91 | ('Mconv5_stage%d_L2' % i, [128, 128, 7, 1, 3]),
92 | ('Mconv6_stage%d_L2' % i, [128, 128, 1, 1, 0]),
93 | ('Mconv7_stage%d_L2' % i, [128, 19, 1, 1, 0])
94 | ])
95 |
96 | for k in blocks.keys():
97 | blocks[k] = make_layers(blocks[k], no_relu_layers)
98 |
99 | self.model1_1 = blocks['block1_1']
100 | self.model2_1 = blocks['block2_1']
101 | self.model3_1 = blocks['block3_1']
102 | self.model4_1 = blocks['block4_1']
103 | self.model5_1 = blocks['block5_1']
104 | self.model6_1 = blocks['block6_1']
105 |
106 | self.model1_2 = blocks['block1_2']
107 | self.model2_2 = blocks['block2_2']
108 | self.model3_2 = blocks['block3_2']
109 | self.model4_2 = blocks['block4_2']
110 | self.model5_2 = blocks['block5_2']
111 | self.model6_2 = blocks['block6_2']
112 |
113 |
114 | def forward(self, x):
115 |
116 | out1 = self.model0(x)
117 |
118 | out1_1 = self.model1_1(out1)
119 | out1_2 = self.model1_2(out1)
120 | out2 = torch.cat([out1_1, out1_2, out1], 1)
121 |
122 | out2_1 = self.model2_1(out2)
123 | out2_2 = self.model2_2(out2)
124 | out3 = torch.cat([out2_1, out2_2, out1], 1)
125 |
126 | out3_1 = self.model3_1(out3)
127 | out3_2 = self.model3_2(out3)
128 | out4 = torch.cat([out3_1, out3_2, out1], 1)
129 |
130 | out4_1 = self.model4_1(out4)
131 | out4_2 = self.model4_2(out4)
132 | out5 = torch.cat([out4_1, out4_2, out1], 1)
133 |
134 | out5_1 = self.model5_1(out5)
135 | out5_2 = self.model5_2(out5)
136 | out6 = torch.cat([out5_1, out5_2, out1], 1)
137 |
138 | out6_1 = self.model6_1(out6)
139 | out6_2 = self.model6_2(out6)
140 |
141 | return out6_1, out6_2
142 |
143 | class handpose_model(nn.Module):
144 | def __init__(self):
145 | super(handpose_model, self).__init__()
146 |
147 | # these layers have no relu layer
148 | no_relu_layers = ['conv6_2_CPM', 'Mconv7_stage2', 'Mconv7_stage3',\
149 | 'Mconv7_stage4', 'Mconv7_stage5', 'Mconv7_stage6']
150 | # stage 1
151 | block1_0 = OrderedDict([
152 | ('conv1_1', [3, 64, 3, 1, 1]),
153 | ('conv1_2', [64, 64, 3, 1, 1]),
154 | ('pool1_stage1', [2, 2, 0]),
155 | ('conv2_1', [64, 128, 3, 1, 1]),
156 | ('conv2_2', [128, 128, 3, 1, 1]),
157 | ('pool2_stage1', [2, 2, 0]),
158 | ('conv3_1', [128, 256, 3, 1, 1]),
159 | ('conv3_2', [256, 256, 3, 1, 1]),
160 | ('conv3_3', [256, 256, 3, 1, 1]),
161 | ('conv3_4', [256, 256, 3, 1, 1]),
162 | ('pool3_stage1', [2, 2, 0]),
163 | ('conv4_1', [256, 512, 3, 1, 1]),
164 | ('conv4_2', [512, 512, 3, 1, 1]),
165 | ('conv4_3', [512, 512, 3, 1, 1]),
166 | ('conv4_4', [512, 512, 3, 1, 1]),
167 | ('conv5_1', [512, 512, 3, 1, 1]),
168 | ('conv5_2', [512, 512, 3, 1, 1]),
169 | ('conv5_3_CPM', [512, 128, 3, 1, 1])
170 | ])
171 |
172 | block1_1 = OrderedDict([
173 | ('conv6_1_CPM', [128, 512, 1, 1, 0]),
174 | ('conv6_2_CPM', [512, 22, 1, 1, 0])
175 | ])
176 |
177 | blocks = {}
178 | blocks['block1_0'] = block1_0
179 | blocks['block1_1'] = block1_1
180 |
181 | # stage 2-6
182 | for i in range(2, 7):
183 | blocks['block%d' % i] = OrderedDict([
184 | ('Mconv1_stage%d' % i, [150, 128, 7, 1, 3]),
185 | ('Mconv2_stage%d' % i, [128, 128, 7, 1, 3]),
186 | ('Mconv3_stage%d' % i, [128, 128, 7, 1, 3]),
187 | ('Mconv4_stage%d' % i, [128, 128, 7, 1, 3]),
188 | ('Mconv5_stage%d' % i, [128, 128, 7, 1, 3]),
189 | ('Mconv6_stage%d' % i, [128, 128, 1, 1, 0]),
190 | ('Mconv7_stage%d' % i, [128, 22, 1, 1, 0])
191 | ])
192 |
193 | for k in blocks.keys():
194 | blocks[k] = make_layers(blocks[k], no_relu_layers)
195 |
196 | self.model1_0 = blocks['block1_0']
197 | self.model1_1 = blocks['block1_1']
198 | self.model2 = blocks['block2']
199 | self.model3 = blocks['block3']
200 | self.model4 = blocks['block4']
201 | self.model5 = blocks['block5']
202 | self.model6 = blocks['block6']
203 |
204 | def forward(self, x):
205 | out1_0 = self.model1_0(x)
206 | out1_1 = self.model1_1(out1_0)
207 | concat_stage2 = torch.cat([out1_1, out1_0], 1)
208 | out_stage2 = self.model2(concat_stage2)
209 | concat_stage3 = torch.cat([out_stage2, out1_0], 1)
210 | out_stage3 = self.model3(concat_stage3)
211 | concat_stage4 = torch.cat([out_stage3, out1_0], 1)
212 | out_stage4 = self.model4(concat_stage4)
213 | concat_stage5 = torch.cat([out_stage4, out1_0], 1)
214 | out_stage5 = self.model5(concat_stage5)
215 | concat_stage6 = torch.cat([out_stage5, out1_0], 1)
216 | out_stage6 = self.model6(concat_stage6)
217 | return out_stage6
218 |
219 |
220 |
--------------------------------------------------------------------------------
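
A quick shape sanity check for the two-branch body model (a sketch, not repo code): the three stride-2 poolings in `block0` downsample by 8, so a 368x368 input yields 46x46 maps with 38 PAF channels from the L1 branch and 19 heatmap channels from the L2 branch.

```python
# Sketch: verify the output shapes of bodypose_model with random weights.
import torch

from openpose.model import bodypose_model

model = bodypose_model().eval()
x = torch.randn(1, 3, 368, 368)
with torch.no_grad():
    paf, heatmap = model(x)

print(paf.shape)      # torch.Size([1, 38, 46, 46]) -- part affinity fields
print(heatmap.shape)  # torch.Size([1, 19, 46, 46]) -- 18 joints + background
```
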
/data/openpose/util.py:
--------------------------------------------------------------------------------
1 | import math
2 | import numpy as np
3 | import matplotlib
4 | import cv2
5 |
6 |
7 | def padRightDownCorner(img, stride, padValue):
8 | h = img.shape[0]
9 | w = img.shape[1]
10 |
11 | pad = 4 * [None]
12 | pad[0] = 0 # up
13 | pad[1] = 0 # left
14 | pad[2] = 0 if (h % stride == 0) else stride - (h % stride) # down
15 | pad[3] = 0 if (w % stride == 0) else stride - (w % stride) # right
16 |
17 | img_padded = img
18 | pad_up = np.tile(img_padded[0:1, :, :]*0 + padValue, (pad[0], 1, 1))
19 | img_padded = np.concatenate((pad_up, img_padded), axis=0)
20 | pad_left = np.tile(img_padded[:, 0:1, :]*0 + padValue, (1, pad[1], 1))
21 | img_padded = np.concatenate((pad_left, img_padded), axis=1)
22 | pad_down = np.tile(img_padded[-2:-1, :, :]*0 + padValue, (pad[2], 1, 1))
23 | img_padded = np.concatenate((img_padded, pad_down), axis=0)
24 | pad_right = np.tile(img_padded[:, -2:-1, :]*0 + padValue, (1, pad[3], 1))
25 | img_padded = np.concatenate((img_padded, pad_right), axis=1)
26 |
27 | return img_padded, pad
28 |
29 | # transfer the caffe model weights to pytorch, matching the layer names
30 | def transfer(model, model_weights):
31 | transfered_model_weights = {}
32 | for weights_name in model.state_dict().keys():
33 | transfered_model_weights[weights_name] = model_weights['.'.join(weights_name.split('.')[1:])]
34 | return transfered_model_weights
35 |
36 | # draw the body keypoints and limbs
37 | def draw_bodypose(canvas, candidate, subset):
38 | stickwidth = 4
39 | limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \
40 | [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \
41 | [1, 16], [16, 18], [3, 17], [6, 18]]
42 |
43 | colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \
44 | [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \
45 | [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]
46 | for i in range(18):
47 | for n in range(len(subset)):
48 | index = int(subset[n][i])
49 | if index == -1:
50 | continue
51 | x, y = candidate[index][0:2]
52 | cv2.circle(canvas, (int(x), int(y)), 4, colors[i], thickness=-1)
53 | for i in range(17):
54 | for n in range(len(subset)):
55 | index = subset[n][np.array(limbSeq[i]) - 1]
56 | if -1 in index:
57 | continue
58 | cur_canvas = canvas.copy()
59 | Y = candidate[index.astype(int), 0]
60 | X = candidate[index.astype(int), 1]
61 | mX = np.mean(X)
62 | mY = np.mean(Y)
63 | length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
64 | angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
65 | polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
66 | cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
67 | canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
68 | # plt.imsave("preview.jpg", canvas[:, :, [2, 1, 0]])
69 | # plt.imshow(canvas[:, :, [2, 1, 0]])
70 | return canvas
71 |
72 |
73 | # images drawn by opencv do not look good.
74 | def draw_handpose(canvas, all_hand_peaks, show_number=False):
75 | edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], \
76 | [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
77 |
78 | for peaks in all_hand_peaks:
79 | for ie, e in enumerate(edges):
80 | if np.sum(np.all(peaks[e], axis=1)==0)==0:
81 | x1, y1 = peaks[e[0]]
82 | x2, y2 = peaks[e[1]]
83 | cv2.line(canvas, (x1, y1), (x2, y2), matplotlib.colors.hsv_to_rgb([ie/float(len(edges)), 1.0, 1.0])*255, thickness=2)
84 |
85 |         for i, keypoint in enumerate(peaks):
86 |             x, y = keypoint
87 | cv2.circle(canvas, (x, y), 4, (0, 0, 255), thickness=-1)
88 | if show_number:
89 | cv2.putText(canvas, str(i), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), lineType=cv2.LINE_AA)
90 | return canvas
91 |
92 | # detect hand according to body pose keypoints
93 | # please refer to https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/hand/handDetector.cpp
94 | def handDetect(candidate, subset, oriImg):
95 | # right hand: wrist 4, elbow 3, shoulder 2
96 | # left hand: wrist 7, elbow 6, shoulder 5
97 | ratioWristElbow = 0.33
98 | detect_result = []
99 | image_height, image_width = oriImg.shape[0:2]
100 | for person in subset.astype(int):
101 |         # a hand is used only if its shoulder, elbow and wrist are all detected
102 | has_left = np.sum(person[[5, 6, 7]] == -1) == 0
103 | has_right = np.sum(person[[2, 3, 4]] == -1) == 0
104 | if not (has_left or has_right):
105 | continue
106 | hands = []
107 | #left hand
108 | if has_left:
109 | left_shoulder_index, left_elbow_index, left_wrist_index = person[[5, 6, 7]]
110 | x1, y1 = candidate[left_shoulder_index][:2]
111 | x2, y2 = candidate[left_elbow_index][:2]
112 | x3, y3 = candidate[left_wrist_index][:2]
113 | hands.append([x1, y1, x2, y2, x3, y3, True])
114 | # right hand
115 | if has_right:
116 | right_shoulder_index, right_elbow_index, right_wrist_index = person[[2, 3, 4]]
117 | x1, y1 = candidate[right_shoulder_index][:2]
118 | x2, y2 = candidate[right_elbow_index][:2]
119 | x3, y3 = candidate[right_wrist_index][:2]
120 | hands.append([x1, y1, x2, y2, x3, y3, False])
121 |
122 | for x1, y1, x2, y2, x3, y3, is_left in hands:
123 | # pos_hand = pos_wrist + ratio * (pos_wrist - pos_elbox) = (1 + ratio) * pos_wrist - ratio * pos_elbox
124 | # handRectangle.x = posePtr[wrist*3] + ratioWristElbow * (posePtr[wrist*3] - posePtr[elbow*3]);
125 | # handRectangle.y = posePtr[wrist*3+1] + ratioWristElbow * (posePtr[wrist*3+1] - posePtr[elbow*3+1]);
126 | # const auto distanceWristElbow = getDistance(poseKeypoints, person, wrist, elbow);
127 | # const auto distanceElbowShoulder = getDistance(poseKeypoints, person, elbow, shoulder);
128 | # handRectangle.width = 1.5f * fastMax(distanceWristElbow, 0.9f * distanceElbowShoulder);
129 | x = x3 + ratioWristElbow * (x3 - x2)
130 | y = y3 + ratioWristElbow * (y3 - y2)
131 | distanceWristElbow = math.sqrt((x3 - x2) ** 2 + (y3 - y2) ** 2)
132 | distanceElbowShoulder = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
133 | width = 1.5 * max(distanceWristElbow, 0.9 * distanceElbowShoulder)
134 | # x-y refers to the center --> offset to topLeft point
135 | # handRectangle.x -= handRectangle.width / 2.f;
136 | # handRectangle.y -= handRectangle.height / 2.f;
137 | x -= width / 2
138 | y -= width / 2 # width = height
139 |             # clip the box so it does not overflow the image
140 | if x < 0: x = 0
141 | if y < 0: y = 0
142 | width1 = width
143 | width2 = width
144 | if x + width > image_width: width1 = image_width - x
145 | if y + width > image_height: width2 = image_height - y
146 | width = min(width1, width2)
147 |             # discard hand boxes narrower than 20 pixels
148 | if width >= 20:
149 | detect_result.append([int(x), int(y), int(width), is_left])
150 |
151 | '''
152 | return value: [[x, y, w, True if left hand else False]].
153 |     width=height since the network requires a square input.
154 | x, y is the coordinate of top left
155 | '''
156 | return detect_result
157 |
158 | # get the 2d index (row, col) of the maximum of a 2d array
159 | def npmax(array):
160 | arrayindex = array.argmax(1)
161 | arrayvalue = array.max(1)
162 | i = arrayvalue.argmax()
163 | j = arrayindex[i]
164 | return i, j
165 |
--------------------------------------------------------------------------------
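
To make the padding convention concrete, a tiny sketch of `padRightDownCorner`: it pads only the bottom and right edges until both dimensions are multiples of the stride.

```python
# Sketch: pad a 367x505 image to the next multiples of stride 8.
import numpy as np

from openpose import util

img = np.zeros((367, 505, 3), dtype=np.uint8)
img_padded, pad = util.padRightDownCorner(img, stride=8, padValue=128)

print(img_padded.shape)  # (368, 512, 3)
print(pad)               # [0, 0, 1, 7] -> [up, left, down, right]
```
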