├── ibug ├── face_alignment │ ├── fan │ │ ├── weights │ │ │ └── .gitkeep │ │ ├── __init__.py │ │ ├── fan.py │ │ ├── fan_custom.py │ │ └── fan_predictor.py │ ├── __init__.py │ └── utils.py └── face_detection │ ├── retina_face │ ├── weights │ │ └── .gitkeep │ ├── __init__.py │ ├── config.py │ ├── py_cpu_nms.py │ ├── prior_box.py │ ├── retina_face.py │ ├── retina_face_net.py │ ├── retina_face_predictor.py │ └── box_utils.py │ ├── __init__.py │ └── utils │ ├── __init__.py │ ├── data │ └── bfm_lms.npy │ ├── head_pose_estimator.py │ └── simple_face_tracker.py ├── mask_images ├── 02_cloth.png ├── mask_pts.pkl ├── 04_surgical_blue.png ├── 03_surgical_white.png └── 01_surgical_light_blue.png ├── 02_pickle_data_train_list_copy.py ├── .gitignore ├── README.md ├── LICENSE ├── 00_pickle_data_sort.py ├── 01_data_cleaning.py ├── folder_split.py ├── verify_txts.py ├── create_filename_list.py ├── 10_create_masked_face_dataset_yolo_test_only_one_person.py ├── create_masked_face_dataset_yolo.py ├── create_masked_face_dataset_6drepnet.py ├── FaceMasking.py ├── 11_create_masked_face_dataset_yolo_test_yolov4_filter.py └── 12_create_masked_face_dataset_yolo_test_yolov4_annotation.py /ibug/face_alignment/fan/weights/.gitkeep: -------------------------------------------------------------------------------- 1 | 2dfan4.pth -------------------------------------------------------------------------------- /ibug/face_detection/retina_face/weights/.gitkeep: -------------------------------------------------------------------------------- 1 | mobilenet0.25_Final.pth 2 | Resnet50_Final.pth -------------------------------------------------------------------------------- /ibug/face_alignment/fan/__init__.py: -------------------------------------------------------------------------------- 1 | from ibug.face_alignment.fan.fan_predictor import FANPredictor 2 | -------------------------------------------------------------------------------- /mask_images/02_cloth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PINTO0309/Face_Mask_Augmentation/HEAD/mask_images/02_cloth.png -------------------------------------------------------------------------------- /mask_images/mask_pts.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PINTO0309/Face_Mask_Augmentation/HEAD/mask_images/mask_pts.pkl -------------------------------------------------------------------------------- /ibug/face_alignment/__init__.py: -------------------------------------------------------------------------------- 1 | from ibug.face_alignment.fan import FANPredictor 2 | 3 | 4 | __version__ = '0.1.0' 5 | -------------------------------------------------------------------------------- /ibug/face_detection/retina_face/__init__.py: -------------------------------------------------------------------------------- 1 | from ibug.face_detection.retina_face.retina_face_predictor import RetinaFacePredictor 2 | -------------------------------------------------------------------------------- /mask_images/04_surgical_blue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PINTO0309/Face_Mask_Augmentation/HEAD/mask_images/04_surgical_blue.png -------------------------------------------------------------------------------- /ibug/face_detection/__init__.py: -------------------------------------------------------------------------------- 1 | from ibug.face_detection.retina_face 
import RetinaFacePredictor 2 | 3 | 4 | __version__ = '0.1.0' 5 | -------------------------------------------------------------------------------- /mask_images/03_surgical_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PINTO0309/Face_Mask_Augmentation/HEAD/mask_images/03_surgical_white.png -------------------------------------------------------------------------------- /ibug/face_detection/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from ibug.face_detection.utils.head_pose_estimator import HeadPoseEstimator 2 | from ibug.face_detection.utils.simple_face_tracker import SimpleFaceTracker 3 | -------------------------------------------------------------------------------- /mask_images/01_surgical_light_blue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PINTO0309/Face_Mask_Augmentation/HEAD/mask_images/01_surgical_light_blue.png -------------------------------------------------------------------------------- /ibug/face_detection/utils/data/bfm_lms.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PINTO0309/Face_Mask_Augmentation/HEAD/ibug/face_detection/utils/data/bfm_lms.npy -------------------------------------------------------------------------------- /02_pickle_data_train_list_copy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle 3 | 4 | with open('3dmm_data/new_param_all_norm_v201.pkl', 'rb') as p: 5 | param_all_norm_v201 = pickle.load(p) 6 | print(f'param_all_norm_v201: {len(param_all_norm_v201)}') 7 | 8 | new_param_all_norm_v201 = [] 9 | for val in param_all_norm_v201: 10 | new_param_all_norm_v201.append(val) 11 | new_param_all_norm_v201.append(val) 12 | 13 | new_np_param_all_norm_v201 = np.asarray(new_param_all_norm_v201) 14 | print(new_np_param_all_norm_v201.shape) 15 | 16 | with open('3dmm_data/new_new_param_all_norm_v201.pkl', 'wb') as p: 17 | pickle.dump(new_np_param_all_norm_v201, p) 18 | 19 | # new_np_param_all_norm_v201: 1272504 -> 636252 x2 20 | print(f'new_np_param_all_norm_v201: {len(new_np_param_all_norm_v201)}') 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ibug_face_alignment.egg-info 2 | *train_aug_120x120_part/ 3 | *train_aug_120x120_part_masked/ 4 | *train_aug_120x120_part_masked_clean/ 5 | *300W_LP_croped/ 6 | train_aug_120x120_part_masked_clean.tar.gz 7 | 3dmm_data/ 8 | 3dmm_data.tar.gz 9 | 300W_LP_croped_masked/ 10 | multi_detected.csv 11 | non_detected.csv 12 | __pycache__/ 13 | .vscode/ 14 | 2dfan4.pth 15 | mobilenet0.25_Final.pth 16 | Resnet50_Final.pth 17 | 300W_LP_w_masked/ 18 | 300W_LP_w_no_masked_image_only/ 19 | 300W_LP_w_no_masked_image_only.tar.gz 20 | yolov4_headdetection_480x640_post.onnx 21 | yolov7_tiny_head_0.752_post_480x640.onnx 22 | 300W_LP/ 23 | 300W_LP_onlyone_person/ 24 | 300W_LP_onlyone_person_yolov4_filterd/ 25 | 300wlp-*x*/ 26 | data/ 27 | TensorrtExecutionProvider_* 28 | non_detected.csv 29 | multi_detected.csv -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Face_Mask_Augmentation 2 | Masked Face Image Augmentation Tool for Dataset 300W-LP with 6D Head Pose Information.
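A minimal usage sketch of the masking pipeline (adapted from `create_masked_face_dataset_yolo.py` further below; the file paths are placeholders, and the pretrained weights named in the `.gitkeep` files must first be placed under the respective `weights` directories):

```python
import cv2
import numpy as np
from FaceMasking import FaceMasker
from ibug.face_detection import RetinaFacePredictor
from ibug.face_alignment import FANPredictor

# Face detector, 68-point landmark detector, and the mask-overlay helper
face_detector = RetinaFacePredictor(
    threshold=0.8, device='cuda:0',
    model=RetinaFacePredictor.get_model('resnet50'))
landmark_detector = FANPredictor(
    device='cuda:0', model=FANPredictor.get_model('2dfan4'))
masker = FaceMasker()

image = cv2.imread('input.jpg')[..., ::-1]                # BGR -> RGB
detected_faces = face_detector(image, rgb=True)           # (N, 15): box, score, 5 landmarks
landmarks, scores = landmark_detector(image, detected_faces, rgb=True)
pts = [tuple(p.astype(np.int32)) for p in landmarks[0]]   # assumes at least one face was found
masked = masker.wear_mask_to_face(image, pts)             # random mask PNG unless mask_idx is set
cv2.imwrite('input_masked.jpg', masked[..., ::-1])        # back to BGR for OpenCV
```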
3 | 4 | https://user-images.githubusercontent.com/33194443/180627371-2ee52c66-d032-4021-b437-ba6a5d834eb0.mp4 5 | 6 | https://user-images.githubusercontent.com/33194443/177553855-ca0cea66-6453-4000-9977-861717dab1e5.mp4 7 | 8 | # Acknowledgements 9 | 1. https://github.com/hhj1897/face_alignment MIT license 10 | 2. https://github.com/hhj1897/face_detection MIT license 11 | 3. https://github.com/GajuuzZ/FaceNetPytoch-Mask No-License 12 | 4. https://ibug.doc.ic.ac.uk/resources/itwmm/ 3DMM 13 | 5. http://www.cbsr.ia.ac.cn/users/xiangyuzhu/projects/3DDFA/main.htm 300W-LP, AFLW2000 14 | 15 | # References 16 | 1. https://github.com/PINTO0309/DMHead 17 | 18 | https://user-images.githubusercontent.com/33194443/175073709-e9c43655-27a9-4760-a38c-768dabe33c1f.mp4 19 | -------------------------------------------------------------------------------- /ibug/face_detection/retina_face/config.py: -------------------------------------------------------------------------------- 1 | # config.py 2 | 3 | cfg_mnet = { 4 | 'name': 'mobilenet0.25', 5 | 'min_sizes': [[16, 32], [64, 128], [256, 512]], 6 | 'steps': [8, 16, 32], 7 | 'variance': [0.1, 0.2], 8 | 'clip': False, 9 | 'loc_weight': 2.0, 10 | 'gpu_train': True, 11 | 'batch_size': 32, 12 | 'ngpu': 1, 13 | 'epoch': 250, 14 | 'decay1': 190, 15 | 'decay2': 220, 16 | 'image_size': 640, 17 | 'pretrain': False, 18 | 'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3}, 19 | 'in_channel': 32, 20 | 'out_channel': 64 21 | } 22 | 23 | cfg_re50 = { 24 | 'name': 'Resnet50', 25 | 'min_sizes': [[16, 32], [64, 128], [256, 512]], 26 | 'steps': [8, 16, 32], 27 | 'variance': [0.1, 0.2], 28 | 'clip': False, 29 | 'loc_weight': 2.0, 30 | 'gpu_train': True, 31 | 'batch_size': 24, 32 | 'ngpu': 4, 33 | 'epoch': 100, 34 | 'decay1': 70, 35 | 'decay2': 90, 36 | 'image_size': 840, 37 | 'pretrain': False, 38 | 'return_layers': {'layer2': 1, 'layer3': 2, 'layer4': 3}, 39 | 'in_channel': 256, 40 | 'out_channel': 256 41 | } 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Katsuya Hyodo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /00_pickle_data_sort.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import copy 3 | import itertools 4 | import numpy as np 5 | from natsort import natsorted 6 | 7 | with open('3dmm_data/param_all_norm_v201.pkl', 'rb') as p: 8 | param_all_norm_v201 = pickle.load(p) 9 | 10 | with open('3dmm_data/train_aug_120x120.list.train') as f: 11 | train_list = f.read().splitlines() 12 | 13 | param_all_norm_v201_list = param_all_norm_v201.tolist() 14 | tmp_list = copy.deepcopy(param_all_norm_v201_list) 15 | 16 | for idx in range(len(param_all_norm_v201_list)): 17 | tmp_list[idx].append(train_list[idx]) 18 | 19 | train_list_sorted = natsorted(tmp_list, key=lambda x: x[102]) 20 | sorted_param_all_norm_v201_list = [val[0:102] for val in train_list_sorted] 21 | sorted_train_list = [val[102:103] for val in train_list_sorted] 22 | 23 | sorted_param_all_norm_v201_list_np = np.asarray(sorted_param_all_norm_v201_list) 24 | with open('3dmm_data/new_param_all_norm_v201.pkl', 'wb') as p: 25 | pickle.dump(sorted_param_all_norm_v201_list_np, p) 26 | 27 | sorted_train_list = list(itertools.chain.from_iterable(sorted_train_list)) 28 | str_ = '\n'.join(sorted_train_list) 29 | with open('3dmm_data/new_train_aug_120x120.list.train', 'wt') as f: 30 | f.write(str_) 31 | -------------------------------------------------------------------------------- /ibug/face_detection/retina_face/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | 11 | def py_cpu_nms(dets, thresh, top_k): 12 | """Pure Python NMS baseline.""" 13 | x1 = dets[:, 0] 14 | y1 = dets[:, 1] 15 | x2 = dets[:, 2] 16 | y2 = dets[:, 3] 17 | scores = dets[:, 4] 18 | 19 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 20 | order = scores.argsort()[: -top_k - 1: -1] 21 | 22 | keep = [] 23 | while order.size > 0: 24 | i = order[0] 25 | keep.append(i) 26 | xx1 = np.maximum(x1[i], x1[order[1:]]) 27 | yy1 = np.maximum(y1[i], y1[order[1:]]) 28 | xx2 = np.minimum(x2[i], x2[order[1:]]) 29 | yy2 = np.minimum(y2[i], y2[order[1:]]) 30 | 31 | w = np.maximum(0.0, xx2 - xx1 + 1) 32 | h = np.maximum(0.0, yy2 - yy1 + 1) 33 | inter = w * h 34 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 35 | 36 | inds = np.where(ovr <= thresh)[0] 37 | order = order[inds + 1] 38 | 39 | return keep 40 | -------------------------------------------------------------------------------- /01_data_cleaning.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import shutil 4 | from tqdm import tqdm 5 | from natsort import natsorted 6 | 7 | OUTPUT_PATH = 'train_aug_120x120_part_masked_clean' 8 | os.makedirs(OUTPUT_PATH, exist_ok=True) 9 | 10 | file_list1 = natsorted(glob.glob('train_aug_120x120_part/*/*.jpg')) 11 | file_list2 = natsorted(glob.glob('train_aug_120x120_part_masked/*/*.jpg')) 12 | 13 | with open('3dmm_data/new_train_aug_120x120.list.train') as f: 14 | train_list = f.read().splitlines() 15 | print(f'train_list: {len(train_list)}') 16 | 17 | clean_list_count = 0 18 | new_train_list = [] 19 | for file_path1, file_path2 in tqdm(zip(file_list1, 
file_list2)): 20 | if os.path.basename(file_path1) in train_list: 21 | shutil.copy2(file_path1, OUTPUT_PATH) 22 | new_train_list.append(os.path.basename(file_path1)) 23 | clean_list_count += 1 24 | 25 | if '_'.join(os.path.splitext(os.path.basename(file_path2))[0].split('_')[:-2]) + '.jpg' in train_list: 26 | shutil.copy2(file_path2, OUTPUT_PATH) 27 | new_train_list.append(os.path.basename(file_path2)) 28 | clean_list_count += 1 29 | 30 | # clean_list: 1272504 -> 636252 x2 31 | print(f'clean_list: {clean_list_count}') 32 | 33 | new_train_list = '\n'.join(new_train_list) 34 | with open('3dmm_data/new_new_train_aug_120x120.list.train', 'w') as f: 35 | f.write(new_train_list) 36 | 37 | -------------------------------------------------------------------------------- /folder_split.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import shutil 4 | from tqdm import tqdm 5 | from natsort import natsorted 6 | from argparse import ArgumentParser 7 | 8 | FOLDER_MAX = 2500 9 | 10 | def main(): 11 | parser = ArgumentParser() 12 | parser.add_argument( 13 | '-i', 14 | '--image_folder_path', 15 | type=str, 16 | default='HELEN', 17 | ) 18 | args = parser.parse_args() 19 | 20 | image_files = natsorted(glob.glob(f"{args.image_folder_path}/*.jpg")) 21 | mat_files = natsorted(glob.glob(f"{args.image_folder_path}/*.mat")) 22 | 23 | assert len(image_files) == len(mat_files) 24 | 25 | image_count = 0 26 | for (image_file, mat_file) in tqdm(zip(image_files, mat_files), dynamic_ncols=True): 27 | new_folder_number = image_count // FOLDER_MAX 28 | dirname = os.path.dirname(image_file) 29 | # print(f'@@@ dirname: {dirname} split: {dirname.split("/")}') 30 | new_dirname = f'{args.image_folder_path}_{str(new_folder_number).zfill(2)}' 31 | os.makedirs(new_dirname, exist_ok=True) 32 | 33 | shutil.move(image_file, new_dirname) 34 | shutil.move(mat_file, new_dirname) 35 | 36 | image_count += 1 37 | 38 | 39 | print(f'image_count: {image_count}') 40 | print(f'folder_count: {image_count//FOLDER_MAX+1}') 41 | 42 | if __name__ == "__main__": 43 | main() 44 | -------------------------------------------------------------------------------- /ibug/face_detection/retina_face/prior_box.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from itertools import product as product 3 | from math import ceil 4 | 5 | 6 | class PriorBox(object): 7 | def __init__(self, cfg, image_size=None): 8 | super(PriorBox, self).__init__() 9 | self.min_sizes = cfg['min_sizes'] 10 | self.steps = cfg['steps'] 11 | self.clip = cfg['clip'] 12 | self.image_size = image_size 13 | self.feature_maps = [[ceil(self.image_size[0]/step), ceil(self.image_size[1]/step)] for step in self.steps] 14 | self.name = "s" 15 | 16 | def forward(self): 17 | anchors = [] 18 | for k, f in enumerate(self.feature_maps): 19 | min_sizes = self.min_sizes[k] 20 | for i, j in product(range(f[0]), range(f[1])): 21 | for min_size in min_sizes: 22 | s_kx = min_size / self.image_size[1] 23 | s_ky = min_size / self.image_size[0] 24 | dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0.5]] 25 | dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0.5]] 26 | for cy, cx in product(dense_cy, dense_cx): 27 | anchors += [cx, cy, s_kx, s_ky] 28 | 29 | # back to torch land 30 | output = torch.Tensor(anchors).view(-1, 4) 31 | if self.clip: 32 | output.clamp_(max=1, min=0) 33 | return output 34 | 
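For reference, a small sketch (not part of the repository) of what PriorBox produces when RetinaFacePredictor calls it for a 480x640 input, using the cfg_re50 settings from config.py:

from ibug.face_detection.retina_face.config import cfg_re50
from ibug.face_detection.retina_face.prior_box import PriorBox

priors = PriorBox(cfg_re50, image_size=(480, 640)).forward()
# Two anchor sizes per cell on the stride-8/16/32 feature maps:
# (60*80 + 30*40 + 15*20) * 2 = 12600 priors of (cx, cy, w, h), normalised to [0, 1]
print(priors.shape)  # torch.Size([12600, 4])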
-------------------------------------------------------------------------------- /verify_txts.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | 3 | """verify_txts.py 4 | 5 | For verifying correctness of the generated YOLO txt annotations. 6 | """ 7 | import cv2 8 | import random 9 | from pathlib import Path 10 | from argparse import ArgumentParser 11 | 12 | WINDOW_NAME = "verify_txts" 13 | parser = ArgumentParser() 14 | parser.add_argument('--dim', type=str, default='640x480', help='input width and height, e.g. 640x480') 15 | args = parser.parse_args() 16 | 17 | if random.random() < 0.5: 18 | print('Verifying test.txt') 19 | jpgs_path = Path(f'data/300wlp-{args.dim}_masked/test.txt') 20 | else: 21 | print('Verifying train.txt') 22 | jpgs_path = Path(f'data/300wlp-{args.dim}_masked/train.txt') 23 | 24 | with open(jpgs_path.as_posix(), 'r') as f: 25 | jpg_names = [l.strip() for l in f.readlines()] 26 | 27 | random.shuffle(jpg_names) 28 | for jpg_name in jpg_names: 29 | img = cv2.imread(jpg_name) 30 | img_h, img_w, _ = img.shape 31 | txt_name = jpg_name.replace('.jpg', '.txt') 32 | with open(txt_name, 'r') as f: 33 | obj_lines = [l.strip() for l in f.readlines()] 34 | for obj_line in obj_lines: 35 | cls, cx, cy, nw, nh = [float(item) for item in obj_line.split(' ')] 36 | color = (0, 0, 255) if cls == 0.0 else (0, 255, 0) 37 | x_min = int((cx - (nw / 2.0)) * img_w) 38 | y_min = int((cy - (nh / 2.0)) * img_h) 39 | x_max = int((cx + (nw / 2.0)) * img_w) 40 | y_max = int((cy + (nh / 2.0)) * img_h) 41 | cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color, 2) 42 | cv2.imshow(WINDOW_NAME, img) 43 | if cv2.waitKey(0) == 27: 44 | break 45 | 46 | cv2.destroyAllWindows() 47 | -------------------------------------------------------------------------------- /create_filename_list.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import numpy as np 4 | import scipy.io as sio 5 | from tqdm import tqdm 6 | 7 | def get_ypr_from_mat(mat_path): 8 | # Get yaw, pitch, roll from .mat annotation. 9 | # They are in radians 10 | mat = sio.loadmat(mat_path) 11 | # [pitch yaw roll tdx tdy tdz scale_factor] 12 | pre_pose_params = mat['Pose_Para'][0] 13 | # Get [pitch, yaw, roll] 14 | pose_params = pre_pose_params[:3] 15 | return pose_params 16 | 17 | def parse_args(): 18 | """Parse input arguments.""" 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument( 21 | '--root_dir', 22 | help='root directory of the datasets files', 23 | default='./datasets/300W_LP', 24 | type=str 25 | ) 26 | parser.add_argument( 27 | '--file_name', 28 | help='Output filename.', 29 | default='files.txt', 30 | type=str 31 | ) 32 | args = parser.parse_args() 33 | 34 | return args 35 | 36 | if __name__ == '__main__': 37 | args = parse_args() 38 | 39 | os.chdir(args.root_dir) 40 | 41 | file_counter = 0 42 | rej_counter = 0 43 | outfile = open(args.file_name, 'w') 44 | 45 | for root, dirs, files in tqdm(os.walk('.'), dynamic_ncols=True): 46 | for f in tqdm(files, dynamic_ncols=True): 47 | if f[-4:] == '.jpg': 48 | mat_path = os.path.join(root, f.replace('.jpg', '.mat')) 49 | # We get the pose in radians 50 | pose = get_ypr_from_mat(mat_path) 51 | # And convert to degrees. 
52 | pitch = pose[0] * 180 / np.pi 53 | yaw = pose[1] * 180 / np.pi 54 | roll = pose[2] * 180 / np.pi 55 | 56 | if abs(pitch) <= 99 and abs(yaw) <= 99 and abs(roll) <= 99: 57 | if file_counter > 0: 58 | outfile.write('\n') 59 | outfile.write(root + '/' + f[:-4]) 60 | file_counter += 1 61 | else: 62 | rej_counter += 1 63 | 64 | outfile.close() 65 | print(f'{file_counter} files listed! {rej_counter} files had out-of-range values and kept out of the list!') 66 | -------------------------------------------------------------------------------- /10_create_masked_face_dataset_yolo_test_only_one_person.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import glob 4 | from tqdm import tqdm 5 | from copy import deepcopy 6 | from natsort import natsorted 7 | from argparse import ArgumentParser 8 | from ibug.face_detection import RetinaFacePredictor 9 | 10 | face_detector = RetinaFacePredictor( 11 | threshold=0.8, 12 | device='cuda:0', 13 | model=RetinaFacePredictor.get_model('resnet50') 14 | ) 15 | 16 | morethanone = [] 17 | nondetected = [] 18 | 19 | def main(): 20 | parser = ArgumentParser() 21 | parser.add_argument( 22 | '-i', 23 | '--image_folder_path', 24 | type=str, 25 | default='300W_LP', 26 | ) 27 | args = parser.parse_args() 28 | 29 | image_files = glob.glob(f"{args.image_folder_path}/*/*.jpg") 30 | 31 | image_count = 0 32 | for image_file in tqdm(natsorted(image_files), dynamic_ncols=True): 33 | 34 | dirname = os.path.dirname(image_file) 35 | # print(f'@@@ dirname: {dirname} split: {dirname.split("/")}') 36 | new_dirname = f'{args.image_folder_path}_onlyone_person/{dirname.split("/")[1]}' 37 | os.makedirs(new_dirname, exist_ok=True) 38 | 39 | image = cv2.imread(image_file) 40 | 41 | debug_image = deepcopy(image) 42 | debug_image = debug_image[..., ::-1] 43 | 44 | detected_faces = face_detector(debug_image, rgb=True) 45 | 46 | if len(detected_faces) == 1: 47 | 48 | # for face_box in detected_faces: 49 | # cv2.rectangle( 50 | # image, 51 | # (int(face_box[0]), int(face_box[1])), 52 | # (int(face_box[2]), int(face_box[3])), 53 | # (255,255,255), 54 | # 2, 55 | # ) 56 | # cv2.rectangle( 57 | # image, 58 | # (int(face_box[0]), int(face_box[1])), 59 | # (int(face_box[2]), int(face_box[3])), 60 | # (0,255,0), 61 | # 1, 62 | # ) 63 | # cv2.putText( 64 | # image, 65 | # f'{face_box[4]:.2f}', 66 | # ( 67 | # int(face_box[0]), 68 | # int(face_box[1]-10) if face_box[1]-10 > 0 else 20 69 | # ), 70 | # cv2.FONT_HERSHEY_SIMPLEX, 71 | # 0.7, 72 | # (255, 255, 255), 73 | # 2, 74 | # cv2.LINE_AA, 75 | # ) 76 | # cv2.putText( 77 | # image, 78 | # f'{face_box[4]:.2f}', 79 | # ( 80 | # int(face_box[0]), 81 | # int(face_box[1]-10) if face_box[1]-10 > 0 else 20 82 | # ), 83 | # cv2.FONT_HERSHEY_SIMPLEX, 84 | # 0.7, 85 | # (0, 255, 0), 86 | # 1, 87 | # cv2.LINE_AA, 88 | # ) 89 | 90 | # cv2.imshow("test", image) 91 | 92 | # key = cv2.waitKey(0) 93 | # if key == 27: # ESC 94 | # break 95 | 96 | basename = os.path.basename(image_file) 97 | cv2.imwrite(f'{new_dirname}/{basename}', image) 98 | image_count += 1 99 | 100 | print(f'image_count: {image_count}') 101 | 102 | if __name__ == "__main__": 103 | main() -------------------------------------------------------------------------------- /ibug/face_alignment/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from typing import Optional, Sequence, Tuple 4 | 5 | 6 | __all__ = ['get_landmark_connectivity', 'plot_landmarks'] 
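# Illustrative use (sketch): draw FAN results onto a frame after running the detectors:
#   landmarks, scores = landmark_detector(frame, detected_faces)
#   plot_landmarks(frame, landmarks[0], scores[0])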
7 | 8 | 9 | def get_landmark_connectivity(num_landmarks: int) -> Optional[Sequence[Tuple[int, int]]]: 10 | if num_landmarks == 68: 11 | return ((0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 11), (11, 12), 12 | (12, 13), (13, 14), (14, 15), (15, 16), (17, 18), (18, 19), (19, 20), (20, 21), (22, 23), (23, 24), 13 | (24, 25), (25, 26), (27, 28), (28, 29), (29, 30), (30, 33), (31, 32), (32, 33), (33, 34), (34, 35), 14 | (36, 37), (37, 38), (38, 39), (40, 41), (41, 36), (42, 43), (43, 44), (44, 45), (45, 46), (46, 47), 15 | (47, 42), (48, 49), (49, 50), (50, 51), (51, 52), (52, 53), (53, 54), (54, 55), (55, 56), (56, 57), 16 | (57, 58), (58, 59), (59, 48), (60, 61), (61, 62), (62, 63), (63, 64), (64, 65), (65, 66), (66, 67), 17 | (67, 60), (39, 40)) 18 | elif num_landmarks == 100: 19 | return ((0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 11), (11, 12), 20 | (12, 13), (13, 14), (14, 15), (15, 16), (17, 18), (18, 19), (19, 20), (20, 21), (22, 23), (23, 24), 21 | (24, 25), (25, 26), (68, 69), (69, 70), (70, 71), (72, 73), (73, 74), (74, 75), (36, 76), (76, 37), 22 | (37, 77), (77, 38), (38, 78), (78, 39), (39, 40), (40, 79), (79, 41), (41, 36), (42, 80), (80, 43), 23 | (43, 81), (81, 44), (44, 82), (82, 45), (45, 46), (46, 83), (83, 47), (47, 42), (27, 28), (28, 29), 24 | (29, 30), (30, 33), (31, 32), (32, 33), (33, 34), (34, 35), (84, 85), (86, 87), (48, 49), (49, 88), 25 | (88, 50), (50, 51), (51, 52), (52, 89), (89, 53), (53, 54), (54, 55), (55, 90), (90, 56), (56, 57), 26 | (57, 58), (58, 91), (91, 59), (59, 48), (60, 92), (92, 93), (93, 61), (61, 62), (62, 63), (63, 94), 27 | (94, 95), (95, 64), (64, 96), (96, 97), (97, 65), (65, 66), (66, 67), (67, 98), (98, 99), (99, 60), 28 | (17, 68), (21, 71), (22, 72), (26, 75)) 29 | else: 30 | return None 31 | 32 | 33 | def plot_landmarks(image: np.ndarray, landmarks: np.ndarray, landmark_scores: Optional[Sequence[float]] = None, 34 | threshold: float = 0.2, line_colour: Tuple[int, int, int] = (0, 255, 0), 35 | pts_colour: Tuple[int, int, int] = (0, 0, 255), line_thickness: int = 1, pts_radius: int = 1, 36 | landmark_connectivity: Optional[Sequence[Tuple[int, int]]] = None) -> None: 37 | num_landmarks = len(landmarks) 38 | if landmark_scores is None: 39 | landmark_scores = np.full((num_landmarks,), threshold + 1.0, dtype=float) 40 | if landmark_connectivity is None: 41 | landmark_connectivity = get_landmark_connectivity(len(landmarks)) 42 | if landmark_connectivity is not None: 43 | for (idx1, idx2) in landmark_connectivity: 44 | if (idx1 < num_landmarks and idx2 < num_landmarks and 45 | landmark_scores[idx1] >= threshold and landmark_scores[idx2] >= threshold): 46 | cv2.line(image, tuple(landmarks[idx1].astype(int).tolist()), 47 | tuple(landmarks[idx2].astype(int).tolist()), 48 | color=line_colour, thickness=line_thickness, lineType=cv2.LINE_AA) 49 | for landmark, score in zip(landmarks, landmark_scores): 50 | if score >= threshold: 51 | cv2.circle(image, tuple(landmark.astype(int).tolist()), pts_radius, pts_colour, -1) 52 | -------------------------------------------------------------------------------- /ibug/face_detection/utils/head_pose_estimator.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import math 4 | import numpy as np 5 | from typing import Optional, Tuple 6 | 7 | 8 | __all__ = ['HeadPoseEstimator'] 9 | 10 | 11 | class HeadPoseEstimator(object): 12 | def __init__(self, mean_shape_path: str 
= os.path.join(os.path.dirname(__file__), 'data', 'bfm_lms.npy')) -> None: 13 | # Load the 68-point mean shape derived from BFM 14 | mean_shape = np.load(mean_shape_path) 15 | 16 | # Calculate the 5-points mean shape 17 | left_eye = mean_shape[[37, 38, 40, 41]].mean(axis=0) 18 | right_eye = mean_shape[[43, 44, 46, 47]].mean(axis=0) 19 | self._mean_shape_5pts = np.vstack((left_eye, right_eye, mean_shape[[30, 48, 54]])) 20 | 21 | # Flip the y coordinates of the mean shape to match that of the image coordinate system 22 | self._mean_shape_5pts[:, 1] = -self._mean_shape_5pts[:, 1] 23 | 24 | def __call__( 25 | self, 26 | landmarks: np.ndarray, 27 | image_width: int = 0, 28 | image_height: int = 0, 29 | camera_matrix: Optional[np.ndarray] = None, 30 | dist_coeffs: Optional[np.ndarray] = None, 31 | output_preference: int = 0 32 | ) -> Tuple[float, float, float]: 33 | 34 | # Form the camera matrix 35 | if camera_matrix is None: 36 | if image_width <= 0 or image_height <= 0: 37 | raise ValueError( 38 | 'image_width and image_height must be specified when camera_matrix is not given directly') 39 | else: 40 | camera_matrix = np.array([ 41 | [image_width + image_height, 0, image_width / 2.0], 42 | [0, image_width + image_height, image_height / 2.0], 43 | [0, 0, 1], 44 | ], dtype=float) 45 | 46 | # Prepare the landmarks 47 | if landmarks.shape[0] == 68: 48 | landmarks = landmarks[17:] 49 | if landmarks.shape[0] in [49, 51]: 50 | left_eye = landmarks[[20, 21, 23, 24]].mean(axis=0) 51 | right_eye = landmarks[[26, 27, 29, 30]].mean(axis=0) 52 | landmarks = np.vstack((left_eye, right_eye, landmarks[[13, 31, 37]])) 53 | 54 | # Use EPnP to estimate pitch, yaw, and roll 55 | _, rvec, _ = cv2.solvePnP( 56 | self._mean_shape_5pts, 57 | np.expand_dims(landmarks, axis=1), 58 | camera_matrix, 59 | dist_coeffs, 60 | flags=cv2.SOLVEPNP_EPNP 61 | ) 62 | rot_mat, _ = cv2.Rodrigues(rvec) 63 | if 1.0 + rot_mat[2, 0] < 1e-9: 64 | pitch = 0.0 65 | yaw = 90.0 66 | roll = -math.atan2(rot_mat[0, 1], rot_mat[0, 2]) / math.pi * 180.0 67 | elif 1.0 - rot_mat[2, 0] < 1e-9: 68 | pitch = 0.0 69 | yaw = -90.0 70 | roll = math.atan2(-rot_mat[0, 1], -rot_mat[0, 2]) / math.pi * 180.0 71 | else: 72 | pitch = math.atan2(rot_mat[2, 1], rot_mat[2, 2]) / math.pi * 180.0 73 | yaw = -math.asin(rot_mat[2, 0]) / math.pi * 180.0 74 | roll = math.atan2(rot_mat[1, 0], rot_mat[0, 0]) / math.pi * 180.0 75 | 76 | # Respond to output_preference: 77 | # output_preference == 1: limit pitch to the range of -90.0 ~ 90.0 78 | # output_preference == 2: limit yaw to the range of -90.0 ~ 90.0 (already satisfied) 79 | # output_preference == 3: limit roll to the range of -90.0 ~ 90.0 80 | # otherwise: minimise total rotation, min(abs(pitch) + abs(yaw) + abs(roll)) 81 | if output_preference != 2: 82 | alt_pitch = pitch - 180.0 if pitch > 0.0 else pitch + 180.0 83 | alt_yaw = -180.0 - yaw if yaw < 0.0 else 180.0 - yaw 84 | alt_roll = roll - 180.0 if roll > 0.0 else roll + 180.0 85 | if (output_preference == 1 and -90.0 < alt_pitch < 90.0 or 86 | output_preference == 3 and -90.0 < alt_roll < 90.0 or 87 | output_preference not in (1, 2, 3) and 88 | abs(alt_pitch) + abs(alt_yaw) + abs(alt_roll) < abs(pitch) + abs(yaw) + abs(roll)): 89 | pitch, yaw, roll = alt_pitch, alt_yaw, alt_roll 90 | 91 | return -pitch, yaw, roll 92 | -------------------------------------------------------------------------------- /ibug/face_detection/utils/simple_face_tracker.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from 
typing import List, Optional 3 | from scipy.optimize import linear_sum_assignment 4 | 5 | 6 | __all__ = ['SimpleFaceTracker'] 7 | 8 | 9 | class SimpleFaceTracker(object): 10 | def __init__(self, iou_threshold: float = 0.4, minimum_face_size: float = 0.0) -> None: 11 | self._iou_threshold = iou_threshold 12 | self._minimum_face_size = minimum_face_size 13 | self._tracklets = [] 14 | self._tracklet_counter = 0 15 | 16 | @property 17 | def iou_threshold(self) -> float: 18 | return self._iou_threshold 19 | 20 | @iou_threshold.setter 21 | def iou_threshold(self, threshold: float) -> None: 22 | self._iou_threshold = threshold 23 | 24 | @property 25 | def minimum_face_size(self) -> float: 26 | return self._minimum_face_size 27 | 28 | @minimum_face_size.setter 29 | def minimum_face_size(self, face_size: float) -> None: 30 | self._minimum_face_size = face_size 31 | 32 | def __call__(self, face_boxes: np.ndarray) -> List[Optional[int]]: 33 | if face_boxes.size <= 0: 34 | self._tracklets = [] 35 | return [] 36 | 37 | # Calculate area of the faces 38 | face_areas = np.abs((face_boxes[:, 2] - face_boxes[:, 0]) * (face_boxes[:, 3] - face_boxes[:, 1])) 39 | 40 | # Prepare tracklets 41 | for tracklet in self._tracklets: 42 | tracklet['tracked'] = False 43 | 44 | # Calculate the distance matrix based on IOU 45 | iou_distance_threshold = np.clip(1.0 - self._iou_threshold, 0.0, 1.0) 46 | min_face_area = max(self._minimum_face_size ** 2, np.finfo(float).eps) 47 | distances = np.full(shape=(face_boxes.shape[0], len(self._tracklets)), 48 | fill_value=2.0 * min(face_boxes.shape[0], len(self._tracklets)), dtype=float) 49 | for row, face_box in enumerate(face_boxes): 50 | if face_areas[row] >= min_face_area: 51 | for col, tracklet in enumerate(self._tracklets): 52 | x_left = max(min(face_box[0], face_box[2]), min(tracklet['bbox'][0], tracklet['bbox'][2])) 53 | y_top = max(min(face_box[1], face_box[3]), min(tracklet['bbox'][1], tracklet['bbox'][3])) 54 | x_right = min(max(face_box[2], face_box[0]), max(tracklet['bbox'][2], tracklet['bbox'][0])) 55 | y_bottom = min(max(face_box[3], face_box[1]), max(tracklet['bbox'][3], tracklet['bbox'][1])) 56 | if x_right <= x_left or y_bottom <= y_top: 57 | distance = 1.0 58 | else: 59 | intersection_area = (x_right - x_left) * (y_bottom - y_top) 60 | distance = 1.0 - intersection_area / float(face_areas[row] + tracklet['area'] - 61 | intersection_area) 62 | if distance <= iou_distance_threshold: 63 | distances[row, col] = distance 64 | 65 | # ID assignment 66 | tracked_ids = [None] * face_boxes.shape[0] 67 | for row, col in zip(*linear_sum_assignment(distances)): 68 | if distances[row, col] <= iou_distance_threshold: 69 | tracked_ids[row] = self._tracklets[col]['id'] 70 | self._tracklets[col]['bbox'] = face_boxes[row, :4].copy() 71 | self._tracklets[col]['area'] = face_areas[row] 72 | self._tracklets[col]['tracked'] = True 73 | 74 | # Remove expired tracklets 75 | self._tracklets = [x for x in self._tracklets if x['tracked']] 76 | 77 | # Register new faces 78 | for idx, face_box in enumerate(face_boxes): 79 | if face_areas[idx] >= min_face_area and tracked_ids[idx] is None: 80 | self._tracklet_counter += 1 81 | self._tracklets.append({'bbox': face_box[:4].copy(), 'area': face_areas[idx], 82 | 'id': self._tracklet_counter, 'tracked': True}) 83 | tracked_ids[idx] = self._tracklets[-1]['id'] 84 | 85 | return tracked_ids 86 | 87 | def reset(self, reset_tracklet_counter: bool = True) -> None: 88 | self._tracklets = [] 89 | if reset_tracklet_counter: 90 | self._tracklet_counter = 0 91 
| -------------------------------------------------------------------------------- /create_masked_face_dataset_yolo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import pandas as pd 4 | import numpy as np 5 | import glob 6 | import random 7 | random.seed(0) 8 | import shutil 9 | from tqdm import tqdm 10 | from natsort import natsorted 11 | from FaceMasking import FaceMasker 12 | from ibug.face_detection import RetinaFacePredictor 13 | from ibug.face_alignment import FANPredictor 14 | 15 | face_detector = RetinaFacePredictor( 16 | threshold=0.8, 17 | device='cuda:0', 18 | model=RetinaFacePredictor.get_model('resnet50') 19 | ) 20 | landmark_detector = FANPredictor( 21 | device='cuda:0', 22 | model=FANPredictor.get_model('2dfan4') 23 | ) 24 | 25 | 26 | source_folder = 'data/300wlp-640x480' 27 | output_folder = 'data/300wlp-640x480_masked' 28 | 29 | if not os.path.exists(output_folder): 30 | os.makedirs(output_folder, exist_ok=True) 31 | 32 | masker = FaceMasker() 33 | 34 | less_filfol = [] 35 | morethanone = [] 36 | nondetected = [] 37 | 38 | image_files = natsorted(glob.glob(f"{source_folder}/*.jpg")) 39 | text_files = natsorted(glob.glob(f"{source_folder}/*.txt")) 40 | 41 | train_txt = '' 42 | test_txt = '' 43 | 44 | if f'{source_folder}/train.txt' in text_files: 45 | text_files.remove(f'{source_folder}/train.txt') 46 | with open(glob.glob(f"{source_folder}/train.txt")[0], 'r') as f: 47 | train_txt = [l.strip() for l in f.readlines()] 48 | 49 | if f'{source_folder}/test.txt' in text_files: 50 | text_files.remove(f'{source_folder}/test.txt') 51 | with open(glob.glob(f"{source_folder}/test.txt")[0], 'r') as f: 52 | test_txt = [l.strip() for l in f.readlines()] 53 | 54 | if f'{source_folder}/val.txt' in text_files: 55 | text_files.remove(f'{source_folder}/val.txt') 56 | 57 | assert len(image_files) == len(text_files), \ 58 | f"len(image_files) != len(text_files): {len(image_files)} {len(text_files)}" 59 | 60 | 61 | output_train_txt_list = [] 62 | output_test_txt_list = [] 63 | 64 | for j, (image_file, text_file) in tqdm(enumerate(zip(image_files, text_files))): 65 | image_basename = os.path.basename(image_file) 66 | image_basename_without_ext = os.path.splitext(image_basename)[0] 67 | text_basename = os.path.basename(text_file) 68 | text_basename_without_ext = os.path.splitext(text_basename)[0] 69 | assert image_basename_without_ext == text_basename_without_ext, \ 70 | f"image_basename_without_ext != text_basename_without_ext: \ 71 | {image_basename_without_ext} {text_basename_without_ext}" 72 | 73 | # Load image 74 | image = cv2.imread(image_file)[..., ::-1] 75 | width = image.shape[1] 76 | height = image.shape[0] 77 | 78 | detected_faces = face_detector(image, rgb=True) 79 | 80 | if len(detected_faces) == 0: 81 | nondetected.append(image_file) 82 | continue 83 | if len(detected_faces) > 1: 84 | morethanone.append(image_file) 85 | continue 86 | 87 | landmarks, scores = landmark_detector(image, detected_faces, rgb=False) 88 | landmarks = [tuple(landmark.astype(np.int32)) for landmark in landmarks[0]] 89 | 90 | # Save masked-extracted face. 
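# (wear_mask_to_face() warps a randomly chosen mask PNG onto the 68 detected landmarks;
#  pass mask_idx to force a specific mask image.)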
91 | image_mask = masker.wear_mask_to_face(image, landmarks) 92 | face_mask = image_mask[:, :, ::-1] 93 | 94 | cv2.imwrite( 95 | f'{output_folder}/{image_basename_without_ext}_masked.jpg', 96 | face_mask, 97 | ) 98 | shutil.copy( 99 | text_file, 100 | f'{output_folder}/{text_basename_without_ext}_masked.txt', 101 | ) 102 | 103 | if image_file in test_txt: 104 | output_test_txt_list.append(f'{output_folder}/{image_basename_without_ext}_masked.jpg') 105 | else: 106 | output_train_txt_list.append(f'{output_folder}/{image_basename_without_ext}_masked.jpg') 107 | 108 | 109 | set_path = f'{output_folder}/test.txt' 110 | with open(set_path, 'w') as fset: 111 | for jpg in output_test_txt_list: 112 | fset.write(f'{jpg}\n') 113 | 114 | set_path = f'{output_folder}/train.txt' 115 | with open(set_path, 'w') as fset: 116 | for jpg in output_train_txt_list: 117 | fset.write(f'{jpg}\n') 118 | 119 | nondetected = pd.DataFrame(nondetected) 120 | nondetected.to_csv(f'non_detected.csv', header=None, index=None) 121 | morethanone = pd.DataFrame(morethanone) 122 | morethanone.to_csv(f'multi_detected.csv', header=None, index=None) -------------------------------------------------------------------------------- /create_masked_face_dataset_6drepnet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import time 4 | import pandas as pd 5 | import numpy as np 6 | import glob 7 | import scipy.io as sio 8 | import random 9 | random.seed(0) 10 | import shutil 11 | from FaceMasking import FaceMasker 12 | from natsort import natsorted 13 | from ibug.face_detection import RetinaFacePredictor 14 | from ibug.face_alignment import FANPredictor 15 | 16 | face_detector = RetinaFacePredictor( 17 | threshold=0.8, 18 | device='cuda:0', 19 | model=RetinaFacePredictor.get_model('resnet50') 20 | ) 21 | landmark_detector = FANPredictor( 22 | device='cuda:0', 23 | model=FANPredictor.get_model('2dfan4') 24 | ) 25 | 26 | 27 | source_folder = '300W_LP_croped' 28 | output_folder = '300W_LP_croped_masked' 29 | image_size = 480 30 | 31 | if not os.path.exists(output_folder): 32 | os.makedirs(output_folder) 33 | 34 | masker = FaceMasker() 35 | 36 | list_fol = os.listdir(source_folder) 37 | num_fol = len(list_fol) 38 | less_filfol = [] 39 | morethanone = [] 40 | nondetected = [] 41 | 42 | st = time.time() 43 | for i, fol in enumerate(list_fol): 44 | print('folder: {}/{}'.format(i+1, num_fol)) 45 | folfil = os.listdir(os.path.join(source_folder, fol)) 46 | num_fil = len(folfil) 47 | if len(folfil) <= 0: 48 | less_filfol.append(fol) 49 | continue 50 | 51 | image_files = glob.glob(f"{os.path.join(source_folder, fol)}/*.jpg") 52 | mat_files = glob.glob(f"{os.path.join(source_folder, fol)}/*.mat") 53 | 54 | for j, (image_file, mat_file) in enumerate(zip(natsorted(image_files), natsorted(mat_files))): 55 | 56 | save_fol = f'{output_folder}/{fol}' 57 | if not os.path.exists(save_fol): 58 | os.makedirs(save_fol) 59 | 60 | print(' file: {}/{}'.format(j+1, num_fil)) 61 | 62 | # Load image 63 | image = cv2.imread(image_file)[:, :, ::-1] 64 | # Load .mat 65 | mat = sio.loadmat(mat_file) 66 | # Calculate image size for final crop (for cropping to adjust to size of annotation data) 67 | pt2d = mat['pt2d'] 68 | x_min = min(pt2d[0,:]) 69 | y_min = min(pt2d[1,:]) 70 | x_max = max(pt2d[0,:]) 71 | y_max = max(pt2d[1,:]) 72 | k = 0.20 73 | x_min -= 2 * k * abs(x_max - x_min) 74 | y_min -= 2 * k * abs(y_max - y_min) 75 | x_max += 2 * k * abs(x_max - x_min) 76 | y_max += 0.6 * k * abs(y_max - 
y_min) 77 | x_min = max(int(x_min), 0) 78 | y_min = max(int(y_min), 0) 79 | x_max = min(int(x_max), int(image_size)) 80 | y_max = min(int(y_max), int(image_size)) 81 | crop_start_x = 0 82 | crop_end_x = int(x_max-x_min) 83 | crop_start_y = 0 84 | crop_end_y = int(y_max-y_min) 85 | width = image.shape[1] 86 | height = image.shape[0] 87 | 88 | detected_faces = face_detector(image, rgb=True) 89 | if len(detected_faces) == 0: 90 | detected_faces = np.asarray([[0,0,image.shape[1],image.shape[0],1.0]]) 91 | landmarks, scores = landmark_detector(image, detected_faces, rgb=False) 92 | landmarks = [tuple(landmark.astype(np.int32)) for landmark in landmarks[0]] 93 | 94 | if len(detected_faces) == 0: 95 | nondetected.append(image_file) 96 | continue 97 | if len(detected_faces) > 1: 98 | morethanone.append(image_file) 99 | continue 100 | 101 | # Save masked-extracted face. 102 | image_mask = masker.wear_mask_to_face(image, landmarks) 103 | face_mask = image_mask[:, :, ::-1] 104 | 105 | cv2.imwrite( 106 | os.path.join(save_fol, os.path.basename(image_file).split('.')[0] + '.jpg'), 107 | image[crop_start_y:crop_end_y, crop_start_x:crop_end_x, :][..., ::-1], 108 | ) 109 | cv2.imwrite( 110 | os.path.join(save_fol, os.path.basename(image_file).split('.')[0] + '_masked.jpg'), 111 | face_mask[crop_start_y:crop_end_y, crop_start_x:crop_end_x, :], 112 | ) 113 | shutil.copy(mat_file, save_fol) 114 | shutil.copy(mat_file, os.path.join(save_fol, os.path.basename(mat_file).split('.')[0] + '_masked.mat')) 115 | 116 | 117 | 118 | elps = time.time() - st 119 | print('time used: %.0f m : %.0f s' % (elps // 60, elps % 60)) 120 | 121 | nondetected = pd.DataFrame(nondetected) 122 | nondetected.to_csv(f'non_detected.csv', header=None, index=None) 123 | morethanone = pd.DataFrame(morethanone) 124 | morethanone.to_csv(f'multi_detected.csv', header=None, index=None) -------------------------------------------------------------------------------- /ibug/face_detection/retina_face/retina_face.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torchvision.models as models 5 | import torchvision.models._utils as _utils 6 | from .retina_face_net import MobileNetV1, FPN, SSH 7 | 8 | 9 | class ClassHead(nn.Module): 10 | def __init__(self, inchannels=512, num_anchors=3): 11 | super(ClassHead, self).__init__() 12 | self.num_anchors = num_anchors 13 | self.conv1x1 = nn.Conv2d(inchannels, self.num_anchors*2, kernel_size=(1, 1), stride=1, padding=0) 14 | 15 | def forward(self, x): 16 | out = self.conv1x1(x) 17 | out = out.permute(0, 2, 3, 1).contiguous() 18 | 19 | return out.view(out.shape[0], -1, 2) 20 | 21 | 22 | class BboxHead(nn.Module): 23 | def __init__(self, inchannels=512, num_anchors=3): 24 | super(BboxHead, self).__init__() 25 | self.conv1x1 = nn.Conv2d(inchannels, num_anchors*4, kernel_size=(1, 1), stride=1,padding=0) 26 | 27 | def forward(self, x): 28 | out = self.conv1x1(x) 29 | out = out.permute(0, 2, 3, 1).contiguous() 30 | 31 | return out.view(out.shape[0], -1, 4) 32 | 33 | 34 | class LandmarkHead(nn.Module): 35 | def __init__(self, inchannels=512, num_anchors=3): 36 | super(LandmarkHead, self).__init__() 37 | self.conv1x1 = nn.Conv2d(inchannels,num_anchors*10, kernel_size=(1, 1), stride=1, padding=0) 38 | 39 | def forward(self, x): 40 | out = self.conv1x1(x) 41 | out = out.permute(0, 2, 3, 1).contiguous() 42 | 43 | return out.view(out.shape[0], -1, 10) 44 | 45 | 46 | class RetinaFace(nn.Module): 47
| def __init__(self, cfg=None, phase='train'): 48 | """ 49 | :param cfg: Network related settings. 50 | :param phase: train or test. 51 | """ 52 | super(RetinaFace, self).__init__() 53 | self.phase = phase 54 | backbone = None 55 | if cfg['name'] == 'mobilenet0.25': 56 | backbone = MobileNetV1() 57 | if cfg['pretrain']: 58 | raise ValueError('cfg[\'pretrain\'] cannot be set to True for mobilenet0.25') 59 | elif cfg['name'] == 'Resnet50': 60 | backbone = models.resnet50(pretrained=cfg['pretrain']) 61 | 62 | self.body = _utils.IntermediateLayerGetter(backbone, cfg['return_layers']) 63 | in_channels_stage2 = cfg['in_channel'] 64 | in_channels_list = [ 65 | in_channels_stage2 * 2, 66 | in_channels_stage2 * 4, 67 | in_channels_stage2 * 8, 68 | ] 69 | out_channels = cfg['out_channel'] 70 | self.fpn = FPN(in_channels_list,out_channels) 71 | self.ssh1 = SSH(out_channels, out_channels) 72 | self.ssh2 = SSH(out_channels, out_channels) 73 | self.ssh3 = SSH(out_channels, out_channels) 74 | 75 | self.ClassHead = self._make_class_head(fpn_num=3, inchannels=cfg['out_channel']) 76 | self.BboxHead = self._make_bbox_head(fpn_num=3, inchannels=cfg['out_channel']) 77 | self.LandmarkHead = self._make_landmark_head(fpn_num=3, inchannels=cfg['out_channel']) 78 | 79 | def _make_class_head(self, fpn_num=3, inchannels=64, anchor_num=2): 80 | classhead = nn.ModuleList() 81 | for i in range(fpn_num): 82 | classhead.append(ClassHead(inchannels, anchor_num)) 83 | return classhead 84 | 85 | def _make_bbox_head(self, fpn_num=3, inchannels=64, anchor_num=2): 86 | bboxhead = nn.ModuleList() 87 | for i in range(fpn_num): 88 | bboxhead.append(BboxHead(inchannels, anchor_num)) 89 | return bboxhead 90 | 91 | def _make_landmark_head(self, fpn_num=3, inchannels=64, anchor_num=2): 92 | landmarkhead = nn.ModuleList() 93 | for i in range(fpn_num): 94 | landmarkhead.append(LandmarkHead(inchannels, anchor_num)) 95 | return landmarkhead 96 | 97 | def forward(self, inputs): 98 | out = self.body(inputs) 99 | 100 | # FPN 101 | fpn = self.fpn(out) 102 | 103 | # SSH 104 | feature1 = self.ssh1(fpn[0]) 105 | feature2 = self.ssh2(fpn[1]) 106 | feature3 = self.ssh3(fpn[2]) 107 | features = [feature1, feature2, feature3] 108 | 109 | bbox_regressions = torch.cat([self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1) 110 | classifications = torch.cat([self.ClassHead[i](feature) for i, feature in enumerate(features)], dim=1) 111 | ldm_regressions = torch.cat([self.LandmarkHead[i](feature) for i, feature in enumerate(features)], dim=1) 112 | 113 | if self.phase == 'train': 114 | output = (bbox_regressions, classifications, ldm_regressions) 115 | else: 116 | # output = (bbox_regressions, F.softmax(classifications, dim=-1), ldm_regressions) 117 | output = (bbox_regressions, F.softmax(classifications, dim=-1)[...,1], ldm_regressions) 118 | return output 119 | -------------------------------------------------------------------------------- /ibug/face_detection/retina_face/retina_face_net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def conv_bn(inp, oup, stride = 1, leaky = 0): 7 | return nn.Sequential( 8 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 9 | nn.BatchNorm2d(oup), 10 | nn.LeakyReLU(negative_slope=leaky, inplace=True) 11 | ) 12 | 13 | 14 | def conv_bn_no_relu(inp, oup, stride): 15 | return nn.Sequential( 16 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 17 | nn.BatchNorm2d(oup), 18 | ) 19 | 
20 | 21 | def conv_bn1X1(inp, oup, stride, leaky=0): 22 | return nn.Sequential( 23 | nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False), 24 | nn.BatchNorm2d(oup), 25 | nn.LeakyReLU(negative_slope=leaky, inplace=True) 26 | ) 27 | 28 | 29 | def conv_dw(inp, oup, stride, leaky=0.1): 30 | return nn.Sequential( 31 | nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), 32 | nn.BatchNorm2d(inp), 33 | nn.LeakyReLU(negative_slope=leaky, inplace=True), 34 | 35 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 36 | nn.BatchNorm2d(oup), 37 | nn.LeakyReLU(negative_slope=leaky, inplace=True), 38 | ) 39 | 40 | 41 | class SSH(nn.Module): 42 | def __init__(self, in_channel, out_channel): 43 | super(SSH, self).__init__() 44 | assert out_channel % 4 == 0 45 | leaky = 0 46 | if out_channel <= 64: 47 | leaky = 0.1 48 | self.conv3X3 = conv_bn_no_relu(in_channel, out_channel//2, stride=1) 49 | 50 | self.conv5X5_1 = conv_bn(in_channel, out_channel//4, stride=1, leaky = leaky) 51 | self.conv5X5_2 = conv_bn_no_relu(out_channel//4, out_channel//4, stride=1) 52 | 53 | self.conv7X7_2 = conv_bn(out_channel//4, out_channel//4, stride=1, leaky = leaky) 54 | self.conv7x7_3 = conv_bn_no_relu(out_channel//4, out_channel//4, stride=1) 55 | 56 | def forward(self, input): 57 | conv3X3 = self.conv3X3(input) 58 | 59 | conv5X5_1 = self.conv5X5_1(input) 60 | conv5X5 = self.conv5X5_2(conv5X5_1) 61 | 62 | conv7X7_2 = self.conv7X7_2(conv5X5_1) 63 | conv7X7 = self.conv7x7_3(conv7X7_2) 64 | 65 | out = torch.cat([conv3X3, conv5X5, conv7X7], dim=1) 66 | out = F.relu(out) 67 | return out 68 | 69 | 70 | class FPN(nn.Module): 71 | def __init__(self,in_channels_list,out_channels): 72 | super(FPN,self).__init__() 73 | leaky = 0 74 | if out_channels <= 64: 75 | leaky = 0.1 76 | self.output1 = conv_bn1X1(in_channels_list[0], out_channels, stride=1, leaky=leaky) 77 | self.output2 = conv_bn1X1(in_channels_list[1], out_channels, stride=1, leaky=leaky) 78 | self.output3 = conv_bn1X1(in_channels_list[2], out_channels, stride=1, leaky=leaky) 79 | 80 | self.merge1 = conv_bn(out_channels, out_channels, leaky=leaky) 81 | self.merge2 = conv_bn(out_channels, out_channels, leaky=leaky) 82 | 83 | def forward(self, input): 84 | # names = list(input.keys()) 85 | input = list(input.values()) 86 | 87 | output1 = self.output1(input[0]) 88 | output2 = self.output2(input[1]) 89 | output3 = self.output3(input[2]) 90 | 91 | up3 = F.interpolate(output3, size=[output2.size(2), output2.size(3)], mode="nearest") 92 | output2 = output2 + up3 93 | output2 = self.merge2(output2) 94 | 95 | up2 = F.interpolate(output2, size=[output1.size(2), output1.size(3)], mode="nearest") 96 | output1 = output1 + up2 97 | output1 = self.merge1(output1) 98 | 99 | out = [output1, output2, output3] 100 | return out 101 | 102 | 103 | class MobileNetV1(nn.Module): 104 | def __init__(self): 105 | super(MobileNetV1, self).__init__() 106 | self.stage1 = nn.Sequential( 107 | conv_bn(3, 8, 2, leaky=0.1), # 3 108 | conv_dw(8, 16, 1), # 7 109 | conv_dw(16, 32, 2), # 11 110 | conv_dw(32, 32, 1), # 19 111 | conv_dw(32, 64, 2), # 27 112 | conv_dw(64, 64, 1), # 43 113 | ) 114 | self.stage2 = nn.Sequential( 115 | conv_dw(64, 128, 2), # 43 + 16 = 59 116 | conv_dw(128, 128, 1), # 59 + 32 = 91 117 | conv_dw(128, 128, 1), # 91 + 32 = 123 118 | conv_dw(128, 128, 1), # 123 + 32 = 155 119 | conv_dw(128, 128, 1), # 155 + 32 = 187 120 | conv_dw(128, 128, 1), # 187 + 32 = 219 121 | ) 122 | self.stage3 = nn.Sequential( 123 | conv_dw(128, 256, 2), # 219 +3 2 = 241 124 | conv_dw(256, 256, 1), # 241 + 64 = 301 125 | 
) 126 | self.avg = nn.AdaptiveAvgPool2d((1,1)) 127 | self.fc = nn.Linear(256, 1000) 128 | 129 | def forward(self, x): 130 | x = self.stage1(x) 131 | x = self.stage2(x) 132 | x = self.stage3(x) 133 | x = self.avg(x) 134 | # x = self.model(x) 135 | x = x.view(-1, 256) 136 | x = self.fc(x) 137 | return x 138 | -------------------------------------------------------------------------------- /FaceMasking.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import pickle 4 | import numpy as np 5 | from PIL import Image 6 | import random 7 | random.seed(0) 8 | 9 | MASK_PTS_FILE = './mask_images/mask_pts.pkl' 10 | TRI_MASK_IDX = [ 11 | [0, 1, 3], [3, 1, 4], [3, 4, 6], [6, 4, 7], 12 | [4, 7, 8], [4, 5, 8], [1, 5, 4], [1, 2, 5] 13 | ] 14 | DEFAULT_TRI_FACE_IDX = [ 15 | [1, 28, 3], [3, 28, 30], [3, 30, 5], [5, 30, 8], 16 | [30, 8, 11], [30, 13, 11], [28, 13, 30], [28, 15, 13] 17 | ] 18 | DEFAULT_MASK_PTS = np.array([ 19 | (30, 12), (125, 5), (220, 12), (20, 80), (125, 80), 20 | (230, 80), (65, 140), (125, 160), (185, 140) 21 | ]) 22 | 23 | 24 | def get_tri_mask_points(pts_mask, tri_mask_idx): 25 | tri_mask_pts = np.zeros((len(tri_mask_idx), 6), dtype=np.float32) 26 | for i in range(len(tri_mask_idx)): 27 | tri_mask_pts[i] = pts_mask[tri_mask_idx[i]].ravel() 28 | return tri_mask_pts 29 | 30 | 31 | def closest_point(pt, pts): 32 | dist = np.sum((pts - pt) ** 2, axis=1) 33 | return np.argmin(dist), np.min(dist) 34 | 35 | 36 | def create_mask_mark(png_image): 37 | create_mask_mark.done = False 38 | create_mask_mark.current = (0, 0) 39 | create_mask_mark.pts = DEFAULT_MASK_PTS 40 | create_mask_mark.sel_idx = None 41 | window = 'Adjust points' 42 | 43 | def on_mouse(event, x, y, flags, param): 44 | if create_mask_mark.done: 45 | return 46 | 47 | if event == cv2.EVENT_MOUSEMOVE: 48 | if create_mask_mark.sel_idx is not None: 49 | create_mask_mark.pts[create_mask_mark.sel_idx] = (x, y) 50 | elif event == cv2.EVENT_LBUTTONDOWN: 51 | idx, dist = closest_point(np.array((x, y)), create_mask_mark.pts) 52 | if dist < 10: 53 | create_mask_mark.sel_idx = idx 54 | elif event == cv2.EVENT_LBUTTONUP: 55 | create_mask_mark.sel_idx = None 56 | 57 | masks = [] 58 | idx = [] 59 | if os.path.exists(MASK_PTS_FILE): 60 | masks = pickle.load(open(MASK_PTS_FILE, 'rb')) 61 | idx = [i for (i, d) in enumerate(masks) if d['file'] == png_image] 62 | if len(idx) > 0: 63 | create_mask_mark.pts = masks[idx[0]]['pts'] 64 | else: 65 | pass 66 | 67 | img = cv2.imread(png_image, cv2.IMREAD_UNCHANGED) 68 | cv2.imshow(window, img) 69 | cv2.waitKey(1) 70 | cv2.setMouseCallback(window, on_mouse) 71 | print('Press ESC to finish Adjust.') 72 | 73 | while not create_mask_mark.done: 74 | canvas = np.copy(img) 75 | for pt in create_mask_mark.pts: 76 | canvas = cv2.circle(canvas, (pt[0], pt[1]), 4, (0, 255, 0), -1) 77 | 78 | tri_mask_pts = get_tri_mask_points(create_mask_mark.pts, TRI_MASK_IDX) 79 | for tri in tri_mask_pts: 80 | tri = tri.reshape(3, 2) 81 | canvas = cv2.polylines(canvas, [tri.astype(np.int32)], True, (0, 255, 0), 2) 82 | 83 | cv2.imshow(window, canvas) 84 | if cv2.waitKey(50) == 27: 85 | create_mask_mark.done = True 86 | 87 | print('Any KEY to continue.') 88 | cv2.imshow(window, canvas) 89 | cv2.waitKey(0) 90 | cv2.destroyAllWindows() 91 | 92 | if len(idx) > 0: 93 | masks[idx[0]]['pts'] = create_mask_mark.pts 94 | else: 95 | masks.append({'file': png_image, 'pts': create_mask_mark.pts}) 96 | pickle.dump(masks, open(MASK_PTS_FILE, 'wb')) 97 | 98 | 99 | class FaceMasker: 
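    # Overlays a mask PNG onto a face image: each triangle of the mask (TRI_MASK_IDX over the
    # 9 points stored in mask_pts.pkl) is affine-warped onto the matching triangle of the
    # 68-point face landmarks (DEFAULT_TRI_FACE_IDX) inside wear_mask_to_face().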
100 | def __init__(self, mask_pts_file=MASK_PTS_FILE): 101 | self.masks_pts_file = mask_pts_file 102 | 103 | self.num_pts = 9 104 | self.tri_mask_idx = TRI_MASK_IDX 105 | self.tri_face_idx = DEFAULT_TRI_FACE_IDX 106 | self.masks = None 107 | self.load_mask() 108 | 109 | def load_mask(self): 110 | masks = pickle.load(open(self.masks_pts_file, 'rb')) 111 | 112 | self.masks = [] 113 | for m in masks: 114 | img = cv2.imread(m['file'], cv2.IMREAD_UNCHANGED) 115 | img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA) 116 | self.masks.append( 117 | { 118 | 'img': img, 119 | 'pts': m['pts'], 120 | 'tri': get_tri_mask_points(m['pts'], self.tri_mask_idx) 121 | } 122 | ) 123 | 124 | def get_tri_face_points(self, shape_pts): 125 | tri_face = np.zeros((len(self.tri_face_idx), 6), dtype=np.float32) 126 | for i in range(len(self.tri_face_idx)): 127 | for j in range(3): 128 | pt = shape_pts[self.tri_face_idx[i][j]] 129 | if hasattr(pt, 'x') and hasattr(pt, 'y'): 130 | tri_face[i, [j+j, j+j+1]] = pt.x, pt.y 131 | else: 132 | tri_face[i, [j+j, j+j+1]] = pt[0], pt[1] 133 | return tri_face 134 | 135 | def wear_mask_to_face(self, image, face_shape, mask_idx=None): 136 | if mask_idx is None: 137 | mask_idx = random.randint(0, len(self.masks)-1) 138 | 139 | image_mask = self.masks[mask_idx]['img'] 140 | tri_mask_pts = self.masks[mask_idx]['tri'] 141 | tri_face = self.get_tri_face_points(face_shape) 142 | 143 | image_face = Image.fromarray(image) 144 | for pts1, pts2 in zip(tri_mask_pts, tri_face): 145 | pts1 = pts1.copy().reshape(3, 2) 146 | pts2 = pts2.copy().reshape(3, 2) 147 | 148 | rect1 = cv2.boundingRect(pts1) 149 | pts1[:, 0] = pts1[:, 0] - rect1[0] 150 | pts1[:, 1] = pts1[:, 1] - rect1[1] 151 | 152 | croped_tri_mask = image_mask[rect1[1]:rect1[1]+rect1[3], rect1[0]:rect1[0]+rect1[2]] 153 | 154 | rect2 = cv2.boundingRect(pts2) 155 | pts2[:, 0] = pts2[:, 0] - rect2[0] 156 | pts2[:, 1] = pts2[:, 1] - rect2[1] 157 | 158 | mask_croped = np.zeros((rect2[3], rect2[2]), np.uint8) 159 | cv2.fillConvexPoly(mask_croped, pts2.astype(np.int32), 255) 160 | 161 | M = cv2.getAffineTransform(pts1, pts2) 162 | warped = cv2.warpAffine(croped_tri_mask, M, (rect2[2], rect2[3])) 163 | warped = cv2.bitwise_and(warped, warped, mask=mask_croped) 164 | 165 | warped = Image.fromarray(warped) 166 | image_face.paste(warped, (rect2[0], rect2[1]), warped) 167 | 168 | return np.array(image_face) 169 | 170 | 171 | if __name__ == '__main__': 172 | create_mask_mark('./mask_images/01_surgical_light_blue.png') 173 | create_mask_mark('./mask_images/02_cloth.png') 174 | create_mask_mark('./mask_images/03_surgical_white.png') 175 | create_mask_mark('./mask_images/04_surgical_blue.png') 176 | 177 | masker = FaceMasker() 178 | -------------------------------------------------------------------------------- /ibug/face_detection/retina_face/retina_face_predictor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | from copy import deepcopy 5 | from types import SimpleNamespace 6 | from typing import Union, Optional 7 | from .prior_box import PriorBox 8 | from .py_cpu_nms import py_cpu_nms 9 | from .retina_face import RetinaFace 10 | from .config import cfg_mnet, cfg_re50 11 | from .box_utils import decode, decode_landm 12 | 13 | 14 | __all__ = ['RetinaFacePredictor'] 15 | 16 | 17 | class RetinaFacePredictor(object): 18 | def __init__(self, threshold: float = 0.8, device: Union[str, torch.device] = 'cuda:0', model: Optional[SimpleNamespace] = None, config: 
Optional[SimpleNamespace] = None) -> None: 19 | self.threshold = threshold 20 | self.device = device 21 | if model is None: 22 | model = RetinaFacePredictor.get_model() 23 | if config is None: 24 | config = RetinaFacePredictor.create_config() 25 | self.config = SimpleNamespace(**model.config.__dict__, **config.__dict__) 26 | self.net = RetinaFace(cfg=self.config.__dict__, phase='test').to(self.device) 27 | pretrained_dict = torch.load(model.weights, map_location=self.device) 28 | if 'state_dict' in pretrained_dict.keys(): 29 | pretrained_dict = {key.split('module.', 1)[-1] if key.startswith('module.') else key: value for key, value in pretrained_dict['state_dict'].items()} 30 | else: 31 | pretrained_dict = {key.split('module.', 1)[-1] if key.startswith('module.') else key: value for key, value in pretrained_dict.items()} 32 | self.net.load_state_dict(pretrained_dict, strict=False) 33 | self.net.eval() 34 | self.priors = None 35 | self.previous_size = None 36 | 37 | @staticmethod 38 | def get_model(name: str = 'resnet50') -> SimpleNamespace: 39 | name = name.lower().strip() 40 | if name == 'resnet50': 41 | return SimpleNamespace(weights=os.path.realpath(os.path.join(os.path.dirname(__file__), 'weights', 'Resnet50_Final.pth')), config=SimpleNamespace(**deepcopy(cfg_re50))) 42 | elif name == 'mobilenet0.25': 43 | return SimpleNamespace(weights=os.path.realpath(os.path.join(os.path.dirname(__file__), 'weights', 'mobilenet0.25_Final.pth')), config=SimpleNamespace(**deepcopy(cfg_mnet))) 44 | else: 45 | raise ValueError('name must be set to either resnet50 or mobilenet0.25') 46 | 47 | @staticmethod 48 | def create_config(top_k: int = 750, conf_thresh: float = 0.02, nms_thresh: float = 0.4, nms_top_k: int = 5000) -> SimpleNamespace: 49 | return SimpleNamespace(top_k=top_k, conf_thresh=conf_thresh, nms_thresh=nms_thresh, nms_top_k=nms_top_k) 50 | 51 | @torch.no_grad() 52 | def __call__(self, image: np.ndarray, rgb: bool = True) -> np.ndarray: 53 | im_height, im_width, _ = image.shape 54 | if rgb: 55 | image = image[..., ::-1] 56 | image = image.astype(int) - np.array([104, 117, 123]) 57 | image = image.transpose(2, 0, 1) 58 | image = torch.from_numpy(image).unsqueeze(0).float().to(self.device) 59 | scale = torch.Tensor([im_width, im_height, im_width, im_height]).to(self.device) 60 | loc, conf, landms = self.net(image) 61 | 62 | # ############################################################################# 63 | # import onnx 64 | # from onnxsim import simplify 65 | # RESOLUTION = [ 66 | # # [192,320], 67 | # # [240,320], 68 | # # [320,480], 69 | # # [360,640], 70 | # [480,640], 71 | # # [720,1280], 72 | # ] 73 | # MODEL = f'retinaface' 74 | # for H, W in RESOLUTION: 75 | # onnx_file = f"{MODEL}_{H}x{W}.onnx" 76 | # x = torch.randn(1, 3, H, W).cuda() 77 | # torch.onnx.export( 78 | # self.net, 79 | # args=(x), 80 | # f=onnx_file, 81 | # opset_version=11, 82 | # input_names = ['input'], 83 | # output_names=['boxes','scores','landmarks'], 84 | # ) 85 | # model_onnx1 = onnx.load(onnx_file) 86 | # model_onnx1 = onnx.shape_inference.infer_shapes(model_onnx1) 87 | # onnx.save(model_onnx1, onnx_file) 88 | 89 | # model_onnx2 = onnx.load(onnx_file) 90 | # model_simp, check = simplify(model_onnx2) 91 | # onnx.save(model_simp, onnx_file) 92 | 93 | # # onnx_file = f"{MODEL}_HxW.onnx" 94 | # # x = torch.randn(1, 3, 192, 320).cuda() 95 | # # torch.onnx.export( 96 | # # self.model.module, 97 | # # args=(x), 98 | # # f=onnx_file, 99 | # # opset_version=11, 100 | # # input_names = ['input'], 101 | # # 
#output_names=['lines','scores'], 102 | # # dynamic_axes={ 103 | # # 'input' : {2: 'height', 3: 'width'}, 104 | # # } 105 | # # ) 106 | # # model_onnx1 = onnx.load(onnx_file) 107 | # # model_onnx1 = onnx.shape_inference.infer_shapes(model_onnx1) 108 | # # onnx.save(model_onnx1, onnx_file) 109 | 110 | # import sys 111 | # sys.exit(0) 112 | # ############################################################################# 113 | 114 | 115 | image_size = (im_height, im_width) 116 | if self.priors is None or self.previous_size != image_size: 117 | self.priors = PriorBox(self.config.__dict__, image_size=image_size).forward().to(self.device) 118 | self.previous_size = image_size 119 | prior_data = self.priors.data 120 | boxes = decode(loc.data.squeeze(0), prior_data, self.config.variance) 121 | boxes = boxes * scale 122 | boxes = boxes.cpu().numpy() 123 | # scores = conf.squeeze(0).data.cpu().numpy()[:, 1] 124 | scores = conf.squeeze(0).data.cpu().numpy() 125 | landms = decode_landm(landms.data.squeeze(0), prior_data, self.config.variance) 126 | scale1 = torch.Tensor( 127 | [ 128 | image.shape[3], 129 | image.shape[2], 130 | image.shape[3], 131 | image.shape[2], 132 | image.shape[3], 133 | image.shape[2], 134 | image.shape[3], 135 | image.shape[2], 136 | image.shape[3], 137 | image.shape[2] 138 | ] 139 | ).to(self.device) 140 | landms = landms * scale1 141 | landms = landms.cpu().numpy() 142 | 143 | # ignore low scores 144 | inds = np.where(scores > self.config.conf_thresh)[0] 145 | if len(inds) == 0: 146 | return np.empty(shape=(0, 15), dtype=np.float32) 147 | boxes = boxes[inds] 148 | landms = landms[inds] 149 | scores = scores[inds] 150 | 151 | # do NMS 152 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) 153 | keep = py_cpu_nms(dets, self.config.nms_thresh, self.config.nms_top_k) 154 | dets = dets[keep, :] 155 | landms = landms[keep] 156 | 157 | """ 158 | dets.shape (1, 5) 159 | x1,y1,x2,y2,score 160 | 161 | landms.shape (1, 10) 162 | """ 163 | 164 | # keep top-K 165 | dets = dets[:self.config.top_k, :] 166 | landms = landms[:self.config.top_k, :] 167 | dets = np.concatenate((dets, landms), axis=1) 168 | 169 | # further filter by confidence 170 | inds = np.where(dets[:, 4] >= self.threshold)[0] 171 | if len(inds) == 0: 172 | return np.empty(shape=(0, 15), dtype=np.float32) 173 | else: 174 | return dets[inds] 175 | -------------------------------------------------------------------------------- /ibug/face_alignment/fan/fan.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def conv3x3(in_planes, out_planes, strd=1, padding=1, bias=False): 7 | "3x3 convolution with padding" 8 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=strd, padding=padding, bias=bias) 9 | 10 | 11 | class ConvBlock(nn.Module): 12 | def __init__(self, in_planes, out_planes, use_instance_norm): 13 | super(ConvBlock, self).__init__() 14 | self.bn1 = nn.InstanceNorm2d(in_planes) if use_instance_norm else nn.BatchNorm2d(in_planes) 15 | self.conv1 = conv3x3(in_planes, int(out_planes / 2)) 16 | self.bn2 = (nn.InstanceNorm2d(int(out_planes / 2)) if use_instance_norm 17 | else nn.BatchNorm2d(int(out_planes / 2))) 18 | self.conv2 = conv3x3(int(out_planes / 2), int(out_planes / 4)) 19 | self.bn3 = (nn.InstanceNorm2d(int(out_planes / 4)) if use_instance_norm 20 | else nn.BatchNorm2d(int(out_planes / 4))) 21 | self.conv3 = conv3x3(int(out_planes / 4), int(out_planes / 
4)) 22 | 23 | if in_planes != out_planes: 24 | self.downsample = nn.Sequential(nn.InstanceNorm2d(in_planes) if use_instance_norm 25 | else nn.BatchNorm2d(in_planes), 26 | nn.ReLU(True), 27 | nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, bias=False)) 28 | else: 29 | self.downsample = None 30 | 31 | def forward(self, x): 32 | residual = x 33 | 34 | out1 = self.bn1(x) 35 | out1 = F.relu(out1, True) 36 | out1 = self.conv1(out1) 37 | 38 | out2 = self.bn2(out1) 39 | out2 = F.relu(out2, True) 40 | out2 = self.conv2(out2) 41 | 42 | out3 = self.bn3(out2) 43 | out3 = F.relu(out3, True) 44 | out3 = self.conv3(out3) 45 | 46 | out3 = torch.cat((out1, out2, out3), 1) 47 | 48 | if self.downsample is not None: 49 | residual = self.downsample(residual) 50 | 51 | out3 += residual 52 | 53 | return out3 54 | 55 | 56 | class HourGlass(nn.Module): 57 | def __init__(self, config): 58 | super(HourGlass, self).__init__() 59 | self.config = config 60 | 61 | self._generate_network(self.config.hg_depth) 62 | 63 | def _generate_network(self, level): 64 | self.add_module('b1_' + str(level), ConvBlock(self.config.hg_num_features, 65 | self.config.hg_num_features, 66 | self.config.use_instance_norm)) 67 | 68 | self.add_module('b2_' + str(level), ConvBlock(self.config.hg_num_features, 69 | self.config.hg_num_features, 70 | self.config.use_instance_norm)) 71 | 72 | if level > 1: 73 | self._generate_network(level - 1) 74 | else: 75 | self.add_module('b2_plus_' + str(level),ConvBlock(self.config.hg_num_features, 76 | self.config.hg_num_features, 77 | self.config.use_instance_norm)) 78 | 79 | self.add_module('b3_' + str(level), ConvBlock(self.config.hg_num_features, 80 | self.config.hg_num_features, 81 | self.config.use_instance_norm)) 82 | 83 | def _forward(self, level, inp): 84 | up1 = inp 85 | up1 = self._modules['b1_' + str(level)](up1) 86 | 87 | if self.config.use_avg_pool: 88 | low1 = F.avg_pool2d(inp, 2) 89 | else: 90 | low1 = F.max_pool2d(inp, 2) 91 | low1 = self._modules['b2_' + str(level)](low1) 92 | 93 | if level > 1: 94 | low2 = self._forward(level - 1, low1) 95 | else: 96 | low2 = low1 97 | low2 = self._modules['b2_plus_' + str(level)](low2) 98 | 99 | low3 = low2 100 | low3 = self._modules['b3_' + str(level)](low3) 101 | 102 | up2 = F.interpolate(low3, scale_factor=2, mode='nearest') 103 | 104 | return up1 + up2 105 | 106 | def forward(self, x): 107 | return self._forward(self.config.hg_depth, x) 108 | 109 | 110 | class FAN(nn.Module): 111 | def __init__(self, config): 112 | super(FAN, self).__init__() 113 | self.config = config 114 | 115 | # Stem 116 | self.conv1 = nn.Conv2d(3, 64, kernel_size=self.config.stem_conv_kernel_size, 117 | stride=self.config.stem_conv_stride, 118 | padding=self.config.stem_conv_kernel_size // 2) 119 | self.bn1 = nn.InstanceNorm2d(64) if self.config.use_instance_norm else nn.BatchNorm2d(64) 120 | self.conv2 = ConvBlock(64, 128, self.config.use_instance_norm) 121 | self.conv3 = ConvBlock(128, 128, self.config.use_instance_norm) 122 | self.conv4 = ConvBlock(128, self.config.hg_num_features, self.config.use_instance_norm) 123 | 124 | # Hourglasses 125 | for hg_module in range(self.config.num_modules): 126 | self.add_module('m' + str(hg_module), HourGlass(self.config)) 127 | self.add_module('top_m_' + str(hg_module), ConvBlock(self.config.hg_num_features, 128 | self.config.hg_num_features, 129 | self.config.use_instance_norm)) 130 | self.add_module('conv_last' + str(hg_module), nn.Conv2d(self.config.hg_num_features, 131 | self.config.hg_num_features, 132 | kernel_size=1, 
stride=1, padding=0)) 133 | self.add_module('bn_end' + str(hg_module), 134 | nn.InstanceNorm2d(self.config.hg_num_features) if self.config.use_instance_norm 135 | else nn.BatchNorm2d(self.config.hg_num_features)) 136 | self.add_module('l' + str(hg_module), nn.Conv2d(self.config.hg_num_features, 137 | self.config.num_landmarks, 138 | kernel_size=1, stride=1, padding=0)) 139 | 140 | if hg_module < self.config.num_modules - 1: 141 | self.add_module('bl' + str(hg_module), nn.Conv2d(self.config.hg_num_features, 142 | self.config.hg_num_features, 143 | kernel_size=1, stride=1, padding=0)) 144 | self.add_module('al' + str(hg_module), nn.Conv2d(self.config.num_landmarks, 145 | self.config.hg_num_features, 146 | kernel_size=1, stride=1, padding=0)) 147 | 148 | def forward(self, x): 149 | x = self.conv2(F.relu(self.bn1(self.conv1(x)), True)) 150 | if self.config.stem_pool_kernel_size > 1: 151 | if self.config.use_avg_pool: 152 | x = F.avg_pool2d(x, self.config.stem_pool_kernel_size) 153 | else: 154 | x = F.max_pool2d(x, self.config.stem_pool_kernel_size) 155 | x = self.conv3(x) 156 | x = self.conv4(x) 157 | 158 | previous = x 159 | hg_feats = [] 160 | tmp_out = None 161 | for i in range(self.config.num_modules): 162 | hg = self._modules['m' + str(i)](previous) 163 | 164 | ll = hg 165 | ll = self._modules['top_m_' + str(i)](ll) 166 | 167 | ll = F.relu(self._modules['bn_end' + str(i)](self._modules['conv_last' + str(i)](ll)), True) 168 | 169 | # Predict heatmaps 170 | tmp_out = self._modules['l' + str(i)](ll) 171 | 172 | if i < self.config.num_modules - 1: 173 | ll = self._modules['bl' + str(i)](ll) 174 | tmp_out_ = self._modules['al' + str(i)](tmp_out) 175 | previous = previous + ll + tmp_out_ 176 | 177 | hg_feats.append(ll) 178 | 179 | return tmp_out, x, tuple(hg_feats) 180 | -------------------------------------------------------------------------------- /ibug/face_alignment/fan/fan_custom.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def conv3x3(in_planes, out_planes, strd=1, padding=1, bias=False): 7 | "3x3 convolution with padding" 8 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=strd, padding=padding, bias=bias) 9 | 10 | 11 | class ConvBlock(nn.Module): 12 | def __init__(self, in_planes, out_planes, use_instance_norm): 13 | super(ConvBlock, self).__init__() 14 | self.bn1 = nn.InstanceNorm2d(in_planes) if use_instance_norm else nn.BatchNorm2d(in_planes) 15 | self.conv1 = conv3x3(in_planes, int(out_planes / 2)) 16 | self.bn2 = (nn.InstanceNorm2d(int(out_planes / 2)) if use_instance_norm 17 | else nn.BatchNorm2d(int(out_planes / 2))) 18 | self.conv2 = conv3x3(int(out_planes / 2), int(out_planes / 4)) 19 | self.bn3 = (nn.InstanceNorm2d(int(out_planes / 4)) if use_instance_norm 20 | else nn.BatchNorm2d(int(out_planes / 4))) 21 | self.conv3 = conv3x3(int(out_planes / 4), int(out_planes / 4)) 22 | 23 | if in_planes != out_planes: 24 | self.downsample = nn.Sequential(nn.InstanceNorm2d(in_planes) if use_instance_norm 25 | else nn.BatchNorm2d(in_planes), 26 | nn.ReLU(True), 27 | nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, bias=False)) 28 | else: 29 | self.downsample = None 30 | 31 | def forward(self, x): 32 | residual = x 33 | 34 | out1 = self.bn1(x) 35 | out1 = F.relu(out1, True) 36 | out1 = self.conv1(out1) 37 | 38 | out2 = self.bn2(out1) 39 | out2 = F.relu(out2, True) 40 | out2 = self.conv2(out2) 41 | 42 | out3 = self.bn3(out2) 43 | out3 = 
F.relu(out3, True) 44 | out3 = self.conv3(out3) 45 | 46 | out3 = torch.cat((out1, out2, out3), 1) 47 | 48 | if self.downsample is not None: 49 | residual = self.downsample(residual) 50 | 51 | out3 += residual 52 | 53 | return out3 54 | 55 | 56 | class HourGlass(nn.Module): 57 | def __init__(self, config): 58 | super(HourGlass, self).__init__() 59 | self.config = config 60 | 61 | self._generate_network(self.config.hg_depth) 62 | 63 | def _generate_network(self, level): 64 | self.add_module('b1_' + str(level), ConvBlock(self.config.hg_num_features, 65 | self.config.hg_num_features, 66 | self.config.use_instance_norm)) 67 | 68 | self.add_module('b2_' + str(level), ConvBlock(self.config.hg_num_features, 69 | self.config.hg_num_features, 70 | self.config.use_instance_norm)) 71 | 72 | if level > 1: 73 | self._generate_network(level - 1) 74 | else: 75 | self.add_module('b2_plus_' + str(level),ConvBlock(self.config.hg_num_features, 76 | self.config.hg_num_features, 77 | self.config.use_instance_norm)) 78 | 79 | self.add_module('b3_' + str(level), ConvBlock(self.config.hg_num_features, 80 | self.config.hg_num_features, 81 | self.config.use_instance_norm)) 82 | 83 | def _forward(self, level, inp): 84 | up1 = inp 85 | up1 = self._modules['b1_' + str(level)](up1) 86 | 87 | if self.config.use_avg_pool: 88 | low1 = F.avg_pool2d(inp, 2) 89 | else: 90 | low1 = F.max_pool2d(inp, 2) 91 | low1 = self._modules['b2_' + str(level)](low1) 92 | 93 | if level > 1: 94 | low2 = self._forward(level - 1, low1) 95 | else: 96 | low2 = low1 97 | low2 = self._modules['b2_plus_' + str(level)](low2) 98 | 99 | low3 = low2 100 | low3 = self._modules['b3_' + str(level)](low3) 101 | 102 | up2 = F.interpolate(low3, scale_factor=2, mode='nearest') 103 | 104 | return up1 + up2 105 | 106 | def forward(self, x): 107 | return self._forward(self.config.hg_depth, x) 108 | 109 | 110 | class FAN(nn.Module): 111 | def __init__(self, config): 112 | super(FAN, self).__init__() 113 | self.config = config 114 | 115 | # Stem 116 | self.conv1 = nn.Conv2d(3, 64, kernel_size=self.config.stem_conv_kernel_size, 117 | stride=self.config.stem_conv_stride, 118 | padding=self.config.stem_conv_kernel_size // 2) 119 | self.bn1 = nn.InstanceNorm2d(64) if self.config.use_instance_norm else nn.BatchNorm2d(64) 120 | self.conv2 = ConvBlock(64, 128, self.config.use_instance_norm) 121 | self.conv3 = ConvBlock(128, 128, self.config.use_instance_norm) 122 | self.conv4 = ConvBlock(128, self.config.hg_num_features, self.config.use_instance_norm) 123 | 124 | # Hourglasses 125 | for hg_module in range(self.config.num_modules): 126 | self.add_module('m' + str(hg_module), HourGlass(self.config)) 127 | self.add_module('top_m_' + str(hg_module), ConvBlock(self.config.hg_num_features, 128 | self.config.hg_num_features, 129 | self.config.use_instance_norm)) 130 | self.add_module('conv_last' + str(hg_module), nn.Conv2d(self.config.hg_num_features, 131 | self.config.hg_num_features, 132 | kernel_size=1, stride=1, padding=0)) 133 | self.add_module('bn_end' + str(hg_module), 134 | nn.InstanceNorm2d(self.config.hg_num_features) if self.config.use_instance_norm 135 | else nn.BatchNorm2d(self.config.hg_num_features)) 136 | self.add_module('l' + str(hg_module), nn.Conv2d(self.config.hg_num_features, 137 | self.config.num_landmarks, 138 | kernel_size=1, stride=1, padding=0)) 139 | 140 | if hg_module < self.config.num_modules - 1: 141 | self.add_module('bl' + str(hg_module), nn.Conv2d(self.config.hg_num_features, 142 | self.config.hg_num_features, 143 | kernel_size=1, stride=1, 
padding=0)) 144 | self.add_module('al' + str(hg_module), nn.Conv2d(self.config.num_landmarks, 145 | self.config.hg_num_features, 146 | kernel_size=1, stride=1, padding=0)) 147 | 148 | def forward(self, x): 149 | x = self.conv2(F.relu(self.bn1(self.conv1(x)), True)) 150 | if self.config.stem_pool_kernel_size > 1: 151 | if self.config.use_avg_pool: 152 | x = F.avg_pool2d(x, self.config.stem_pool_kernel_size) 153 | else: 154 | x = F.max_pool2d(x, self.config.stem_pool_kernel_size) 155 | x = self.conv3(x) 156 | x = self.conv4(x) 157 | 158 | previous = x 159 | hg_feats = [] 160 | tmp_out = None 161 | for i in range(self.config.num_modules): 162 | hg = self._modules['m' + str(i)](previous) 163 | 164 | ll = hg 165 | ll = self._modules['top_m_' + str(i)](ll) 166 | 167 | ll = F.relu(self._modules['bn_end' + str(i)](self._modules['conv_last' + str(i)](ll)), True) 168 | 169 | # Predict heatmaps 170 | tmp_out = self._modules['l' + str(i)](ll) 171 | 172 | if i < self.config.num_modules - 1: 173 | ll = self._modules['bl' + str(i)](ll) 174 | tmp_out_ = self._modules['al' + str(i)](tmp_out) 175 | previous = previous + ll + tmp_out_ 176 | 177 | hg_feats.append(ll) 178 | 179 | # return tmp_out, x, tuple(hg_feats) 180 | return self._decode(tmp_out), tmp_out 181 | 182 | 183 | 184 | def _decode(self, heatmaps: torch.Tensor): 185 | heatmaps = heatmaps.contiguous() 186 | scores = heatmaps.max(dim=3)[0].max(dim=2)[0] 187 | 188 | if (self.config.radius ** 2 * heatmaps.shape[2] * heatmaps.shape[3] < 189 | heatmaps.shape[2] ** 2 + heatmaps.shape[3] ** 2): 190 | # Find peaks in all heatmaps 191 | m = heatmaps.view(heatmaps.shape[0] * heatmaps.shape[1], -1).argmax(1) 192 | # all_peaks = torch.cat( 193 | # [(m / heatmaps.shape[3]).trunc().view(-1, 1), (m % heatmaps.shape[3]).view(-1, 1)], dim=1 194 | # ).reshape((heatmaps.shape[0], heatmaps.shape[1], 1, 1, 2)).repeat( 195 | # 1, 1, heatmaps.shape[2], heatmaps.shape[3], 1).float() 196 | all_peaks = torch.cat( 197 | [torch.div(m, heatmaps.shape[3], rounding_mode="trunc").view(-1, 1), (m % heatmaps.shape[3]).view(-1, 1)], dim=1 198 | ).reshape((heatmaps.shape[0], heatmaps.shape[1], 1, 1, 2)).repeat( 199 | 1, 1, heatmaps.shape[2], heatmaps.shape[3], 1).float() 200 | 201 | 202 | # Apply masks created from the peaks 203 | all_indices = torch.zeros_like(all_peaks) + torch.stack( 204 | [ 205 | torch.arange(0.0, all_peaks.shape[2], device=all_peaks.device).unsqueeze(-1).repeat(1, all_peaks.shape[3]), 206 | torch.arange(0.0, all_peaks.shape[3], device=all_peaks.device).unsqueeze(0).repeat(all_peaks.shape[2], 1) 207 | ], dim=-1) 208 | heatmaps = heatmaps * ((all_indices - all_peaks).norm(dim=-1) <= self.config.radius * 209 | (heatmaps.shape[2] * heatmaps.shape[3]) ** 0.5).float() 210 | 211 | # Prepare the indices for calculating centroids 212 | x_indices = (torch.zeros((*heatmaps.shape[:2], heatmaps.shape[3]), device=heatmaps.device) + torch.arange(0.5, heatmaps.shape[3], device=heatmaps.device)) 213 | y_indices = (torch.zeros(heatmaps.shape[:3], device=heatmaps.device) + torch.arange(0.5, heatmaps.shape[2], device=heatmaps.device)) 214 | 215 | # Finally, find centroids as landmark locations 216 | heatmaps = heatmaps.clamp_min(0.0) 217 | if self.config.gamma != 1.0: 218 | heatmaps = heatmaps.pow(self.config.gamma) 219 | m00s = heatmaps.sum(dim=(2, 3)).clamp_min(torch.finfo(heatmaps.dtype).eps) 220 | xs = heatmaps.sum(dim=2).mul(x_indices).sum(dim=2).div(m00s) 221 | ys = heatmaps.sum(dim=3).mul(y_indices).sum(dim=2).div(m00s) 222 | 223 | lm_info = torch.stack((xs, ys, scores), 
dim=-1)#.cpu().numpy() 224 | # return lm_info[..., :-1], lm_info[..., -1] 225 | return lm_info -------------------------------------------------------------------------------- /ibug/face_alignment/fan/fan_predictor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import torch 4 | import numpy as np 5 | from types import SimpleNamespace 6 | from typing import Union, Optional, Tuple 7 | # from .fan_custom import FAN 8 | from ibug.face_alignment.fan.fan import FAN 9 | 10 | 11 | __all__ = ['FANPredictor'] 12 | 13 | 14 | class FANPredictor(object): 15 | def __init__(self, device: Union[str, torch.device] = 'cuda:0', model: Optional[SimpleNamespace] = None, 16 | config: Optional[SimpleNamespace] = None) -> None: 17 | self.device = device 18 | if model is None: 19 | model = FANPredictor.get_model() 20 | if config is None: 21 | config = FANPredictor.create_config() 22 | self.config = SimpleNamespace(**model.config.__dict__, **config.__dict__) 23 | self.net = FAN(config=self.config).to(self.device) 24 | self.net.load_state_dict(torch.load(model.weights, map_location=self.device)) 25 | self.net.eval() 26 | if self.config.use_jit: 27 | self.net = torch.jit.trace(self.net, torch.rand(1, 3, self.config.input_size, 28 | self.config.input_size).to(self.device)) 29 | 30 | @staticmethod 31 | def get_model(name: str = '2dfan2') -> SimpleNamespace: 32 | name = name.lower() 33 | if name == '2dfan2': 34 | return SimpleNamespace(weights=os.path.join(os.path.dirname(__file__), 'weights', '2dfan2.pth'), 35 | config=SimpleNamespace(crop_ratio=0.55, input_size=256, num_modules=2, 36 | hg_num_features=256, hg_depth=4, use_avg_pool=False, 37 | use_instance_norm=False, stem_conv_kernel_size=7, 38 | stem_conv_stride=2, stem_pool_kernel_size=2, 39 | num_landmarks=68)) 40 | elif name == '2dfan4': 41 | return SimpleNamespace(weights=os.path.join(os.path.dirname(__file__), 'weights', '2dfan4.pth'), 42 | config=SimpleNamespace(crop_ratio=0.55, input_size=256, num_modules=4, 43 | hg_num_features=256, hg_depth=4, use_avg_pool=True, 44 | use_instance_norm=False, stem_conv_kernel_size=7, 45 | stem_conv_stride=2, stem_pool_kernel_size=2, 46 | num_landmarks=68)) 47 | elif name == '2dfan2_alt': 48 | return SimpleNamespace(weights=os.path.join(os.path.dirname(__file__), 'weights', '2dfan2_alt.pth'), 49 | config=SimpleNamespace(crop_ratio=0.55, input_size=256, num_modules=2, 50 | hg_num_features=256, hg_depth=4, use_avg_pool=False, 51 | use_instance_norm=False, stem_conv_kernel_size=7, 52 | stem_conv_stride=2, stem_pool_kernel_size=2, 53 | num_landmarks=68)) 54 | else: 55 | raise ValueError('name must be set to either 2dfan2, 2dfan4, or 2dfan2_alt') 56 | 57 | @staticmethod 58 | def create_config(gamma: float = 1.0, radius: float = 0.1, use_jit: bool = True) -> SimpleNamespace: 59 | return SimpleNamespace(gamma=gamma, radius=radius, use_jit=use_jit) 60 | 61 | @torch.no_grad() 62 | def __call__(self, image: np.ndarray, face_boxes: np.ndarray, rgb: bool = True, 63 | return_features: bool = False) -> Union[Tuple[np.ndarray, np.ndarray], 64 | Tuple[np.ndarray, np.ndarray, torch.Tensor]]: 65 | if face_boxes.size > 0: 66 | if not rgb: 67 | image = image[..., ::-1] 68 | if face_boxes.ndim == 1: 69 | face_boxes = face_boxes[np.newaxis, ...] 
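            # face_boxes is now always 2-D (one row per face), so a single box
            # passed as a 1-D array is handled like a batch of boxes. Below,
            # each box is enlarged to face_size / crop_ratio (about 1.8x with
            # the default crop_ratio of 0.55) around its centre before being
            # cropped and resized to input_size x input_size for the network.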
70 | 71 | # Crop the faces 72 | face_patches = [] 73 | centres = (face_boxes[:, [0, 1]] + face_boxes[:, [2, 3]]) / 2.0 74 | face_sizes = (face_boxes[:, [3, 2]] - face_boxes[:, [1, 0]]).mean(axis=1) 75 | enlarged_face_box_sizes = (face_sizes / self.config.crop_ratio)[:, np.newaxis].repeat(2, axis=1) 76 | enlarged_face_boxes = np.zeros_like(face_boxes[:, :4]) 77 | enlarged_face_boxes[:, :2] = np.round(centres - enlarged_face_box_sizes / 2.0) 78 | enlarged_face_boxes[:, 2:] = np.round(enlarged_face_boxes[:, :2] + enlarged_face_box_sizes) + 1 79 | enlarged_face_boxes = enlarged_face_boxes.astype(int) 80 | outer_bounding_box = np.hstack((enlarged_face_boxes[:, :2].min(axis=0), 81 | enlarged_face_boxes[:, 2:].max(axis=0))) 82 | pad_widths = np.zeros(shape=(3, 2), dtype=int) 83 | if outer_bounding_box[0] < 0: 84 | pad_widths[1][0] = -outer_bounding_box[0] 85 | if outer_bounding_box[1] < 0: 86 | pad_widths[0][0] = -outer_bounding_box[1] 87 | if outer_bounding_box[2] > image.shape[1]: 88 | pad_widths[1][1] = outer_bounding_box[2] - image.shape[1] 89 | if outer_bounding_box[3] > image.shape[0]: 90 | pad_widths[0][1] = outer_bounding_box[3] - image.shape[0] 91 | if np.any(pad_widths > 0): 92 | image = np.pad(image, pad_widths) 93 | for left, top, right, bottom in enlarged_face_boxes: 94 | left += pad_widths[1][0] 95 | top += pad_widths[0][0] 96 | right += pad_widths[1][0] 97 | bottom += pad_widths[0][0] 98 | face_patches.append(cv2.resize(image[top: bottom, left: right, :], 99 | (self.config.input_size, self.config.input_size))) 100 | face_patches = torch.from_numpy(np.array(face_patches).transpose( 101 | (0, 3, 1, 2)).astype(np.float32)).to(self.device) / 255.0 102 | 103 | # Get heatmaps 104 | heatmaps, stem_feats, hg_feats = self.net(face_patches) 105 | # landmarks, landmark_scores = self.net(face_patches) 106 | 107 | # import onnx 108 | # from onnxsim import simplify 109 | # RESOLUTION = [ 110 | # [256,256], 111 | # ] 112 | 113 | # # MODEL = f'2dfan2_alt' 114 | # MODEL = f'2dfan4' 115 | 116 | # for H, W in RESOLUTION: 117 | # onnx_file = f"{MODEL}_1x3x{H}x{W}.onnx" 118 | # x = torch.randn(1, 3, H, W).cuda() 119 | # torch.onnx.export( 120 | # self.net, 121 | # args=(x), 122 | # f=onnx_file, 123 | # opset_version=11, 124 | # input_names = ['input'], 125 | # output_names=['landmarks_xyscore','heatmaps'], 126 | # ) 127 | # model_onnx1 = onnx.load(onnx_file) 128 | # model_onnx1 = onnx.shape_inference.infer_shapes(model_onnx1) 129 | # onnx.save(model_onnx1, onnx_file) 130 | 131 | # model_onnx2 = onnx.load(onnx_file) 132 | # model_simp, check = simplify(model_onnx2) 133 | # onnx.save(model_simp, onnx_file) 134 | 135 | # onnx_file = f"{MODEL}_Nx3x{H}x{W}.onnx" 136 | # x = torch.randn(1, 3, H, W).cuda() 137 | # torch.onnx.export( 138 | # self.net, 139 | # args=(x), 140 | # f=onnx_file, 141 | # opset_version=11, 142 | # input_names = ['input'], 143 | # output_names=['landmarks_xyscore','heatmaps'], 144 | # dynamic_axes={ 145 | # # 'input' : {2: 'height', 3: 'width'}, 146 | # 'input' : {0: 'N'}, 147 | # # 'input' : {0: 'N', 2: 'height', 3: 'width'}, 148 | # 'landmarks_xyscore': {0: 'N'}, 149 | # 'heatmaps': {0: 'N'}, 150 | # } 151 | # ) 152 | # model_onnx1 = onnx.load(onnx_file) 153 | # model_onnx1 = onnx.shape_inference.infer_shapes(model_onnx1) 154 | # onnx.save(model_onnx1, onnx_file) 155 | 156 | # import sys 157 | # sys.exit(0) 158 | 159 | 160 | 161 | # Get landmark coordinates and scores 162 | landmarks, landmark_scores = self._decode(heatmaps) 163 | 164 | # Rectify landmark coordinates 165 | hh, hw = 
heatmaps.size(2), heatmaps.size(3) 166 | for landmark, (left, top, right, bottom) in zip(landmarks, enlarged_face_boxes): 167 | landmark[:, 0] = landmark[:, 0] * (right - left) / hw + left 168 | landmark[:, 1] = landmark[:, 1] * (bottom - top) / hh + top 169 | 170 | if return_features: 171 | return landmarks, landmark_scores, torch.cat((stem_feats, torch.cat(hg_feats, dim=1) * torch.sum(heatmaps, dim=1, keepdim=True)), dim=1) 172 | else: 173 | return landmarks, landmark_scores 174 | else: 175 | landmarks = np.empty(shape=(0, 68, 2), dtype=np.float32) 176 | landmark_scores = np.empty(shape=(0, 68), dtype=np.float32) 177 | if return_features: 178 | return landmarks, landmark_scores, torch.Tensor([]) 179 | else: 180 | return landmarks, landmark_scores 181 | 182 | 183 | 184 | def _decode(self, heatmaps: torch.Tensor) -> Tuple[np.ndarray, np.ndarray]: 185 | heatmaps = heatmaps.contiguous() 186 | scores = heatmaps.max(dim=3)[0].max(dim=2)[0] 187 | 188 | if (self.config.radius ** 2 * heatmaps.shape[2] * heatmaps.shape[3] < 189 | heatmaps.shape[2] ** 2 + heatmaps.shape[3] ** 2): 190 | # Find peaks in all heatmaps 191 | m = heatmaps.view(heatmaps.shape[0] * heatmaps.shape[1], -1).argmax(1) 192 | all_peaks = torch.cat( 193 | [(m / heatmaps.shape[3]).trunc().view(-1, 1), (m % heatmaps.shape[3]).view(-1, 1)], dim=1 194 | ).reshape((heatmaps.shape[0], heatmaps.shape[1], 1, 1, 2)).repeat( 195 | 1, 1, heatmaps.shape[2], heatmaps.shape[3], 1).float() 196 | 197 | # Apply masks created from the peaks 198 | all_indices = torch.zeros_like(all_peaks) + torch.stack( 199 | [torch.arange(0.0, all_peaks.shape[2], 200 | device=all_peaks.device).unsqueeze(-1).repeat(1, all_peaks.shape[3]), 201 | torch.arange(0.0, all_peaks.shape[3], 202 | device=all_peaks.device).unsqueeze(0).repeat(all_peaks.shape[2], 1)], dim=-1) 203 | heatmaps = heatmaps * ((all_indices - all_peaks).norm(dim=-1) <= self.config.radius * 204 | (heatmaps.shape[2] * heatmaps.shape[3]) ** 0.5).float() 205 | 206 | # Prepare the indices for calculating centroids 207 | x_indices = (torch.zeros((*heatmaps.shape[:2], heatmaps.shape[3]), device=heatmaps.device) + 208 | torch.arange(0.5, heatmaps.shape[3], device=heatmaps.device)) 209 | y_indices = (torch.zeros(heatmaps.shape[:3], device=heatmaps.device) + 210 | torch.arange(0.5, heatmaps.shape[2], device=heatmaps.device)) 211 | 212 | # Finally, find centroids as landmark locations 213 | heatmaps = heatmaps.clamp_min(0.0) 214 | if self.config.gamma != 1.0: 215 | heatmaps = heatmaps.pow(self.config.gamma) 216 | m00s = heatmaps.sum(dim=(2, 3)).clamp_min(torch.finfo(heatmaps.dtype).eps) 217 | xs = heatmaps.sum(dim=2).mul(x_indices).sum(dim=2).div(m00s) 218 | ys = heatmaps.sum(dim=3).mul(y_indices).sum(dim=2).div(m00s) 219 | 220 | lm_info = torch.stack((xs, ys, scores), dim=-1).cpu().numpy() 221 | return lm_info[..., :-1], lm_info[..., -1] 222 | -------------------------------------------------------------------------------- /ibug/face_detection/retina_face/box_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def point_form(boxes): 6 | """ Convert prior_boxes to (xmin, ymin, xmax, ymax) 7 | representation for comparison to point form ground truth data. 8 | Args: 9 | boxes: (tensor) center-size default boxes from priorbox layers. 10 | Return: 11 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 
12 | """ 13 | return torch.cat( 14 | ( 15 | boxes[:, :2] - boxes[:, 2:]/2, # xmin, ymin 16 | boxes[:, :2] + boxes[:, 2:]/2, # xmax, ymax 17 | ), 18 | 1, 19 | ) 20 | 21 | 22 | def center_size(boxes): 23 | """ Convert prior_boxes to (cx, cy, w, h) 24 | representation for comparison to center-size form ground truth data. 25 | Args: 26 | boxes: (tensor) point_form boxes 27 | Return: 28 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 29 | """ 30 | return torch.cat( 31 | ( 32 | boxes[:, 2:] + boxes[:, :2])/2, # cx, cy 33 | boxes[:, 2:] - boxes[:, :2], # w, h 34 | 1, 35 | ) 36 | 37 | 38 | def intersect(box_a, box_b): 39 | """ We resize both tensors to [A,B,2] without new malloc: 40 | [A,2] -> [A,1,2] -> [A,B,2] 41 | [B,2] -> [1,B,2] -> [A,B,2] 42 | Then we compute the area of intersect between box_a and box_b. 43 | Args: 44 | box_a: (tensor) bounding boxes, Shape: [A,4]. 45 | box_b: (tensor) bounding boxes, Shape: [B,4]. 46 | Return: 47 | (tensor) intersection area, Shape: [A,B]. 48 | """ 49 | A = box_a.size(0) 50 | B = box_b.size(0) 51 | max_xy = torch.min( 52 | box_a[:, 2:].unsqueeze(1).expand(A, B, 2), 53 | box_b[:, 2:].unsqueeze(0).expand(A, B, 2) 54 | ) 55 | min_xy = torch.max( 56 | box_a[:, :2].unsqueeze(1).expand(A, B, 2), 57 | box_b[:, :2].unsqueeze(0).expand(A, B, 2) 58 | ) 59 | inter = torch.clamp((max_xy - min_xy), min=0) 60 | return inter[:, :, 0] * inter[:, :, 1] 61 | 62 | 63 | def jaccard(box_a, box_b): 64 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap 65 | is simply the intersection over union of two boxes. Here we operate on 66 | ground truth boxes and default boxes. 67 | E.g.: 68 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) 69 | Args: 70 | box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] 71 | box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] 72 | Return: 73 | jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] 74 | """ 75 | inter = intersect(box_a, box_b) 76 | area_a = ( 77 | (box_a[:, 2]-box_a[:, 0]) * 78 | (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B] 79 | area_b = ( 80 | (box_b[:, 2]-box_b[:, 0]) * 81 | (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B] 82 | union = area_a + area_b - inter 83 | return inter / union # [A,B] 84 | 85 | 86 | def matrix_iou(a, b): 87 | """ 88 | return iou of a and b, numpy version for data augenmentation 89 | """ 90 | lt = np.maximum(a[:, np.newaxis, :2], b[:, :2]) 91 | rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) 92 | 93 | area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2) 94 | area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) 95 | area_b = np.prod(b[:, 2:] - b[:, :2], axis=1) 96 | return area_i / (area_a[:, np.newaxis] + area_b - area_i) 97 | 98 | 99 | def matrix_iof(a, b): 100 | """ 101 | return iof of a and b, numpy version for data augenmentation 102 | """ 103 | lt = np.maximum(a[:, np.newaxis, :2], b[:, :2]) 104 | rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) 105 | 106 | area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2) 107 | area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) 108 | return area_i / np.maximum(area_a[:, np.newaxis], 1) 109 | 110 | 111 | def match(threshold, truths, priors, variances, labels, landms, loc_t, conf_t, landm_t, idx): 112 | """Match each prior box with the ground truth box of the highest jaccard 113 | overlap, encode the bounding boxes, then return the matched indices 114 | corresponding to both confidence and location preds. 
115 | Args: 116 | threshold: (float) The overlap threshold used when mathing boxes. 117 | truths: (tensor) Ground truth boxes, Shape: [num_obj, 4]. 118 | priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4]. 119 | variances: (tensor) Variances corresponding to each prior coord, 120 | Shape: [num_priors, 4]. 121 | labels: (tensor) All the class labels for the image, Shape: [num_obj]. 122 | landms: (tensor) Ground truth landms, Shape [num_obj, 10]. 123 | loc_t: (tensor) Tensor to be filled w/ endcoded location targets. 124 | conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds. 125 | landm_t: (tensor) Tensor to be filled w/ endcoded landm targets. 126 | idx: (int) current batch index 127 | Return: 128 | The matched indices corresponding to 1)location 2)confidence 3)landm preds. 129 | """ 130 | # jaccard index 131 | overlaps = jaccard( 132 | truths, 133 | point_form(priors) 134 | ) 135 | # (Bipartite Matching) 136 | # [1,num_objects] best prior for each ground truth 137 | best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True) 138 | 139 | # ignore hard gt 140 | valid_gt_idx = best_prior_overlap[:, 0] >= 0.2 141 | best_prior_idx_filter = best_prior_idx[valid_gt_idx, :] 142 | if best_prior_idx_filter.shape[0] <= 0: 143 | loc_t[idx] = 0 144 | conf_t[idx] = 0 145 | return 146 | 147 | # [1,num_priors] best ground truth for each prior 148 | best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True) 149 | best_truth_idx.squeeze_(0) 150 | best_truth_overlap.squeeze_(0) 151 | best_prior_idx.squeeze_(1) 152 | best_prior_idx_filter.squeeze_(1) 153 | best_prior_overlap.squeeze_(1) 154 | best_truth_overlap.index_fill_(0, best_prior_idx_filter, 2) # ensure best prior 155 | # TODO refactor: index best_prior_idx with long tensor 156 | # ensure every gt matches with its prior of max overlap 157 | for j in range(best_prior_idx.size(0)): # 判别此anchor是预测哪一个boxes 158 | best_truth_idx[best_prior_idx[j]] = j 159 | matches = truths[best_truth_idx] # Shape: [num_priors,4] 此处为每一个anchor对应的bbox取出来 160 | conf = labels[best_truth_idx] # Shape: [num_priors] 此处为每一个anchor对应的label取出来 161 | conf[best_truth_overlap < threshold] = 0 # label as background overlap<0.35的全部作为负样本 162 | loc = encode(matches, priors, variances) 163 | 164 | matches_landm = landms[best_truth_idx] 165 | landm = encode_landm(matches_landm, priors, variances) 166 | loc_t[idx] = loc # [num_priors,4] encoded offsets to learn 167 | conf_t[idx] = conf # [num_priors] top class label for each prior 168 | landm_t[idx] = landm 169 | 170 | 171 | def encode(matched, priors, variances): 172 | """Encode the variances from the priorbox layers into the ground truth boxes 173 | we have matched (based on jaccard overlap) with the prior boxes. 174 | Args: 175 | matched: (tensor) Coords of ground truth for each prior in point-form 176 | Shape: [num_priors, 4]. 177 | priors: (tensor) Prior boxes in center-offset form 178 | Shape: [num_priors,4]. 
179 | variances: (list[float]) Variances of priorboxes 180 | Return: 181 | encoded boxes (tensor), Shape: [num_priors, 4] 182 | """ 183 | 184 | # dist b/t match center and prior's center 185 | g_cxcy = (matched[:, :2] + matched[:, 2:])/2 - priors[:, :2] 186 | # encode variance 187 | g_cxcy /= (variances[0] * priors[:, 2:]) 188 | # match wh / prior wh 189 | g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:] 190 | g_wh = torch.log(g_wh) / variances[1] 191 | # return target for smooth_l1_loss 192 | return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4] 193 | 194 | 195 | def encode_landm(matched, priors, variances): 196 | """Encode the variances from the priorbox layers into the ground truth boxes 197 | we have matched (based on jaccard overlap) with the prior boxes. 198 | Args: 199 | matched: (tensor) Coords of ground truth for each prior in point-form 200 | Shape: [num_priors, 10]. 201 | priors: (tensor) Prior boxes in center-offset form 202 | Shape: [num_priors,4]. 203 | variances: (list[float]) Variances of priorboxes 204 | Return: 205 | encoded landm (tensor), Shape: [num_priors, 10] 206 | """ 207 | 208 | # dist b/t match center and prior's center 209 | matched = torch.reshape(matched, (matched.size(0), 5, 2)) 210 | priors_cx = priors[:, 0].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2) 211 | priors_cy = priors[:, 1].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2) 212 | priors_w = priors[:, 2].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2) 213 | priors_h = priors[:, 3].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2) 214 | priors = torch.cat([priors_cx, priors_cy, priors_w, priors_h], dim=2) 215 | g_cxcy = matched[:, :, :2] - priors[:, :, :2] 216 | # encode variance 217 | g_cxcy /= (variances[0] * priors[:, :, 2:]) 218 | # g_cxcy /= priors[:, :, 2:] 219 | g_cxcy = g_cxcy.reshape(g_cxcy.size(0), -1) 220 | # return target for smooth_l1_loss 221 | return g_cxcy 222 | 223 | 224 | # Adapted from https://github.com/Hakuyume/chainer-ssd 225 | def decode(loc, priors, variances): 226 | """Decode locations from predictions using priors to undo 227 | the encoding we did for offset regression at train time. 228 | Args: 229 | loc (tensor): location predictions for loc layers, 230 | Shape: [num_priors,4] 231 | priors (tensor): Prior boxes in center-offset form. 232 | Shape: [num_priors,4]. 233 | variances: (list[float]) Variances of priorboxes 234 | Return: 235 | decoded bounding box predictions 236 | """ 237 | 238 | boxes = torch.cat(( 239 | priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], 240 | priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) 241 | boxes[:, :2] -= boxes[:, 2:] / 2 242 | boxes[:, 2:] += boxes[:, :2] 243 | return boxes 244 | 245 | 246 | def decode_landm(pre, priors, variances): 247 | """Decode landm from predictions using priors to undo 248 | the encoding we did for offset regression at train time. 249 | Args: 250 | pre (tensor): landm predictions for loc layers, 251 | Shape: [num_priors,10] 252 | priors (tensor): Prior boxes in center-offset form. 253 | Shape: [num_priors,4]. 
254 | variances: (list[float]) Variances of priorboxes 255 | Return: 256 | decoded landm predictions 257 | """ 258 | landms = torch.cat((priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:], 259 | priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:], 260 | priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:], 261 | priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:], 262 | priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:], 263 | ), dim=1) 264 | return landms 265 | 266 | 267 | def log_sum_exp(x): 268 | """Utility function for computing log_sum_exp while determining 269 | This will be used to determine unaveraged confidence loss across 270 | all examples in a batch. 271 | Args: 272 | x (Variable(tensor)): conf_preds from conf layers 273 | """ 274 | x_max = x.data.max() 275 | return torch.log(torch.sum(torch.exp(x-x_max), 1, keepdim=True)) + x_max 276 | 277 | 278 | # Original author: Francisco Massa: 279 | # https://github.com/fmassa/object-detection.torch 280 | # Ported to PyTorch by Max deGroot (02/01/2017) 281 | def nms(boxes, scores, overlap=0.5, top_k=200): 282 | """Apply non-maximum suppression at test time to avoid detecting too many 283 | overlapping bounding boxes for a given object. 284 | Args: 285 | boxes: (tensor) The location preds for the img, Shape: [num_priors,4]. 286 | scores: (tensor) The class predscores for the img, Shape:[num_priors]. 287 | overlap: (float) The overlap thresh for suppressing unnecessary boxes. 288 | top_k: (int) The Maximum number of box preds to consider. 289 | Return: 290 | The indices of the kept boxes with respect to num_priors. 291 | """ 292 | 293 | keep = torch.Tensor(scores.size(0)).fill_(0).long() 294 | if boxes.numel() == 0: 295 | return keep 296 | x1 = boxes[:, 0] 297 | y1 = boxes[:, 1] 298 | x2 = boxes[:, 2] 299 | y2 = boxes[:, 3] 300 | area = torch.mul(x2 - x1, y2 - y1) 301 | v, idx = scores.sort(0) # sort in ascending order 302 | # I = I[v >= 0.01] 303 | idx = idx[-top_k:] # indices of the top-k largest vals 304 | xx1 = boxes.new() 305 | yy1 = boxes.new() 306 | xx2 = boxes.new() 307 | yy2 = boxes.new() 308 | w = boxes.new() 309 | h = boxes.new() 310 | 311 | # keep = torch.Tensor() 312 | count = 0 313 | while idx.numel() > 0: 314 | i = idx[-1] # index of current largest val 315 | # keep.append(i) 316 | keep[count] = i 317 | count += 1 318 | if idx.size(0) == 1: 319 | break 320 | idx = idx[:-1] # remove kept element from view 321 | # load bboxes of next highest vals 322 | torch.index_select(x1, 0, idx, out=xx1) 323 | torch.index_select(y1, 0, idx, out=yy1) 324 | torch.index_select(x2, 0, idx, out=xx2) 325 | torch.index_select(y2, 0, idx, out=yy2) 326 | # store element-wise max with next highest score 327 | xx1 = torch.clamp(xx1, min=x1[i]) 328 | yy1 = torch.clamp(yy1, min=y1[i]) 329 | xx2 = torch.clamp(xx2, max=x2[i]) 330 | yy2 = torch.clamp(yy2, max=y2[i]) 331 | w.resize_as_(xx2) 332 | h.resize_as_(yy2) 333 | w = xx2 - xx1 334 | h = yy2 - yy1 335 | # check sizes of xx1 and xx2.. 
after each iteration 336 | w = torch.clamp(w, min=0.0) 337 | h = torch.clamp(h, min=0.0) 338 | inter = w*h 339 | # IoU = i / (area(a) + area(b) - i) 340 | rem_areas = torch.index_select(area, 0, idx) # load remaining areas) 341 | union = (rem_areas - inter) + area[i] 342 | IoU = inter/union # store result in iou 343 | # keep only elements with an IoU <= overlap 344 | idx = idx[IoU.le(overlap)] 345 | return keep, count 346 | 347 | 348 | -------------------------------------------------------------------------------- /11_create_masked_face_dataset_yolo_test_yolov4_filter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import cv2 5 | import glob 6 | import copy 7 | import numpy as np 8 | import onnxruntime 9 | from tqdm import tqdm 10 | from natsort import natsorted 11 | from argparse import ArgumentParser 12 | from typing import Tuple, Optional, List 13 | 14 | 15 | class YOLOv4ONNX(object): 16 | def __init__( 17 | self, 18 | model_path: Optional[str] = 'yolov4_headdetection_480x640_post.onnx', 19 | input_shape: Optional[Tuple[int,int]] = (480, 640), 20 | class_score_th: Optional[float] = 0.20, 21 | providers: Optional[List] = [ 22 | ( 23 | 'TensorrtExecutionProvider', { 24 | 'trt_engine_cache_enable': True, 25 | 'trt_engine_cache_path': '.', 26 | 'trt_fp16_enable': True, 27 | } 28 | ), 29 | 'CUDAExecutionProvider', 30 | 'CPUExecutionProvider', 31 | ], 32 | ): 33 | """YOLOv4ONNX 34 | 35 | Parameters 36 | ---------- 37 | model_path: Optional[str] 38 | ONNX file path for YOLOv4 39 | 40 | input_shape: Optional[Tuple[int,int]] 41 | Model Input Resolution, Default: (480,640) 42 | 43 | class_score_th: Optional[float] 44 | 45 | class_score_th: Optional[float] 46 | Score threshold. 
Default: 0.20 47 | 48 | providers: Optional[List] 49 | Name of onnx execution providers 50 | Default: 51 | [ 52 | 'TensorrtExecutionProvider', 53 | 'CUDAExecutionProvider', 54 | 'CPUExecutionProvider', 55 | ] 56 | """ 57 | # Input size 58 | self.input_shape = input_shape 59 | 60 | # Threshold 61 | self.class_score_th = class_score_th 62 | 63 | # Model loading 64 | session_option = onnxruntime.SessionOptions() 65 | session_option.log_severity_level = 3 66 | self.onnx_session = onnxruntime.InferenceSession( 67 | model_path, 68 | sess_options=session_option, 69 | providers=providers, 70 | ) 71 | self.providers = self.onnx_session.get_providers() 72 | 73 | self.input_name = self.onnx_session.get_inputs()[0].name 74 | self.output_name = self.onnx_session.get_outputs()[0].name 75 | 76 | 77 | def __call__( 78 | self, 79 | image: np.ndarray, 80 | ) -> Tuple[np.ndarray, np.ndarray]: 81 | """YOLOV4ONNX 82 | 83 | Parameters 84 | ---------- 85 | image: np.ndarray 86 | Entire image 87 | 88 | Returns 89 | ------- 90 | faceboxes: np.ndarray 91 | Predicted face boxes: [facecount, x1, y1, x2, y2] 92 | 93 | facescores: np.ndarray 94 | Predicted face box confs: [facecount, conf] 95 | """ 96 | temp_image = copy.deepcopy(image) 97 | 98 | # PreProcess 99 | resized_image = self.__preprocess( 100 | temp_image, 101 | ) 102 | 103 | # Inference 104 | inferece_image = np.asarray([resized_image], dtype=np.float32) 105 | boxes = self.onnx_session.run( 106 | None, 107 | {self.input_name: inferece_image}, 108 | )[0] 109 | 110 | # PostProcess 111 | faceboxes, facescores = self.__postprocess( 112 | image= temp_image, 113 | boxes=boxes, 114 | ) 115 | 116 | return faceboxes, facescores 117 | 118 | 119 | def __preprocess( 120 | self, 121 | image: np.ndarray, 122 | swap: Optional[Tuple[int,int,int]] = (2, 0, 1), 123 | ) -> np.ndarray: 124 | """__preprocess 125 | 126 | Parameters 127 | ---------- 128 | image: np.ndarray 129 | Entire image 130 | 131 | swap: tuple 132 | HWC to CHW: (2,0,1) 133 | CHW to HWC: (1,2,0) 134 | HWC to HWC: (0,1,2) 135 | CHW to CHW: (0,1,2) 136 | 137 | Returns 138 | ------- 139 | resized_image: np.ndarray 140 | Resized and normalized image. 141 | """ 142 | # Normalization + BGR->RGB 143 | resized_image = cv2.resize( 144 | image, 145 | ( 146 | int(self.input_shape[1]), # type: ignore 147 | int(self.input_shape[0]), # type: ignore 148 | ) 149 | ) 150 | resized_image = np.divide(resized_image, 255.0) # type: ignore 151 | resized_image = resized_image.transpose(swap) 152 | resized_image = np.ascontiguousarray(resized_image, dtype=np.float32) 153 | return resized_image 154 | 155 | 156 | def __postprocess( 157 | self, 158 | image: np.ndarray, 159 | boxes: np.ndarray, 160 | ) -> Tuple[np.ndarray, np.ndarray]: 161 | """__postprocess 162 | 163 | Parameters 164 | ---------- 165 | image: np.ndarray 166 | Entire image. 
167 | 168 | boxes: np.ndarray 169 | (boxcount, 5) = (boxcount, x1y1x2y2score) 170 | 171 | Returns 172 | ------- 173 | faceboxes: np.ndarray 174 | Predicted face boxes: [facecount, x1, y1, x2, y2] 175 | 176 | facescores: np.ndarray 177 | Predicted face box confs: [facecount, score] 178 | """ 179 | image_height = image.shape[0] 180 | image_width = image.shape[1] 181 | 182 | scores = boxes[:,4] 183 | keep_idxs = scores > self.class_score_th 184 | boxes_keep = boxes[keep_idxs, :] 185 | 186 | faceboxes = [] 187 | facescores = [] 188 | 189 | if len(boxes_keep) > 0: 190 | boxes_keep[:, 0] = boxes_keep[:, 0] * image_width 191 | boxes_keep[:, 1] = boxes_keep[:, 1] * image_height 192 | boxes_keep[:, 2] = boxes_keep[:, 2] * image_width 193 | boxes_keep[:, 3] = boxes_keep[:, 3] * image_height 194 | 195 | for box in boxes_keep: 196 | x_min = int(box[0]) if int(box[0]) > 0 else 0 197 | y_min = int(box[1]) if int(box[1]) > 0 else 0 198 | x_max = int(box[2]) if int(box[2]) < image_width else image_width 199 | y_max = int(box[3]) if int(box[3]) < image_height else image_height 200 | score = box[4] 201 | 202 | faceboxes.append( 203 | [ 204 | x_min, 205 | y_min, 206 | x_max, 207 | y_max, 208 | ] 209 | ) 210 | facescores.append( 211 | [ 212 | score 213 | ] 214 | ) 215 | 216 | return np.asarray(faceboxes), np.asarray(facescores) # type: ignore 217 | 218 | 219 | class YOLOv7ONNX(object): 220 | def __init__( 221 | self, 222 | model_path: Optional[str] = 'yolov7_tiny_head_0.752_post_480x640.onnx', 223 | class_score_th: Optional[float] = 0.30, 224 | providers: Optional[List] = [ 225 | # ( 226 | # 'TensorrtExecutionProvider', { 227 | # 'trt_engine_cache_enable': True, 228 | # 'trt_engine_cache_path': '.', 229 | # 'trt_fp16_enable': True, 230 | # } 231 | # ), 232 | 'CUDAExecutionProvider', 233 | 'CPUExecutionProvider', 234 | ], 235 | ): 236 | """YOLOv7ONNX 237 | Parameters 238 | ---------- 239 | model_path: Optional[str] 240 | ONNX file path for YOLOv7 241 | class_score_th: Optional[float] 242 | class_score_th: Optional[float] 243 | Score threshold. 
Default: 0.30 244 | providers: Optional[List] 245 | Name of onnx execution providers 246 | Default: 247 | [ 248 | ( 249 | 'TensorrtExecutionProvider', { 250 | 'trt_engine_cache_enable': True, 251 | 'trt_engine_cache_path': '.', 252 | 'trt_fp16_enable': True, 253 | } 254 | ), 255 | 'CUDAExecutionProvider', 256 | 'CPUExecutionProvider', 257 | ] 258 | """ 259 | # Threshold 260 | self.class_score_th = class_score_th 261 | 262 | # Model loading 263 | session_option = onnxruntime.SessionOptions() 264 | session_option.log_severity_level = 3 265 | self.onnx_session = onnxruntime.InferenceSession( 266 | model_path, 267 | sess_options=session_option, 268 | providers=providers, 269 | ) 270 | self.providers = self.onnx_session.get_providers() 271 | 272 | self.input_shapes = [ 273 | input.shape for input in self.onnx_session.get_inputs() 274 | ] 275 | self.input_names = [ 276 | input.name for input in self.onnx_session.get_inputs() 277 | ] 278 | self.output_names = [ 279 | output.name for output in self.onnx_session.get_outputs() 280 | ] 281 | 282 | 283 | def __call__( 284 | self, 285 | image: np.ndarray, 286 | ) -> Tuple[np.ndarray, np.ndarray]: 287 | """YOLOv7ONNX 288 | Parameters 289 | ---------- 290 | image: np.ndarray 291 | Entire image 292 | Returns 293 | ------- 294 | face_boxes: np.ndarray 295 | Predicted face boxes: [facecount, y1, x1, y2, x2] 296 | face_scores: np.ndarray 297 | Predicted face box scores: [facecount, score] 298 | """ 299 | temp_image = copy.deepcopy(image) 300 | 301 | # PreProcess 302 | resized_image = self.__preprocess( 303 | temp_image, 304 | ) 305 | 306 | # Inference 307 | inferece_image = np.asarray([resized_image], dtype=np.float32) 308 | scores, boxes = self.onnx_session.run( 309 | self.output_names, 310 | {input_name: inferece_image for input_name in self.input_names}, 311 | ) 312 | 313 | # PostProcess 314 | face_boxes, face_scores = self.__postprocess( 315 | image=temp_image, 316 | scores=scores, 317 | boxes=boxes, 318 | ) 319 | 320 | return face_boxes, face_scores 321 | 322 | 323 | def __preprocess( 324 | self, 325 | image: np.ndarray, 326 | swap: Optional[Tuple[int,int,int]] = (2, 0, 1), 327 | ) -> np.ndarray: 328 | """__preprocess 329 | Parameters 330 | ---------- 331 | image: np.ndarray 332 | Entire image 333 | swap: tuple 334 | HWC to CHW: (2,0,1) 335 | CHW to HWC: (1,2,0) 336 | HWC to HWC: (0,1,2) 337 | CHW to CHW: (0,1,2) 338 | Returns 339 | ------- 340 | resized_image: np.ndarray 341 | Resized and normalized image. 342 | """ 343 | # Normalization + BGR->RGB 344 | resized_image = cv2.resize( 345 | image, 346 | ( 347 | int(self.input_shapes[0][3]), 348 | int(self.input_shapes[0][2]), 349 | ) 350 | ) 351 | resized_image = np.divide(resized_image, 255.0) 352 | resized_image = resized_image[..., ::-1] 353 | resized_image = resized_image.transpose(swap) 354 | resized_image = np.ascontiguousarray( 355 | resized_image, 356 | dtype=np.float32, 357 | ) 358 | return resized_image 359 | 360 | 361 | def __postprocess( 362 | self, 363 | image: np.ndarray, 364 | scores: np.ndarray, 365 | boxes: np.ndarray, 366 | ) -> Tuple[np.ndarray, np.ndarray]: 367 | """__postprocess 368 | Parameters 369 | ---------- 370 | image: np.ndarray 371 | Entire image. 
372 | scores: np.ndarray 373 | float32[N, 1] 374 | boxes: np.ndarray 375 | int64[N, 6] 376 | Returns 377 | ------- 378 | faceboxes: np.ndarray 379 | Predicted face boxes: [facecount, y1, x1, y2, x2] 380 | facescores: np.ndarray 381 | Predicted face box confs: [facecount, score] 382 | """ 383 | image_height = image.shape[0] 384 | image_width = image.shape[1] 385 | 386 | """ 387 | Head Detector is 388 | N -> Number of boxes detected 389 | batchno -> always 0: BatchNo.0 390 | classid -> always 0: "Head" 391 | scores: float32[N,1], 392 | batchno_classid_y1x1y2x2: int64[N,6], 393 | """ 394 | scores = scores 395 | keep_idxs = scores[:, 0] > self.class_score_th 396 | scores_keep = scores[keep_idxs, :] 397 | boxes_keep = boxes[keep_idxs, :] 398 | faceboxes = [] 399 | facescores = [] 400 | 401 | if len(boxes_keep) > 0: 402 | for box, score in zip(boxes_keep, scores_keep): 403 | x_min = max(int(box[3]), 0) 404 | y_min = max(int(box[2]), 0) 405 | x_max = min(int(box[5]), image_width) 406 | y_max = min(int(box[4]), image_height) 407 | 408 | faceboxes.append( 409 | [x_min, y_min, x_max, y_max] 410 | ) 411 | facescores.append( 412 | score 413 | ) 414 | 415 | return np.asarray(faceboxes), np.asarray(facescores) 416 | 417 | 418 | def main(): 419 | parser = ArgumentParser() 420 | parser.add_argument( 421 | '-y', 422 | '--yolo_mode', 423 | type=str, 424 | default='yolov4', 425 | choices=['yolov4', 'yolov7'] 426 | ) 427 | parser.add_argument( 428 | '-i', 429 | '--image_folder_path', 430 | type=str, 431 | default='300W_LP_onlyone_person', 432 | ) 433 | args = parser.parse_args() 434 | 435 | yolo_mode = args.yolo_mode 436 | 437 | model = None 438 | if yolo_mode == 'yolov4': 439 | model = YOLOv4ONNX( 440 | model_path='yolov4_headdetection_480x640_post.onnx', 441 | class_score_th=0.80, 442 | ) 443 | elif yolo_mode == 'yolov7': 444 | model = YOLOv7ONNX( 445 | model_path='yolov7_tiny_head_0.752_post_480x640.onnx', 446 | # class_score_th=0.90, 447 | ) 448 | 449 | image_files = glob.glob(f"{args.image_folder_path}/*/*.jpg") 450 | 451 | image_count = 0 452 | for image_file in tqdm(natsorted(image_files), dynamic_ncols=True): 453 | 454 | dirname = os.path.dirname(image_file) 455 | # print(f'@@@ dirname: {dirname} split: {dirname.split("/")}') 456 | new_dirname = f'{args.image_folder_path}_yolov4_filterd/{dirname.split("/")[1]}' 457 | os.makedirs(new_dirname, exist_ok=True) 458 | 459 | image = cv2.imread(image_file) 460 | 461 | debug_image = copy.deepcopy(image) 462 | face_boxes, face_scores = model(debug_image) 463 | 464 | if len(face_boxes) == 1: 465 | 466 | # for face_box, face_score in zip(face_boxes, face_scores): 467 | 468 | # x_min = int(face_box[0]) 469 | # y_min = int(face_box[1]) 470 | # x_max = int(face_box[2]) 471 | # y_max = int(face_box[3]) 472 | 473 | # # add margin 474 | # y_min = int(max(0, y_min - abs(y_min - y_max) / 17)) 475 | # y_max = int(min(image.shape[0], y_max + abs(y_min - y_max) / 17)) 476 | # x_min = int(max(0, x_min - abs(x_min - x_max) / 7)) 477 | # x_max = min(image.shape[1], x_max + abs(x_min - x_max) / 7) 478 | # x_max = int(min(x_max, image.shape[1])) 479 | 480 | # cv2.rectangle( 481 | # debug_image, 482 | # (x_min, y_min), 483 | # (x_max, y_max), 484 | # (255,255,255), 485 | # 2, 486 | # ) 487 | # cv2.rectangle( 488 | # debug_image, 489 | # (x_min, y_min), 490 | # (x_max, y_max), 491 | # (0,255,0), 492 | # 1, 493 | # ) 494 | # cv2.putText( 495 | # debug_image, 496 | # f'{face_score[0]:.2f}', 497 | # ( 498 | # x_min, 499 | # y_min-10 if y_min-10 > 0 else 20 500 | # ), 501 | # 
cv2.FONT_HERSHEY_SIMPLEX, 502 | # 0.7, 503 | # (255, 255, 255), 504 | # 2, 505 | # cv2.LINE_AA, 506 | # ) 507 | # cv2.putText( 508 | # debug_image, 509 | # f'{face_score[0]:.2f}', 510 | # ( 511 | # x_min, 512 | # y_min-10 if y_min-10 > 0 else 20 513 | # ), 514 | # cv2.FONT_HERSHEY_SIMPLEX, 515 | # 0.7, 516 | # (0, 255, 0), 517 | # 1, 518 | # cv2.LINE_AA, 519 | # ) 520 | 521 | # cv2.imshow("test", debug_image) 522 | 523 | # key = cv2.waitKey(0) 524 | # if key == 27: # ESC 525 | # break 526 | 527 | basename = os.path.basename(image_file) 528 | cv2.imwrite(f'{new_dirname}/{basename}', image) 529 | image_count += 1 530 | 531 | print(f'image_count: {image_count}') 532 | 533 | if __name__ == "__main__": 534 | main() -------------------------------------------------------------------------------- /12_create_masked_face_dataset_yolo_test_yolov4_annotation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import cv2 5 | import glob 6 | import copy 7 | import json 8 | import numpy as np 9 | import onnxruntime 10 | from tqdm import tqdm 11 | from pathlib import Path 12 | from natsort import natsorted 13 | from argparse import ArgumentParser 14 | from typing import Tuple, Optional, List 15 | from sklearn.model_selection import train_test_split 16 | 17 | # input image width/height of the yolov4 model, set by command-line argument 18 | INPUT_WIDTH = 0 19 | INPUT_HEIGHT = 0 20 | 21 | # Minimum width/height of objects for detection (don't learn from objects smaller than these) 22 | MIN_W = 5 23 | MIN_H = 5 24 | 25 | # Do K-Means clustering in order to determine "anchor" sizes 26 | DO_KMEANS = True 27 | KMEANS_CLUSTERS = 9 28 | BBOX_WHS = [] # keep track of bbox width/height with respect to 640x640 29 | 30 | 31 | class YOLOv4ONNX(object): 32 | def __init__( 33 | self, 34 | model_path: Optional[str] = 'yolov4_headdetection_480x640_post.onnx', 35 | input_shape: Optional[Tuple[int,int]] = (480, 640), 36 | class_score_th: Optional[float] = 0.20, 37 | providers: Optional[List] = [ 38 | ( 39 | 'TensorrtExecutionProvider', { 40 | 'trt_engine_cache_enable': True, 41 | 'trt_engine_cache_path': '.', 42 | 'trt_fp16_enable': True, 43 | } 44 | ), 45 | 'CUDAExecutionProvider', 46 | 'CPUExecutionProvider', 47 | ], 48 | ): 49 | """YOLOv4ONNX 50 | 51 | Parameters 52 | ---------- 53 | model_path: Optional[str] 54 | ONNX file path for YOLOv4 55 | 56 | input_shape: Optional[Tuple[int,int]] 57 | Model Input Resolution, Default: (480,640) 58 | 59 | class_score_th: Optional[float] 60 | 61 | class_score_th: Optional[float] 62 | Score threshold. 
Default: 0.20 63 | 64 | providers: Optional[List] 65 | Name of onnx execution providers 66 | Default: 67 | [ 68 | 'TensorrtExecutionProvider', 69 | 'CUDAExecutionProvider', 70 | 'CPUExecutionProvider', 71 | ] 72 | """ 73 | # Input size 74 | self.input_shape = input_shape 75 | 76 | # Threshold 77 | self.class_score_th = class_score_th 78 | 79 | # Model loading 80 | session_option = onnxruntime.SessionOptions() 81 | session_option.log_severity_level = 3 82 | self.onnx_session = onnxruntime.InferenceSession( 83 | model_path, 84 | sess_options=session_option, 85 | providers=providers, 86 | ) 87 | self.providers = self.onnx_session.get_providers() 88 | 89 | self.input_name = self.onnx_session.get_inputs()[0].name 90 | self.output_name = self.onnx_session.get_outputs()[0].name 91 | 92 | 93 | def __call__( 94 | self, 95 | image: np.ndarray, 96 | ) -> Tuple[np.ndarray, np.ndarray]: 97 | """YOLOV4ONNX 98 | 99 | Parameters 100 | ---------- 101 | image: np.ndarray 102 | Entire image 103 | 104 | Returns 105 | ------- 106 | faceboxes: np.ndarray 107 | Predicted face boxes: [facecount, x1, y1, x2, y2] 108 | 109 | facescores: np.ndarray 110 | Predicted face box confs: [facecount, conf] 111 | """ 112 | temp_image = copy.deepcopy(image) 113 | 114 | # PreProcess 115 | resized_image = self.__preprocess( 116 | temp_image, 117 | ) 118 | 119 | # Inference 120 | inferece_image = np.asarray([resized_image], dtype=np.float32) 121 | boxes = self.onnx_session.run( 122 | None, 123 | {self.input_name: inferece_image}, 124 | )[0] 125 | 126 | # PostProcess 127 | faceboxes, facescores = self.__postprocess( 128 | image= temp_image, 129 | boxes=boxes, 130 | ) 131 | 132 | return faceboxes, facescores 133 | 134 | 135 | def __preprocess( 136 | self, 137 | image: np.ndarray, 138 | swap: Optional[Tuple[int,int,int]] = (2, 0, 1), 139 | ) -> np.ndarray: 140 | """__preprocess 141 | 142 | Parameters 143 | ---------- 144 | image: np.ndarray 145 | Entire image 146 | 147 | swap: tuple 148 | HWC to CHW: (2,0,1) 149 | CHW to HWC: (1,2,0) 150 | HWC to HWC: (0,1,2) 151 | CHW to CHW: (0,1,2) 152 | 153 | Returns 154 | ------- 155 | resized_image: np.ndarray 156 | Resized and normalized image. 157 | """ 158 | # Normalization + BGR->RGB 159 | resized_image = cv2.resize( 160 | image, 161 | ( 162 | int(self.input_shape[1]), # type: ignore 163 | int(self.input_shape[0]), # type: ignore 164 | ) 165 | ) 166 | resized_image = np.divide(resized_image, 255.0) # type: ignore 167 | resized_image = resized_image.transpose(swap) 168 | resized_image = np.ascontiguousarray(resized_image, dtype=np.float32) 169 | return resized_image 170 | 171 | 172 | def __postprocess( 173 | self, 174 | image: np.ndarray, 175 | boxes: np.ndarray, 176 | ) -> Tuple[np.ndarray, np.ndarray]: 177 | """__postprocess 178 | 179 | Parameters 180 | ---------- 181 | image: np.ndarray 182 | Entire image. 
183 | 184 | boxes: np.ndarray 185 | (boxcount, 5) = (boxcount, x1y1x2y2score) 186 | 187 | Returns 188 | ------- 189 | faceboxes: np.ndarray 190 | Predicted face boxes: [facecount, x1, y1, x2, y2] 191 | 192 | facescores: np.ndarray 193 | Predicted face box confs: [facecount, score] 194 | """ 195 | image_height = image.shape[0] 196 | image_width = image.shape[1] 197 | 198 | scores = boxes[:,4] 199 | keep_idxs = scores > self.class_score_th 200 | boxes_keep = boxes[keep_idxs, :] 201 | 202 | faceboxes = [] 203 | facescores = [] 204 | 205 | if len(boxes_keep) > 0: 206 | boxes_keep[:, 0] = boxes_keep[:, 0] * image_width 207 | boxes_keep[:, 1] = boxes_keep[:, 1] * image_height 208 | boxes_keep[:, 2] = boxes_keep[:, 2] * image_width 209 | boxes_keep[:, 3] = boxes_keep[:, 3] * image_height 210 | 211 | for box in boxes_keep: 212 | x_min = int(box[0]) if int(box[0]) > 0 else 0 213 | y_min = int(box[1]) if int(box[1]) > 0 else 0 214 | x_max = int(box[2]) if int(box[2]) < image_width else image_width 215 | y_max = int(box[3]) if int(box[3]) < image_height else image_height 216 | score = box[4] 217 | 218 | faceboxes.append( 219 | [ 220 | x_min, 221 | y_min, 222 | x_max, 223 | y_max, 224 | ] 225 | ) 226 | facescores.append( 227 | [ 228 | score 229 | ] 230 | ) 231 | 232 | return np.asarray(faceboxes), np.asarray(facescores) # type: ignore 233 | 234 | 235 | class YOLOv7ONNX(object): 236 | def __init__( 237 | self, 238 | model_path: Optional[str] = 'yolov7_tiny_head_0.752_post_480x640.onnx', 239 | class_score_th: Optional[float] = 0.30, 240 | providers: Optional[List] = [ 241 | # ( 242 | # 'TensorrtExecutionProvider', { 243 | # 'trt_engine_cache_enable': True, 244 | # 'trt_engine_cache_path': '.', 245 | # 'trt_fp16_enable': True, 246 | # } 247 | # ), 248 | 'CUDAExecutionProvider', 249 | 'CPUExecutionProvider', 250 | ], 251 | ): 252 | """YOLOv7ONNX 253 | Parameters 254 | ---------- 255 | model_path: Optional[str] 256 | ONNX file path for YOLOv7 257 | 258 | class_score_th: Optional[float] 259 | Score threshold. Default: 0.30
260 | providers: Optional[List] 261 | Name of onnx execution providers 262 | Default: 263 | [ 264 | ( 265 | 'TensorrtExecutionProvider', { 266 | 'trt_engine_cache_enable': True, 267 | 'trt_engine_cache_path': '.', 268 | 'trt_fp16_enable': True, 269 | } 270 | ), 271 | 'CUDAExecutionProvider', 272 | 'CPUExecutionProvider', 273 | ] 274 | """ 275 | # Threshold 276 | self.class_score_th = class_score_th 277 | 278 | # Model loading 279 | session_option = onnxruntime.SessionOptions() 280 | session_option.log_severity_level = 3 281 | self.onnx_session = onnxruntime.InferenceSession( 282 | model_path, 283 | sess_options=session_option, 284 | providers=providers, 285 | ) 286 | self.providers = self.onnx_session.get_providers() 287 | 288 | self.input_shapes = [ 289 | input.shape for input in self.onnx_session.get_inputs() 290 | ] 291 | self.input_names = [ 292 | input.name for input in self.onnx_session.get_inputs() 293 | ] 294 | self.output_names = [ 295 | output.name for output in self.onnx_session.get_outputs() 296 | ] 297 | 298 | 299 | def __call__( 300 | self, 301 | image: np.ndarray, 302 | ) -> Tuple[np.ndarray, np.ndarray]: 303 | """YOLOv7ONNX 304 | Parameters 305 | ---------- 306 | image: np.ndarray 307 | Entire image 308 | Returns 309 | ------- 310 | face_boxes: np.ndarray 311 | Predicted face boxes: [facecount, x1, y1, x2, y2] 312 | face_scores: np.ndarray 313 | Predicted face box scores: [facecount, score] 314 | """ 315 | temp_image = copy.deepcopy(image) 316 | 317 | # PreProcess 318 | resized_image = self.__preprocess( 319 | temp_image, 320 | ) 321 | 322 | # Inference 323 | inference_image = np.asarray([resized_image], dtype=np.float32) 324 | scores, boxes = self.onnx_session.run( 325 | self.output_names, 326 | {input_name: inference_image for input_name in self.input_names}, 327 | ) 328 | 329 | # PostProcess 330 | face_boxes, face_scores = self.__postprocess( 331 | image=temp_image, 332 | scores=scores, 333 | boxes=boxes, 334 | ) 335 | 336 | return face_boxes, face_scores 337 | 338 | 339 | def __preprocess( 340 | self, 341 | image: np.ndarray, 342 | swap: Optional[Tuple[int,int,int]] = (2, 0, 1), 343 | ) -> np.ndarray: 344 | """__preprocess 345 | Parameters 346 | ---------- 347 | image: np.ndarray 348 | Entire image 349 | swap: tuple 350 | HWC to CHW: (2,0,1) 351 | CHW to HWC: (1,2,0) 352 | HWC to HWC: (0,1,2) 353 | CHW to CHW: (0,1,2) 354 | Returns 355 | ------- 356 | resized_image: np.ndarray 357 | Resized and normalized image. 358 | """ 359 | # Normalization + BGR->RGB 360 | resized_image = cv2.resize( 361 | image, 362 | ( 363 | int(self.input_shapes[0][3]), 364 | int(self.input_shapes[0][2]), 365 | ) 366 | ) 367 | resized_image = np.divide(resized_image, 255.0) 368 | resized_image = resized_image[..., ::-1] 369 | resized_image = resized_image.transpose(swap) 370 | resized_image = np.ascontiguousarray( 371 | resized_image, 372 | dtype=np.float32, 373 | ) 374 | return resized_image 375 | 376 | 377 | def __postprocess( 378 | self, 379 | image: np.ndarray, 380 | scores: np.ndarray, 381 | boxes: np.ndarray, 382 | ) -> Tuple[np.ndarray, np.ndarray]: 383 | """__postprocess 384 | Parameters 385 | ---------- 386 | image: np.ndarray 387 | Entire image.
388 | scores: np.ndarray 389 | float32[N, 1] 390 | boxes: np.ndarray 391 | int64[N, 6] 392 | Returns 393 | ------- 394 | faceboxes: np.ndarray 395 | Predicted face boxes: [facecount, x1, y1, x2, y2] 396 | facescores: np.ndarray 397 | Predicted face box confs: [facecount, score] 398 | """ 399 | image_height = image.shape[0] 400 | image_width = image.shape[1] 401 | 402 | """ 403 | Head Detector outputs: 404 | N -> Number of boxes detected 405 | batchno -> always 0: BatchNo.0 406 | classid -> always 0: "Head" 407 | scores: float32[N,1], 408 | batchno_classid_y1x1y2x2: int64[N,6], 409 | """ 410 | 411 | keep_idxs = scores[:, 0] > self.class_score_th 412 | scores_keep = scores[keep_idxs, :] 413 | boxes_keep = boxes[keep_idxs, :] 414 | faceboxes = [] 415 | facescores = [] 416 | 417 | if len(boxes_keep) > 0: 418 | for box, score in zip(boxes_keep, scores_keep): 419 | x_min = max(int(box[3]), 0) 420 | y_min = max(int(box[2]), 0) 421 | x_max = min(int(box[5]), image_width) 422 | y_max = min(int(box[4]), image_height) 423 | 424 | faceboxes.append( 425 | [x_min, y_min, x_max, y_max] 426 | ) 427 | facescores.append( 428 | score 429 | ) 430 | 431 | return np.asarray(faceboxes), np.asarray(facescores) 432 | 433 | 434 | def txt_line(cls, bbox, img_w, img_h): 435 | """Generate one line of a YOLO-format label txt file.""" 436 | x, y, w, h = bbox 437 | x = max(int(x), 0) 438 | y = max(int(y), 0) 439 | w = min(int(w), img_w - x) 440 | h = min(int(h), img_h - y) 441 | w_rescaled = float(w) * INPUT_WIDTH / img_w 442 | h_rescaled = float(h) * INPUT_HEIGHT / img_h 443 | if w_rescaled < MIN_W or h_rescaled < MIN_H: 444 | return '' 445 | else: 446 | if DO_KMEANS: 447 | global BBOX_WHS 448 | BBOX_WHS.append((w_rescaled, h_rescaled)) 449 | cx = (x + w / 2.) / img_w 450 | cy = (y + h / 2.)
/ img_h 451 | nw = float(w) / img_w 452 | nh = float(h) / img_h 453 | return f'{int(cls)} {cx:.6f} {cy:.6f} {nw:.6f} {nh:.6f}\n' 454 | 455 | 456 | def process(set_, data_list, output_dir, model): 457 | """Process either 'train' or 'test' set.""" 458 | jpgs = [] 459 | raw_anno_count = 0 460 | print(f'** Processing Sets: {set_}') 461 | for image_file_path in tqdm(data_list, dynamic_ncols=True): 462 | image = cv2.imread(image_file_path) 463 | img_h, img_w, img_c = image.shape 464 | basename = os.path.basename(image_file_path) 465 | basename_without_ext = os.path.splitext(basename)[0] 466 | txt_path = output_dir / (f'{basename_without_ext}.txt') 467 | 468 | # inference 469 | face_boxes, face_scores = model(image) 470 | if len(face_boxes) == 1: 471 | line_count = 0 472 | with open(txt_path.as_posix(), 'w') as ftxt: 473 | for face_box, face_score in zip(face_boxes, face_scores): 474 | x_min = int(face_box[0]) 475 | y_min = int(face_box[1]) 476 | x_max = int(face_box[2]) 477 | y_max = int(face_box[3]) 478 | 479 | # add margin 480 | y_min = int(max(0, y_min - abs(y_min - y_max) / 17)) 481 | y_max = int(min(img_h, y_max + abs(y_min - y_max) / 17)) 482 | x_min = int(max(0, x_min - abs(x_min - x_max) / 7)) 483 | x_max = min(img_w, x_max + abs(x_min - x_max) / 7) 484 | x_max = int(min(x_max, img_w)) 485 | w = int(x_max - x_min) 486 | h = int(y_max - y_min) 487 | bbox = [x_min, y_min, w, h] 488 | 489 | line = txt_line(0, bbox, img_w, img_h) 490 | if line: 491 | ftxt.write(line) 492 | line_count += 1 493 | 494 | if line_count > 0: 495 | jpgs.append(f'{output_dir}/{basename_without_ext}.jpg') 496 | cv2.imwrite(f'{output_dir}/{basename_without_ext}.jpg', image) 497 | raw_anno_count += 1 498 | 499 | 500 | print(f'** Processed Images: {raw_anno_count}') 501 | # write the 'data/300wlp-{args.dim}/train.txt' or 'data/300wlp-{args.dim}/test.txt' 502 | set_path = output_dir / (f'{set_}.txt') 503 | with open(set_path.as_posix(), 'w') as fset: 504 | for jpg in jpgs: 505 | fset.write(f'{jpg}\n') 506 | 507 | 508 | def rm_txts(output_dir): 509 | """Remove txt files in output_dir.""" 510 | for txt in output_dir.glob('*.txt'): 511 | if txt.is_file(): 512 | txt.unlink() 513 | 514 | 515 | def main(): 516 | global INPUT_WIDTH, INPUT_HEIGHT 517 | 518 | parser = ArgumentParser() 519 | parser.add_argument( 520 | '-y', 521 | '--yolo_mode', 522 | type=str, 523 | default='yolov4', 524 | choices=['yolov4', 'yolov7'] 525 | ) 526 | parser.add_argument( 527 | '-i', 528 | '--image_folder_path', 529 | type=str, 530 | default='300W_LP_onlyone_person_yolov4_filterd', 531 | ) 532 | parser.add_argument( 533 | '-d', 534 | '--dim', 535 | type=str, 536 | default='640x480', 537 | help='input width and height, e.g. 
640x480' 538 | ) 539 | args = parser.parse_args() 540 | 541 | yolo_mode = args.yolo_mode 542 | 543 | model = None 544 | if yolo_mode == 'yolov4': 545 | model = YOLOv4ONNX( 546 | model_path='yolov4_headdetection_480x640_post.onnx', 547 | class_score_th=0.80, 548 | ) 549 | elif yolo_mode == 'yolov7': 550 | model = YOLOv7ONNX( 551 | model_path='yolov7_tiny_head_0.752_post_480x640.onnx', 552 | # class_score_th=0.90, 553 | ) 554 | 555 | dim_split = args.dim.split('x') 556 | if len(dim_split) != 2: 557 | raise SystemExit(f'ERROR: bad spec of input dim ({args.dim})') 558 | INPUT_WIDTH, INPUT_HEIGHT = int(dim_split[0]), int(dim_split[1]) 559 | if INPUT_WIDTH % 32 != 0 or INPUT_HEIGHT % 32 != 0: 560 | raise SystemExit(f'ERROR: bad spec of input dim ({args.dim})') 561 | 562 | output_dir = Path(f'data/300wlp-{args.dim}') 563 | output_dir.mkdir(parents=True, exist_ok=True) 564 | rm_txts(output_dir) 565 | 566 | # Train:Test = 0.95:0.05 567 | image_files = natsorted(glob.glob(f"{args.image_folder_path}/*/*.jpg")) 568 | train_list, test_list = train_test_split( 569 | image_files, 570 | test_size=0.05, 571 | train_size=0.95, 572 | random_state=1, 573 | ) 574 | 575 | 576 | process(set_='test', data_list=test_list, output_dir=output_dir, model=model) 577 | process(set_='train', data_list=train_list, output_dir=output_dir, model=model) 578 | 579 | 580 | if __name__ == "__main__": 581 | main() --------------------------------------------------------------------------------
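Note: the label lines written by txt_line() in 12_create_masked_face_dataset_yolo_test_yolov4_annotation.py follow the standard YOLO (Darknet) annotation convention, one object per line: "<class> <cx> <cy> <w> <h>", with the box center and size normalized by the image width and height. The sketch below illustrates that conversion and its inverse; the helper names (to_yolo_line, from_yolo_line) and the example box values are illustrative only and are not part of the repository.

# Minimal sketch (not part of the repository) of the YOLO label format
# produced by txt_line(): "<class> <cx> <cy> <w> <h>", all normalized to [0, 1].

def to_yolo_line(cls, x, y, w, h, img_w, img_h):
    # (x, y) = top-left corner in pixels, (w, h) = box size in pixels
    cx = (x + w / 2.0) / img_w
    cy = (y + h / 2.0) / img_h
    return f'{int(cls)} {cx:.6f} {cy:.6f} {w / img_w:.6f} {h / img_h:.6f}'

def from_yolo_line(line, img_w, img_h):
    # Inverse mapping back to pixel corners (x_min, y_min, x_max, y_max)
    cls, cx, cy, nw, nh = line.split()
    cx, cy = float(cx) * img_w, float(cy) * img_h
    bw, bh = float(nw) * img_w, float(nh) * img_h
    return int(cls), int(cx - bw / 2), int(cy - bh / 2), int(cx + bw / 2), int(cy + bh / 2)

if __name__ == '__main__':
    # Example: a 200x300 head box with its top-left corner at (100, 50)
    # inside a 640x480 image.
    line = to_yolo_line(0, 100, 50, 200, 300, 640, 480)
    print(line)                            # 0 0.312500 0.416667 0.312500 0.625000
    print(from_yolo_line(line, 640, 480))  # (0, 100, 50, 300, 350)

Assuming the default arguments, 11_create_masked_face_dataset_yolo_test_yolov4_filter.py copies only the images in which exactly one head is detected into 300W_LP_onlyone_person_yolov4_filterd/, and 12_create_masked_face_dataset_yolo_test_yolov4_annotation.py then writes the per-image YOLO label txt files together with data/300wlp-640x480/train.txt and test.txt (95:5 split).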