├── ibug ├── face_alignment │ ├── fan │ │ ├── weights │ │ │ └── .gitkeep │ │ ├── __init__.py │ │ ├── fan.py │ │ ├── fan_custom.py │ │ └── fan_predictor.py │ ├── __init__.py │ └── utils.py └── face_detection │ ├── retina_face │ ├── weights │ │ └── .gitkeep │ ├── __init__.py │ ├── config.py │ ├── py_cpu_nms.py │ ├── prior_box.py │ ├── retina_face.py │ ├── retina_face_net.py │ ├── retina_face_predictor.py │ └── box_utils.py │ ├── __init__.py │ └── utils │ ├── __init__.py │ ├── data │ └── bfm_lms.npy │ ├── head_pose_estimator.py │ └── simple_face_tracker.py ├── mask_images ├── 02_cloth.png ├── mask_pts.pkl ├── 04_surgical_blue.png ├── 03_surgical_white.png └── 01_surgical_light_blue.png ├── 02_pickle_data_train_list_copy.py ├── .gitignore ├── README.md ├── LICENSE ├── 00_pickle_data_sort.py ├── 01_data_cleaning.py ├── folder_split.py ├── verify_txts.py ├── create_filename_list.py ├── 10_create_masked_face_dataset_yolo_test_only_one_person.py ├── create_masked_face_dataset_yolo.py ├── create_masked_face_dataset_6drepnet.py ├── FaceMasking.py ├── 11_create_masked_face_dataset_yolo_test_yolov4_filter.py └── 12_create_masked_face_dataset_yolo_test_yolov4_annotation.py /ibug/face_alignment/fan/weights/.gitkeep: -------------------------------------------------------------------------------- 1 | 2dfan4.pth -------------------------------------------------------------------------------- /ibug/face_detection/retina_face/weights/.gitkeep: -------------------------------------------------------------------------------- 1 | mobilenet0.25_Final.pth 2 | Resnet50_Final.pth -------------------------------------------------------------------------------- /ibug/face_alignment/fan/__init__.py: -------------------------------------------------------------------------------- 1 | from ibug.face_alignment.fan.fan_predictor import FANPredictor 2 | -------------------------------------------------------------------------------- /mask_images/02_cloth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PINTO0309/Face_Mask_Augmentation/HEAD/mask_images/02_cloth.png -------------------------------------------------------------------------------- /mask_images/mask_pts.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PINTO0309/Face_Mask_Augmentation/HEAD/mask_images/mask_pts.pkl -------------------------------------------------------------------------------- /ibug/face_alignment/__init__.py: -------------------------------------------------------------------------------- 1 | from ibug.face_alignment.fan import FANPredictor 2 | 3 | 4 | __version__ = '0.1.0' 5 | -------------------------------------------------------------------------------- /ibug/face_detection/retina_face/__init__.py: -------------------------------------------------------------------------------- 1 | from ibug.face_detection.retina_face.retina_face_predictor import RetinaFacePredictor 2 | -------------------------------------------------------------------------------- /mask_images/04_surgical_blue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PINTO0309/Face_Mask_Augmentation/HEAD/mask_images/04_surgical_blue.png -------------------------------------------------------------------------------- /ibug/face_detection/__init__.py: -------------------------------------------------------------------------------- 1 | from ibug.face_detection.retina_face 
import RetinaFacePredictor 2 | 3 | 4 | __version__ = '0.1.0' 5 | -------------------------------------------------------------------------------- /mask_images/03_surgical_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PINTO0309/Face_Mask_Augmentation/HEAD/mask_images/03_surgical_white.png -------------------------------------------------------------------------------- /ibug/face_detection/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from ibug.face_detection.utils.head_pose_estimator import HeadPoseEstimator 2 | from ibug.face_detection.utils.simple_face_tracker import SimpleFaceTracker 3 | -------------------------------------------------------------------------------- /mask_images/01_surgical_light_blue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PINTO0309/Face_Mask_Augmentation/HEAD/mask_images/01_surgical_light_blue.png -------------------------------------------------------------------------------- /ibug/face_detection/utils/data/bfm_lms.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PINTO0309/Face_Mask_Augmentation/HEAD/ibug/face_detection/utils/data/bfm_lms.npy -------------------------------------------------------------------------------- /02_pickle_data_train_list_copy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle 3 | 4 | with open('3dmm_data/new_param_all_norm_v201.pkl', 'rb') as p: 5 | param_all_norm_v201 = pickle.load(p) 6 | print(f'param_all_norm_v201: {len(param_all_norm_v201)}') 7 | 8 | new_param_all_norm_v201 = [] 9 | for val in param_all_norm_v201: 10 | new_param_all_norm_v201.append(val) 11 | new_param_all_norm_v201.append(val) 12 | 13 | new_np_param_all_norm_v201 = np.asarray(new_param_all_norm_v201) 14 | print(new_np_param_all_norm_v201.shape) 15 | 16 | with open('3dmm_data/new_new_param_all_norm_v201.pkl', 'wb') as p: 17 | pickle.dump(new_np_param_all_norm_v201, p) 18 | 19 | # new_np_param_all_norm_v201: 1272504 -> 636252 x2 20 | print(f'new_np_param_all_norm_v201: {len(new_np_param_all_norm_v201)}') 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ibug_face_alignment.egg-info 2 | *train_aug_120x120_part/ 3 | *train_aug_120x120_part_masked/ 4 | *train_aug_120x120_part_masked_clean/ 5 | *300W_LP_croped/ 6 | train_aug_120x120_part_masked_clean.tar.gz 7 | 3dmm_data/ 8 | 3dmm_data.tar.gz 9 | 300W_LP_croped_masked/ 10 | multi_detected.csv 11 | non_detected.csv 12 | __pycache__/ 13 | .vscode/ 14 | 2dfan4.pth 15 | mobilenet0.25_Final.pth 16 | Resnet50_Final.pth 17 | 300W_LP_w_masked/ 18 | 300W_LP_w_no_masked_image_only/ 19 | 300W_LP_w_no_masked_image_only.tar.gz 20 | yolov4_headdetection_480x640_post.onnx 21 | yolov7_tiny_head_0.752_post_480x640.onnx 22 | 300W_LP/ 23 | 300W_LP_onlyone_person/ 24 | 300W_LP_onlyone_person_yolov4_filterd/ 25 | 300wlp-*x*/ 26 | data/ 27 | TensorrtExecutionProvider_* 28 | non_detected.csv 29 | multi_detected.csv -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Face_Mask_Augmentation 2 | Masked Face Image Augmentation Tool for Dataset 300W-LP with 6D Head Pose Information.
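A minimal usage sketch of the masking pipeline (adapted from `create_masked_face_dataset_yolo.py` further below; the file paths are placeholders, and the pretrained weights named in the `.gitkeep` files must first be placed under the respective `weights` directories):

```python
import cv2
import numpy as np
from FaceMasking import FaceMasker
from ibug.face_detection import RetinaFacePredictor
from ibug.face_alignment import FANPredictor

# Face detector, 68-point landmark detector, and the mask-overlay helper
face_detector = RetinaFacePredictor(
    threshold=0.8, device='cuda:0',
    model=RetinaFacePredictor.get_model('resnet50'))
landmark_detector = FANPredictor(
    device='cuda:0', model=FANPredictor.get_model('2dfan4'))
masker = FaceMasker()

image = cv2.imread('input.jpg')[..., ::-1]                # BGR -> RGB
detected_faces = face_detector(image, rgb=True)           # (N, 15): box, score, 5 landmarks
landmarks, scores = landmark_detector(image, detected_faces, rgb=True)
pts = [tuple(p.astype(np.int32)) for p in landmarks[0]]   # assumes at least one face was found
masked = masker.wear_mask_to_face(image, pts)             # random mask PNG unless mask_idx is set
cv2.imwrite('input_masked.jpg', masked[..., ::-1])        # back to BGR for OpenCV
```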
3 | 4 | https://user-images.githubusercontent.com/33194443/180627371-2ee52c66-d032-4021-b437-ba6a5d834eb0.mp4 5 | 6 | https://user-images.githubusercontent.com/33194443/177553855-ca0cea66-6453-4000-9977-861717dab1e5.mp4 7 | 8 | # Acknowledgements 9 | 1. https://github.com/hhj1897/face_alignment MIT license 10 | 2. https://github.com/hhj1897/face_detection MIT license 11 | 3. https://github.com/GajuuzZ/FaceNetPytoch-Mask No-License 12 | 4. https://ibug.doc.ic.ac.uk/resources/itwmm/ 3DMM 13 | 5. http://www.cbsr.ia.ac.cn/users/xiangyuzhu/projects/3DDFA/main.htm 300W-LP, AFLW2000 14 | 15 | # References 16 | 1. https://github.com/PINTO0309/DMHead 17 | 18 | https://user-images.githubusercontent.com/33194443/175073709-e9c43655-27a9-4760-a38c-768dabe33c1f.mp4 19 | -------------------------------------------------------------------------------- /ibug/face_detection/retina_face/config.py: -------------------------------------------------------------------------------- 1 | # config.py 2 | 3 | cfg_mnet = { 4 | 'name': 'mobilenet0.25', 5 | 'min_sizes': [[16, 32], [64, 128], [256, 512]], 6 | 'steps': [8, 16, 32], 7 | 'variance': [0.1, 0.2], 8 | 'clip': False, 9 | 'loc_weight': 2.0, 10 | 'gpu_train': True, 11 | 'batch_size': 32, 12 | 'ngpu': 1, 13 | 'epoch': 250, 14 | 'decay1': 190, 15 | 'decay2': 220, 16 | 'image_size': 640, 17 | 'pretrain': False, 18 | 'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3}, 19 | 'in_channel': 32, 20 | 'out_channel': 64 21 | } 22 | 23 | cfg_re50 = { 24 | 'name': 'Resnet50', 25 | 'min_sizes': [[16, 32], [64, 128], [256, 512]], 26 | 'steps': [8, 16, 32], 27 | 'variance': [0.1, 0.2], 28 | 'clip': False, 29 | 'loc_weight': 2.0, 30 | 'gpu_train': True, 31 | 'batch_size': 24, 32 | 'ngpu': 4, 33 | 'epoch': 100, 34 | 'decay1': 70, 35 | 'decay2': 90, 36 | 'image_size': 840, 37 | 'pretrain': False, 38 | 'return_layers': {'layer2': 1, 'layer3': 2, 'layer4': 3}, 39 | 'in_channel': 256, 40 | 'out_channel': 256 41 | } 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Katsuya Hyodo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /00_pickle_data_sort.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import copy 3 | import itertools 4 | import numpy as np 5 | from natsort import natsorted 6 | 7 | with open('3dmm_data/param_all_norm_v201.pkl', 'rb') as p: 8 | param_all_norm_v201 = pickle.load(p) 9 | 10 | with open('3dmm_data/train_aug_120x120.list.train') as f: 11 | train_list = f.read().splitlines() 12 | 13 | param_all_norm_v201_list = param_all_norm_v201.tolist() 14 | tmp_list = copy.deepcopy(param_all_norm_v201_list) 15 | 16 | for idx in range(len(param_all_norm_v201_list)): 17 | tmp_list[idx].append(train_list[idx]) 18 | 19 | train_list_sorted = natsorted(tmp_list, key=lambda x: x[102]) 20 | sorted_param_all_norm_v201_list = [val[0:102] for val in train_list_sorted] 21 | sorted_train_list = [val[102:103] for val in train_list_sorted] 22 | 23 | sorted_param_all_norm_v201_list_np = np.asarray(sorted_param_all_norm_v201_list) 24 | with open('3dmm_data/new_param_all_norm_v201.pkl', 'wb') as p: 25 | pickle.dump(sorted_param_all_norm_v201_list_np, p) 26 | 27 | sorted_train_list = list(itertools.chain.from_iterable(sorted_train_list)) 28 | str_ = '\n'.join(sorted_train_list) 29 | with open('3dmm_data/new_train_aug_120x120.list.train', 'wt') as f: 30 | f.write(str_) 31 | -------------------------------------------------------------------------------- /ibug/face_detection/retina_face/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | 11 | def py_cpu_nms(dets, thresh, top_k): 12 | """Pure Python NMS baseline.""" 13 | x1 = dets[:, 0] 14 | y1 = dets[:, 1] 15 | x2 = dets[:, 2] 16 | y2 = dets[:, 3] 17 | scores = dets[:, 4] 18 | 19 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 20 | order = scores.argsort()[: -top_k - 1: -1] 21 | 22 | keep = [] 23 | while order.size > 0: 24 | i = order[0] 25 | keep.append(i) 26 | xx1 = np.maximum(x1[i], x1[order[1:]]) 27 | yy1 = np.maximum(y1[i], y1[order[1:]]) 28 | xx2 = np.minimum(x2[i], x2[order[1:]]) 29 | yy2 = np.minimum(y2[i], y2[order[1:]]) 30 | 31 | w = np.maximum(0.0, xx2 - xx1 + 1) 32 | h = np.maximum(0.0, yy2 - yy1 + 1) 33 | inter = w * h 34 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 35 | 36 | inds = np.where(ovr <= thresh)[0] 37 | order = order[inds + 1] 38 | 39 | return keep 40 | -------------------------------------------------------------------------------- /01_data_cleaning.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import shutil 4 | from tqdm import tqdm 5 | from natsort import natsorted 6 | 7 | OUTPUT_PATH = 'train_aug_120x120_part_masked_clean' 8 | os.makedirs(OUTPUT_PATH, exist_ok=True) 9 | 10 | file_list1 = natsorted(glob.glob('train_aug_120x120_part/*/*.jpg')) 11 | file_list2 = natsorted(glob.glob('train_aug_120x120_part_masked/*/*.jpg')) 12 | 13 | with open('3dmm_data/new_train_aug_120x120.list.train') as f: 14 | train_list = f.read().splitlines() 15 | print(f'train_list: {len(train_list)}') 16 | 17 | clean_list_count = 0 18 | new_train_list = [] 19 | for file_path1, file_path2 in tqdm(zip(file_list1, 
file_list2)): 20 | if os.path.basename(file_path1) in train_list: 21 | shutil.copy2(file_path1, OUTPUT_PATH) 22 | new_train_list.append(os.path.basename(file_path1)) 23 | clean_list_count += 1 24 | 25 | if '_'.join(os.path.splitext(os.path.basename(file_path2))[0].split('_')[:-2]) + '.jpg' in train_list: 26 | shutil.copy2(file_path2, OUTPUT_PATH) 27 | new_train_list.append(os.path.basename(file_path2)) 28 | clean_list_count += 1 29 | 30 | # clean_list: 1272504 -> 636252 x2 31 | print(f'clean_list: {clean_list_count}') 32 | 33 | new_train_list = '\n'.join(new_train_list) 34 | with open('3dmm_data/new_new_train_aug_120x120.list.train', 'w') as f: 35 | f.write(new_train_list) 36 | 37 | -------------------------------------------------------------------------------- /folder_split.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import shutil 4 | from tqdm import tqdm 5 | from natsort import natsorted 6 | from argparse import ArgumentParser 7 | 8 | FOLDER_MAX = 2500 9 | 10 | def main(): 11 | parser = ArgumentParser() 12 | parser.add_argument( 13 | '-i', 14 | '--image_folder_path', 15 | type=str, 16 | default='HELEN', 17 | ) 18 | args = parser.parse_args() 19 | 20 | image_files = natsorted(glob.glob(f"{args.image_folder_path}/*.jpg")) 21 | mat_files = natsorted(glob.glob(f"{args.image_folder_path}/*.mat")) 22 | 23 | assert len(image_files) == len(mat_files) 24 | 25 | image_count = 0 26 | for (image_file, mat_file) in tqdm(zip(image_files, mat_files), dynamic_ncols=True): 27 | new_folder_number = image_count // FOLDER_MAX 28 | dirname = os.path.dirname(image_file) 29 | # print(f'@@@ dirname: {dirname} split: {dirname.split("/")}') 30 | new_dirname = f'{args.image_folder_path}_{str(new_folder_number).zfill(2)}' 31 | os.makedirs(new_dirname, exist_ok=True) 32 | 33 | shutil.move(image_file, new_dirname) 34 | shutil.move(mat_file, new_dirname) 35 | 36 | image_count += 1 37 | 38 | 39 | print(f'image_count: {image_count}') 40 | print(f'folder_count: {image_count//FOLDER_MAX+1}') 41 | 42 | if __name__ == "__main__": 43 | main() 44 | -------------------------------------------------------------------------------- /ibug/face_detection/retina_face/prior_box.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from itertools import product as product 3 | from math import ceil 4 | 5 | 6 | class PriorBox(object): 7 | def __init__(self, cfg, image_size=None): 8 | super(PriorBox, self).__init__() 9 | self.min_sizes = cfg['min_sizes'] 10 | self.steps = cfg['steps'] 11 | self.clip = cfg['clip'] 12 | self.image_size = image_size 13 | self.feature_maps = [[ceil(self.image_size[0]/step), ceil(self.image_size[1]/step)] for step in self.steps] 14 | self.name = "s" 15 | 16 | def forward(self): 17 | anchors = [] 18 | for k, f in enumerate(self.feature_maps): 19 | min_sizes = self.min_sizes[k] 20 | for i, j in product(range(f[0]), range(f[1])): 21 | for min_size in min_sizes: 22 | s_kx = min_size / self.image_size[1] 23 | s_ky = min_size / self.image_size[0] 24 | dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0.5]] 25 | dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0.5]] 26 | for cy, cx in product(dense_cy, dense_cx): 27 | anchors += [cx, cy, s_kx, s_ky] 28 | 29 | # back to torch land 30 | output = torch.Tensor(anchors).view(-1, 4) 31 | if self.clip: 32 | output.clamp_(max=1, min=0) 33 | return output 34 | 
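For reference, a small sketch (not part of the repository) of what PriorBox produces when RetinaFacePredictor calls it for a 480x640 input, using the cfg_re50 settings from config.py:

from ibug.face_detection.retina_face.config import cfg_re50
from ibug.face_detection.retina_face.prior_box import PriorBox

priors = PriorBox(cfg_re50, image_size=(480, 640)).forward()
# Two anchor sizes per cell on the stride-8/16/32 feature maps:
# (60*80 + 30*40 + 15*20) * 2 = 12600 priors of (cx, cy, w, h), normalised to [0, 1]
print(priors.shape)  # torch.Size([12600, 4])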
-------------------------------------------------------------------------------- /verify_txts.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | 3 | """verify_txts.py 4 | 5 | For verifying correctness of the generated YOLO txt annotations. 6 | """ 7 | import cv2 8 | import random 9 | from pathlib import Path 10 | from argparse import ArgumentParser 11 | 12 | WINDOW_NAME = "verify_txts" 13 | parser = ArgumentParser() 14 | parser.add_argument('--dim', type=str, default='640x480', help='input width and height, e.g. 640x480') 15 | args = parser.parse_args() 16 | 17 | if random.random() < 0.5: 18 | print('Verifying test.txt') 19 | jpgs_path = Path(f'data/300wlp-{args.dim}_masked/test.txt') 20 | else: 21 | print('Verifying train.txt') 22 | jpgs_path = Path(f'data/300wlp-{args.dim}_masked/train.txt') 23 | 24 | with open(jpgs_path.as_posix(), 'r') as f: 25 | jpg_names = [l.strip() for l in f.readlines()] 26 | 27 | random.shuffle(jpg_names) 28 | for jpg_name in jpg_names: 29 | img = cv2.imread(jpg_name) 30 | img_h, img_w, _ = img.shape 31 | txt_name = jpg_name.replace('.jpg', '.txt') 32 | with open(txt_name, 'r') as f: 33 | obj_lines = [l.strip() for l in f.readlines()] 34 | for obj_line in obj_lines: 35 | cls, cx, cy, nw, nh = [float(item) for item in obj_line.split(' ')] 36 | color = (0, 0, 255) if cls == 0.0 else (0, 255, 0) 37 | x_min = int((cx - (nw / 2.0)) * img_w) 38 | y_min = int((cy - (nh / 2.0)) * img_h) 39 | x_max = int((cx + (nw / 2.0)) * img_w) 40 | y_max = int((cy + (nh / 2.0)) * img_h) 41 | cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color, 2) 42 | cv2.imshow(WINDOW_NAME, img) 43 | if cv2.waitKey(0) == 27: 44 | break 45 | 46 | cv2.destroyAllWindows() 47 | -------------------------------------------------------------------------------- /create_filename_list.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import numpy as np 4 | import scipy.io as sio 5 | from tqdm import tqdm 6 | 7 | def get_ypr_from_mat(mat_path): 8 | # Get yaw, pitch, roll from .mat annotation. 9 | # They are in radians 10 | mat = sio.loadmat(mat_path) 11 | # [pitch yaw roll tdx tdy tdz scale_factor] 12 | pre_pose_params = mat['Pose_Para'][0] 13 | # Get [pitch, yaw, roll] 14 | pose_params = pre_pose_params[:3] 15 | return pose_params 16 | 17 | def parse_args(): 18 | """Parse input arguments.""" 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument( 21 | '--root_dir', 22 | help='root directory of the datasets files', 23 | default='./datasets/300W_LP', 24 | type=str 25 | ) 26 | parser.add_argument( 27 | '--file_name', 28 | help='Output filename.', 29 | default='files.txt', 30 | type=str 31 | ) 32 | args = parser.parse_args() 33 | 34 | return args 35 | 36 | if __name__ == '__main__': 37 | args = parse_args() 38 | 39 | os.chdir(args.root_dir) 40 | 41 | file_counter = 0 42 | rej_counter = 0 43 | outfile = open(args.file_name, 'w') 44 | 45 | for root, dirs, files in tqdm(os.walk('.'), dynamic_ncols=True): 46 | for f in tqdm(files, dynamic_ncols=True): 47 | if f[-4:] == '.jpg': 48 | mat_path = os.path.join(root, f.replace('.jpg', '.mat')) 49 | # We get the pose in radians 50 | pose = get_ypr_from_mat(mat_path) 51 | # And convert to degrees. 
52 | pitch = pose[0] * 180 / np.pi 53 | yaw = pose[1] * 180 / np.pi 54 | roll = pose[2] * 180 / np.pi 55 | 56 | if abs(pitch) <= 99 and abs(yaw) <= 99 and abs(roll) <= 99: 57 | if file_counter > 0: 58 | outfile.write('\n') 59 | outfile.write(root + '/' + f[:-4]) 60 | file_counter += 1 61 | else: 62 | rej_counter += 1 63 | 64 | outfile.close() 65 | print(f'{file_counter} files listed! {rej_counter} files had out-of-range values and kept out of the list!') 66 | -------------------------------------------------------------------------------- /10_create_masked_face_dataset_yolo_test_only_one_person.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import glob 4 | from tqdm import tqdm 5 | from copy import deepcopy 6 | from natsort import natsorted 7 | from argparse import ArgumentParser 8 | from ibug.face_detection import RetinaFacePredictor 9 | 10 | face_detector = RetinaFacePredictor( 11 | threshold=0.8, 12 | device='cuda:0', 13 | model=RetinaFacePredictor.get_model('resnet50') 14 | ) 15 | 16 | morethanone = [] 17 | nondetected = [] 18 | 19 | def main(): 20 | parser = ArgumentParser() 21 | parser.add_argument( 22 | '-i', 23 | '--image_folder_path', 24 | type=str, 25 | default='300W_LP', 26 | ) 27 | args = parser.parse_args() 28 | 29 | image_files = glob.glob(f"{args.image_folder_path}/*/*.jpg") 30 | 31 | image_count = 0 32 | for image_file in tqdm(natsorted(image_files), dynamic_ncols=True): 33 | 34 | dirname = os.path.dirname(image_file) 35 | # print(f'@@@ dirname: {dirname} split: {dirname.split("/")}') 36 | new_dirname = f'{args.image_folder_path}_onlyone_person/{dirname.split("/")[1]}' 37 | os.makedirs(new_dirname, exist_ok=True) 38 | 39 | image = cv2.imread(image_file) 40 | 41 | debug_image = deepcopy(image) 42 | debug_image = debug_image[..., ::-1] 43 | 44 | detected_faces = face_detector(debug_image, rgb=True) 45 | 46 | if len(detected_faces) == 1: 47 | 48 | # for face_box in detected_faces: 49 | # cv2.rectangle( 50 | # image, 51 | # (int(face_box[0]), int(face_box[1])), 52 | # (int(face_box[2]), int(face_box[3])), 53 | # (255,255,255), 54 | # 2, 55 | # ) 56 | # cv2.rectangle( 57 | # image, 58 | # (int(face_box[0]), int(face_box[1])), 59 | # (int(face_box[2]), int(face_box[3])), 60 | # (0,255,0), 61 | # 1, 62 | # ) 63 | # cv2.putText( 64 | # image, 65 | # f'{face_box[4]:.2f}', 66 | # ( 67 | # int(face_box[0]), 68 | # int(face_box[1]-10) if face_box[1]-10 > 0 else 20 69 | # ), 70 | # cv2.FONT_HERSHEY_SIMPLEX, 71 | # 0.7, 72 | # (255, 255, 255), 73 | # 2, 74 | # cv2.LINE_AA, 75 | # ) 76 | # cv2.putText( 77 | # image, 78 | # f'{face_box[4]:.2f}', 79 | # ( 80 | # int(face_box[0]), 81 | # int(face_box[1]-10) if face_box[1]-10 > 0 else 20 82 | # ), 83 | # cv2.FONT_HERSHEY_SIMPLEX, 84 | # 0.7, 85 | # (0, 255, 0), 86 | # 1, 87 | # cv2.LINE_AA, 88 | # ) 89 | 90 | # cv2.imshow("test", image) 91 | 92 | # key = cv2.waitKey(0) 93 | # if key == 27: # ESC 94 | # break 95 | 96 | basename = os.path.basename(image_file) 97 | cv2.imwrite(f'{new_dirname}/{basename}', image) 98 | image_count += 1 99 | 100 | print(f'image_count: {image_count}') 101 | 102 | if __name__ == "__main__": 103 | main() -------------------------------------------------------------------------------- /ibug/face_alignment/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from typing import Optional, Sequence, Tuple 4 | 5 | 6 | __all__ = ['get_landmark_connectivity', 'plot_landmarks'] 
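# Illustrative use (sketch): draw FAN results onto a frame after running the detectors:
#   landmarks, scores = landmark_detector(frame, detected_faces)
#   plot_landmarks(frame, landmarks[0], scores[0])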
7 | 8 | 9 | def get_landmark_connectivity(num_landmarks: int) -> Optional[Sequence[Tuple[int, int]]]: 10 | if num_landmarks == 68: 11 | return ((0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 11), (11, 12), 12 | (12, 13), (13, 14), (14, 15), (15, 16), (17, 18), (18, 19), (19, 20), (20, 21), (22, 23), (23, 24), 13 | (24, 25), (25, 26), (27, 28), (28, 29), (29, 30), (30, 33), (31, 32), (32, 33), (33, 34), (34, 35), 14 | (36, 37), (37, 38), (38, 39), (40, 41), (41, 36), (42, 43), (43, 44), (44, 45), (45, 46), (46, 47), 15 | (47, 42), (48, 49), (49, 50), (50, 51), (51, 52), (52, 53), (53, 54), (54, 55), (55, 56), (56, 57), 16 | (57, 58), (58, 59), (59, 48), (60, 61), (61, 62), (62, 63), (63, 64), (64, 65), (65, 66), (66, 67), 17 | (67, 60), (39, 40)) 18 | elif num_landmarks == 100: 19 | return ((0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 11), (11, 12), 20 | (12, 13), (13, 14), (14, 15), (15, 16), (17, 18), (18, 19), (19, 20), (20, 21), (22, 23), (23, 24), 21 | (24, 25), (25, 26), (68, 69), (69, 70), (70, 71), (72, 73), (73, 74), (74, 75), (36, 76), (76, 37), 22 | (37, 77), (77, 38), (38, 78), (78, 39), (39, 40), (40, 79), (79, 41), (41, 36), (42, 80), (80, 43), 23 | (43, 81), (81, 44), (44, 82), (82, 45), (45, 46), (46, 83), (83, 47), (47, 42), (27, 28), (28, 29), 24 | (29, 30), (30, 33), (31, 32), (32, 33), (33, 34), (34, 35), (84, 85), (86, 87), (48, 49), (49, 88), 25 | (88, 50), (50, 51), (51, 52), (52, 89), (89, 53), (53, 54), (54, 55), (55, 90), (90, 56), (56, 57), 26 | (57, 58), (58, 91), (91, 59), (59, 48), (60, 92), (92, 93), (93, 61), (61, 62), (62, 63), (63, 94), 27 | (94, 95), (95, 64), (64, 96), (96, 97), (97, 65), (65, 66), (66, 67), (67, 98), (98, 99), (99, 60), 28 | (17, 68), (21, 71), (22, 72), (26, 75)) 29 | else: 30 | return None 31 | 32 | 33 | def plot_landmarks(image: np.ndarray, landmarks: np.ndarray, landmark_scores: Optional[Sequence[float]] = None, 34 | threshold: float = 0.2, line_colour: Tuple[int, int, int] = (0, 255, 0), 35 | pts_colour: Tuple[int, int, int] = (0, 0, 255), line_thickness: int = 1, pts_radius: int = 1, 36 | landmark_connectivity: Optional[Sequence[Tuple[int, int]]] = None) -> None: 37 | num_landmarks = len(landmarks) 38 | if landmark_scores is None: 39 | landmark_scores = np.full((num_landmarks,), threshold + 1.0, dtype=float) 40 | if landmark_connectivity is None: 41 | landmark_connectivity = get_landmark_connectivity(len(landmarks)) 42 | if landmark_connectivity is not None: 43 | for (idx1, idx2) in landmark_connectivity: 44 | if (idx1 < num_landmarks and idx2 < num_landmarks and 45 | landmark_scores[idx1] >= threshold and landmark_scores[idx2] >= threshold): 46 | cv2.line(image, tuple(landmarks[idx1].astype(int).tolist()), 47 | tuple(landmarks[idx2].astype(int).tolist()), 48 | color=line_colour, thickness=line_thickness, lineType=cv2.LINE_AA) 49 | for landmark, score in zip(landmarks, landmark_scores): 50 | if score >= threshold: 51 | cv2.circle(image, tuple(landmark.astype(int).tolist()), pts_radius, pts_colour, -1) 52 | -------------------------------------------------------------------------------- /ibug/face_detection/utils/head_pose_estimator.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import math 4 | import numpy as np 5 | from typing import Optional, Tuple 6 | 7 | 8 | __all__ = ['HeadPoseEstimator'] 9 | 10 | 11 | class HeadPoseEstimator(object): 12 | def __init__(self, mean_shape_path: str 
= os.path.join(os.path.dirname(__file__), 'data', 'bfm_lms.npy')) -> None: 13 | # Load the 68-point mean shape derived from BFM 14 | mean_shape = np.load(mean_shape_path) 15 | 16 | # Calculate the 5-points mean shape 17 | left_eye = mean_shape[[37, 38, 40, 41]].mean(axis=0) 18 | right_eye = mean_shape[[43, 44, 46, 47]].mean(axis=0) 19 | self._mean_shape_5pts = np.vstack((left_eye, right_eye, mean_shape[[30, 48, 54]])) 20 | 21 | # Flip the y coordinates of the mean shape to match that of the image coordinate system 22 | self._mean_shape_5pts[:, 1] = -self._mean_shape_5pts[:, 1] 23 | 24 | def __call__( 25 | self, 26 | landmarks: np.ndarray, 27 | image_width: int = 0, 28 | image_height: int = 0, 29 | camera_matrix: Optional[np.ndarray] = None, 30 | dist_coeffs: Optional[np.ndarray] = None, 31 | output_preference: int = 0 32 | ) -> Tuple[float, float, float]: 33 | 34 | # Form the camera matrix 35 | if camera_matrix is None: 36 | if image_width <= 0 or image_height <= 0: 37 | raise ValueError( 38 | 'image_width and image_height must be specified when camera_matrix is not given directly') 39 | else: 40 | camera_matrix = np.array([ 41 | [image_width + image_height, 0, image_width / 2.0], 42 | [0, image_width + image_height, image_height / 2.0], 43 | [0, 0, 1], 44 | ], dtype=float) 45 | 46 | # Prepare the landmarks 47 | if landmarks.shape[0] == 68: 48 | landmarks = landmarks[17:] 49 | if landmarks.shape[0] in [49, 51]: 50 | left_eye = landmarks[[20, 21, 23, 24]].mean(axis=0) 51 | right_eye = landmarks[[26, 27, 29, 30]].mean(axis=0) 52 | landmarks = np.vstack((left_eye, right_eye, landmarks[[13, 31, 37]])) 53 | 54 | # Use EPnP to estimate pitch, yaw, and roll 55 | _, rvec, _ = cv2.solvePnP( 56 | self._mean_shape_5pts, 57 | np.expand_dims(landmarks, axis=1), 58 | camera_matrix, 59 | dist_coeffs, 60 | flags=cv2.SOLVEPNP_EPNP 61 | ) 62 | rot_mat, _ = cv2.Rodrigues(rvec) 63 | if 1.0 + rot_mat[2, 0] < 1e-9: 64 | pitch = 0.0 65 | yaw = 90.0 66 | roll = -math.atan2(rot_mat[0, 1], rot_mat[0, 2]) / math.pi * 180.0 67 | elif 1.0 - rot_mat[2, 0] < 1e-9: 68 | pitch = 0.0 69 | yaw = -90.0 70 | roll = math.atan2(-rot_mat[0, 1], -rot_mat[0, 2]) / math.pi * 180.0 71 | else: 72 | pitch = math.atan2(rot_mat[2, 1], rot_mat[2, 2]) / math.pi * 180.0 73 | yaw = -math.asin(rot_mat[2, 0]) / math.pi * 180.0 74 | roll = math.atan2(rot_mat[1, 0], rot_mat[0, 0]) / math.pi * 180.0 75 | 76 | # Respond to output_preference: 77 | # output_preference == 1: limit pitch to the range of -90.0 ~ 90.0 78 | # output_preference == 2: limit yaw to the range of -90.0 ~ 90.0 (already satisfied) 79 | # output_preference == 3: limit roll to the range of -90.0 ~ 90.0 80 | # otherwise: minimise total rotation, min(abs(pitch) + abs(yaw) + abs(roll)) 81 | if output_preference != 2: 82 | alt_pitch = pitch - 180.0 if pitch > 0.0 else pitch + 180.0 83 | alt_yaw = -180.0 - yaw if yaw < 0.0 else 180.0 - yaw 84 | alt_roll = roll - 180.0 if roll > 0.0 else roll + 180.0 85 | if (output_preference == 1 and -90.0 < alt_pitch < 90.0 or 86 | output_preference == 3 and -90.0 < alt_roll < 90.0 or 87 | output_preference not in (1, 2, 3) and 88 | abs(alt_pitch) + abs(alt_yaw) + abs(alt_roll) < abs(pitch) + abs(yaw) + abs(roll)): 89 | pitch, yaw, roll = alt_pitch, alt_yaw, alt_roll 90 | 91 | return -pitch, yaw, roll 92 | -------------------------------------------------------------------------------- /ibug/face_detection/utils/simple_face_tracker.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from 
typing import List, Optional 3 | from scipy.optimize import linear_sum_assignment 4 | 5 | 6 | __all__ = ['SimpleFaceTracker'] 7 | 8 | 9 | class SimpleFaceTracker(object): 10 | def __init__(self, iou_threshold: float = 0.4, minimum_face_size: float = 0.0) -> None: 11 | self._iou_threshold = iou_threshold 12 | self._minimum_face_size = minimum_face_size 13 | self._tracklets = [] 14 | self._tracklet_counter = 0 15 | 16 | @property 17 | def iou_threshold(self) -> float: 18 | return self._iou_threshold 19 | 20 | @iou_threshold.setter 21 | def iou_threshold(self, threshold: float) -> None: 22 | self._iou_threshold = threshold 23 | 24 | @property 25 | def minimum_face_size(self) -> float: 26 | return self._minimum_face_size 27 | 28 | @minimum_face_size.setter 29 | def minimum_face_size(self, face_size: float) -> None: 30 | self._minimum_face_size = face_size 31 | 32 | def __call__(self, face_boxes: np.ndarray) -> List[Optional[int]]: 33 | if face_boxes.size <= 0: 34 | self._tracklets = [] 35 | return [] 36 | 37 | # Calculate area of the faces 38 | face_areas = np.abs((face_boxes[:, 2] - face_boxes[:, 0]) * (face_boxes[:, 3] - face_boxes[:, 1])) 39 | 40 | # Prepare tracklets 41 | for tracklet in self._tracklets: 42 | tracklet['tracked'] = False 43 | 44 | # Calculate the distance matrix based on IOU 45 | iou_distance_threshold = np.clip(1.0 - self._iou_threshold, 0.0, 1.0) 46 | min_face_area = max(self._minimum_face_size ** 2, np.finfo(float).eps) 47 | distances = np.full(shape=(face_boxes.shape[0], len(self._tracklets)), 48 | fill_value=2.0 * min(face_boxes.shape[0], len(self._tracklets)), dtype=float) 49 | for row, face_box in enumerate(face_boxes): 50 | if face_areas[row] >= min_face_area: 51 | for col, tracklet in enumerate(self._tracklets): 52 | x_left = max(min(face_box[0], face_box[2]), min(tracklet['bbox'][0], tracklet['bbox'][2])) 53 | y_top = max(min(face_box[1], face_box[3]), min(tracklet['bbox'][1], tracklet['bbox'][3])) 54 | x_right = min(max(face_box[2], face_box[0]), max(tracklet['bbox'][2], tracklet['bbox'][0])) 55 | y_bottom = min(max(face_box[3], face_box[1]), max(tracklet['bbox'][3], tracklet['bbox'][1])) 56 | if x_right <= x_left or y_bottom <= y_top: 57 | distance = 1.0 58 | else: 59 | intersection_area = (x_right - x_left) * (y_bottom - y_top) 60 | distance = 1.0 - intersection_area / float(face_areas[row] + tracklet['area'] - 61 | intersection_area) 62 | if distance <= iou_distance_threshold: 63 | distances[row, col] = distance 64 | 65 | # ID assignment 66 | tracked_ids = [None] * face_boxes.shape[0] 67 | for row, col in zip(*linear_sum_assignment(distances)): 68 | if distances[row, col] <= iou_distance_threshold: 69 | tracked_ids[row] = self._tracklets[col]['id'] 70 | self._tracklets[col]['bbox'] = face_boxes[row, :4].copy() 71 | self._tracklets[col]['area'] = face_areas[row] 72 | self._tracklets[col]['tracked'] = True 73 | 74 | # Remove expired tracklets 75 | self._tracklets = [x for x in self._tracklets if x['tracked']] 76 | 77 | # Register new faces 78 | for idx, face_box in enumerate(face_boxes): 79 | if face_areas[idx] >= min_face_area and tracked_ids[idx] is None: 80 | self._tracklet_counter += 1 81 | self._tracklets.append({'bbox': face_box[:4].copy(), 'area': face_areas[idx], 82 | 'id': self._tracklet_counter, 'tracked': True}) 83 | tracked_ids[idx] = self._tracklets[-1]['id'] 84 | 85 | return tracked_ids 86 | 87 | def reset(self, reset_tracklet_counter: bool = True) -> None: 88 | self._tracklets = [] 89 | if reset_tracklet_counter: 90 | self._tracklet_counter = 0 91 
| -------------------------------------------------------------------------------- /create_masked_face_dataset_yolo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import pandas as pd 4 | import numpy as np 5 | import glob 6 | import random 7 | random.seed(0) 8 | import shutil 9 | from tqdm import tqdm 10 | from natsort import natsorted 11 | from FaceMasking import FaceMasker 12 | from ibug.face_detection import RetinaFacePredictor 13 | from ibug.face_alignment import FANPredictor 14 | 15 | face_detector = RetinaFacePredictor( 16 | threshold=0.8, 17 | device='cuda:0', 18 | model=RetinaFacePredictor.get_model('resnet50') 19 | ) 20 | landmark_detector = FANPredictor( 21 | device='cuda:0', 22 | model=FANPredictor.get_model('2dfan4') 23 | ) 24 | 25 | 26 | source_folder = 'data/300wlp-640x480' 27 | output_folder = 'data/300wlp-640x480_masked' 28 | 29 | if not os.path.exists(output_folder): 30 | os.makedirs(output_folder, exist_ok=True) 31 | 32 | masker = FaceMasker() 33 | 34 | less_filfol = [] 35 | morethanone = [] 36 | nondetected = [] 37 | 38 | image_files = natsorted(glob.glob(f"{source_folder}/*.jpg")) 39 | text_files = natsorted(glob.glob(f"{source_folder}/*.txt")) 40 | 41 | train_txt = '' 42 | test_txt = '' 43 | 44 | if f'{source_folder}/train.txt' in text_files: 45 | text_files.remove(f'{source_folder}/train.txt') 46 | with open(glob.glob(f"{source_folder}/train.txt")[0], 'r') as f: 47 | train_txt = [l.strip() for l in f.readlines()] 48 | 49 | if f'{source_folder}/test.txt' in text_files: 50 | text_files.remove(f'{source_folder}/test.txt') 51 | with open(glob.glob(f"{source_folder}/test.txt")[0], 'r') as f: 52 | test_txt = [l.strip() for l in f.readlines()] 53 | 54 | if f'{source_folder}/val.txt' in text_files: 55 | text_files.remove(f'{source_folder}/val.txt') 56 | 57 | assert len(image_files) == len(text_files), \ 58 | f"len(image_files) != len(text_files): {len(image_files)} {len(text_files)}" 59 | 60 | 61 | output_train_txt_list = [] 62 | output_test_txt_list = [] 63 | 64 | for j, (image_file, text_file) in tqdm(enumerate(zip(image_files, text_files))): 65 | image_basename = os.path.basename(image_file) 66 | image_basename_without_ext = os.path.splitext(image_basename)[0] 67 | text_basename = os.path.basename(text_file) 68 | text_basename_without_ext = os.path.splitext(text_basename)[0] 69 | assert image_basename_without_ext == text_basename_without_ext, \ 70 | f"image_basename_without_ext != text_basename_without_ext: \ 71 | {image_basename_without_ext} {text_basename_without_ext}" 72 | 73 | # Load image 74 | image = cv2.imread(image_file)[..., ::-1] 75 | width = image.shape[1] 76 | height = image.shape[0] 77 | 78 | detected_faces = face_detector(image, rgb=True) 79 | 80 | if len(detected_faces) == 0: 81 | nondetected.append(image_file) 82 | continue 83 | if len(detected_faces) > 1: 84 | morethanone.append(image_file) 85 | continue 86 | 87 | landmarks, scores = landmark_detector(image, detected_faces, rgb=False) 88 | landmarks = [tuple(landmark.astype(np.int32)) for landmark in landmarks[0]] 89 | 90 | # Save masked-extracted face. 
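# (wear_mask_to_face() warps a randomly chosen mask PNG onto the 68 detected landmarks;
#  pass mask_idx to force a specific mask image.)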
91 | image_mask = masker.wear_mask_to_face(image, landmarks) 92 | face_mask = image_mask[:, :, ::-1] 93 | 94 | cv2.imwrite( 95 | f'{output_folder}/{image_basename_without_ext}_masked.jpg', 96 | face_mask, 97 | ) 98 | shutil.copy( 99 | text_file, 100 | f'{output_folder}/{text_basename_without_ext}_masked.txt', 101 | ) 102 | 103 | if image_file in test_txt: 104 | output_test_txt_list.append(f'{output_folder}/{image_basename_without_ext}_masked.jpg') 105 | else: 106 | output_train_txt_list.append(f'{output_folder}/{image_basename_without_ext}_masked.jpg') 107 | 108 | 109 | set_path = f'{output_folder}/test.txt' 110 | with open(set_path, 'w') as fset: 111 | for jpg in output_test_txt_list: 112 | fset.write(f'{jpg}\n') 113 | 114 | set_path = f'{output_folder}/train.txt' 115 | with open(set_path, 'w') as fset: 116 | for jpg in output_train_txt_list: 117 | fset.write(f'{jpg}\n') 118 | 119 | nondetected = pd.DataFrame(nondetected) 120 | nondetected.to_csv(f'non_detected.csv', header=None, index=None) 121 | morethanone = pd.DataFrame(morethanone) 122 | morethanone.to_csv(f'multi_detected.csv', header=None, index=None) -------------------------------------------------------------------------------- /create_masked_face_dataset_6drepnet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import time 4 | import pandas as pd 5 | import numpy as np 6 | import glob 7 | import scipy.io as sio 8 | import random 9 | random.seed(0) 10 | import shutil 11 | from FaceMasking import FaceMasker 12 | from natsort import natsorted 13 | from ibug.face_detection import RetinaFacePredictor 14 | from ibug.face_alignment import FANPredictor 15 | 16 | face_detector = RetinaFacePredictor( 17 | threshold=0.8, 18 | device='cuda:0', 19 | model=RetinaFacePredictor.get_model('resnet50') 20 | ) 21 | landmark_detector = FANPredictor( 22 | device='cuda:0', 23 | model=FANPredictor.get_model('2dfan4') 24 | ) 25 | 26 | 27 | source_folder = '300W_LP_croped' 28 | output_folder = '300W_LP_croped_masked' 29 | image_size = 480 30 | 31 | if not os.path.exists(output_folder): 32 | os.makedirs(output_folder) 33 | 34 | masker = FaceMasker() 35 | 36 | list_fol = os.listdir(source_folder) 37 | num_fol = len(list_fol) 38 | less_filfol = [] 39 | morethanone = [] 40 | nondetected = [] 41 | 42 | st = time.time() 43 | for i, fol in enumerate(list_fol): 44 | print('folder: {}/{}'.format(i+1, num_fol)) 45 | folfil = os.listdir(os.path.join(source_folder, fol)) 46 | num_fil = len(folfil) 47 | if len(folfil) <= 0: 48 | less_filfol.append(fol) 49 | continue 50 | 51 | image_files = glob.glob(f"{os.path.join(source_folder, fol)}/*.jpg") 52 | mat_files = glob.glob(f"{os.path.join(source_folder, fol)}/*.mat") 53 | 54 | for j, (image_file, mat_file) in enumerate(zip(natsorted(image_files), natsorted(mat_files))): 55 | 56 | save_fol = f'{output_folder}/{fol}' 57 | if not os.path.exists(save_fol): 58 | os.makedirs(save_fol) 59 | 60 | print(' file: {}/{}'.format(j+1, num_fil)) 61 | 62 | # Load image 63 | image = cv2.imread(image_file)[:, :, ::-1] 64 | # Load .mat 65 | mat = sio.loadmat(mat_file) 66 | # Calculate image size for final crop (for cropping to adjust to size of annotation data) 67 | pt2d = mat['pt2d'] 68 | x_min = min(pt2d[0,:]) 69 | y_min = min(pt2d[1,:]) 70 | x_max = max(pt2d[0,:]) 71 | y_max = max(pt2d[1,:]) 72 | k = 0.20 73 | x_min -= 2 * k * abs(x_max - x_min) 74 | y_min -= 2 * k * abs(y_max - y_min) 75 | x_max += 2 * k * abs(x_max - x_min) 76 | y_max += 0.6 * k * abs(y_max - 
y_min) 77 | x_min = max(int(x_min), 0) 78 | y_min = max(int(y_min), 0) 79 | x_max = min(int(x_max), int(image_size)) 80 | y_max = min(int(y_max), int(image_size)) 81 | crop_start_x = 0 82 | crop_end_x = int(x_max-x_min) 83 | crop_start_y = 0 84 | crop_end_y = int(y_max-y_min) 85 | width = image.shape[1] 86 | height = image.shape[0] 87 | 88 | detected_faces = face_detector(image, rgb=True) 89 | if len(detected_faces) == 0: 90 | detected_faces = np.asarray([[0,0,image.shape[1],image.shape[0],1.0]]) 91 | landmarks, scores = landmark_detector(image, detected_faces, rgb=False) 92 | landmarks = [tuple(landmark.astype(np.int32)) for landmark in landmarks[0]] 93 | 94 | if len(detected_faces) == 0: 95 | nondetected.append(image_file) 96 | continue 97 | if len(detected_faces) > 1: 98 | morethanone.append(image_file) 99 | continue 100 | 101 | # Save masked-extracted face. 102 | image_mask = masker.wear_mask_to_face(image, landmarks) 103 | face_mask = image_mask[:, :, ::-1] 104 | 105 | cv2.imwrite( 106 | os.path.join(save_fol, os.path.basename(image_file).split('.')[0] + '.jpg'), 107 | image[crop_start_y:crop_end_y, crop_start_x:crop_end_x, :][..., ::-1], 108 | ) 109 | cv2.imwrite( 110 | os.path.join(save_fol, os.path.basename(image_file).split('.')[0] + '_masked.jpg'), 111 | face_mask[crop_start_y:crop_end_y, crop_start_x:crop_end_x, :], 112 | ) 113 | shutil.copy(mat_file, save_fol) 114 | shutil.copy(mat_file, os.path.join(save_fol, os.path.basename(mat_file).split('.')[0] + '_masked.mat')) 115 | 116 | 117 | 118 | elps = time.time() - st 119 | print('time used: %.0f m : %.0f s' % (elps // 60, elps % 60)) 120 | 121 | nondetected = pd.DataFrame(nondetected) 122 | nondetected.to_csv(f'non_detected.csv', header=None, index=None) 123 | morethanone = pd.DataFrame(morethanone) 124 | morethanone.to_csv(f'multi_detected.csv', header=None, index=None) -------------------------------------------------------------------------------- /ibug/face_detection/retina_face/retina_face.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torchvision.models as models 5 | import torchvision.models._utils as _utils 6 | from .retina_face_net import MobileNetV1, FPN, SSH 7 | 8 | 9 | class ClassHead(nn.Module): 10 | def __init__(self, inchannels=512, num_anchors=3): 11 | super(ClassHead, self).__init__() 12 | self.num_anchors = num_anchors 13 | self.conv1x1 = nn.Conv2d(inchannels, self.num_anchors*2, kernel_size=(1, 1), stride=1, padding=0) 14 | 15 | def forward(self, x): 16 | out = self.conv1x1(x) 17 | out = out.permute(0, 2, 3, 1).contiguous() 18 | 19 | return out.view(out.shape[0], -1, 2) 20 | 21 | 22 | class BboxHead(nn.Module): 23 | def __init__(self, inchannels=512, num_anchors=3): 24 | super(BboxHead, self).__init__() 25 | self.conv1x1 = nn.Conv2d(inchannels, num_anchors*4, kernel_size=(1, 1), stride=1,padding=0) 26 | 27 | def forward(self, x): 28 | out = self.conv1x1(x) 29 | out = out.permute(0, 2, 3, 1).contiguous() 30 | 31 | return out.view(out.shape[0], -1, 4) 32 | 33 | 34 | class LandmarkHead(nn.Module): 35 | def __init__(self, inchannels=512, num_anchors=3): 36 | super(LandmarkHead, self).__init__() 37 | self.conv1x1 = nn.Conv2d(inchannels,num_anchors*10, kernel_size=(1, 1), stride=1, padding=0) 38 | 39 | def forward(self, x): 40 | out = self.conv1x1(x) 41 | out = out.permute(0, 2, 3, 1).contiguous() 42 | 43 | return out.view(out.shape[0], -1, 10) 44 | 45 | 46 | class RetinaFace(nn.Module): 47
| def __init__(self, cfg=None, phase='train'): 48 | """ 49 | :param cfg: Network related settings. 50 | :param phase: train or test. 51 | """ 52 | super(RetinaFace, self).__init__() 53 | self.phase = phase 54 | backbone = None 55 | if cfg['name'] == 'mobilenet0.25': 56 | backbone = MobileNetV1() 57 | if cfg['pretrain']: 58 | raise ValueError('cfg[\'pretrain\'] cannot be set to True for mobilenet0.25') 59 | elif cfg['name'] == 'Resnet50': 60 | backbone = models.resnet50(pretrained=cfg['pretrain']) 61 | 62 | self.body = _utils.IntermediateLayerGetter(backbone, cfg['return_layers']) 63 | in_channels_stage2 = cfg['in_channel'] 64 | in_channels_list = [ 65 | in_channels_stage2 * 2, 66 | in_channels_stage2 * 4, 67 | in_channels_stage2 * 8, 68 | ] 69 | out_channels = cfg['out_channel'] 70 | self.fpn = FPN(in_channels_list,out_channels) 71 | self.ssh1 = SSH(out_channels, out_channels) 72 | self.ssh2 = SSH(out_channels, out_channels) 73 | self.ssh3 = SSH(out_channels, out_channels) 74 | 75 | self.ClassHead = self._make_class_head(fpn_num=3, inchannels=cfg['out_channel']) 76 | self.BboxHead = self._make_bbox_head(fpn_num=3, inchannels=cfg['out_channel']) 77 | self.LandmarkHead = self._make_landmark_head(fpn_num=3, inchannels=cfg['out_channel']) 78 | 79 | def _make_class_head(self, fpn_num=3, inchannels=64, anchor_num=2): 80 | classhead = nn.ModuleList() 81 | for i in range(fpn_num): 82 | classhead.append(ClassHead(inchannels, anchor_num)) 83 | return classhead 84 | 85 | def _make_bbox_head(self, fpn_num=3, inchannels=64, anchor_num=2): 86 | bboxhead = nn.ModuleList() 87 | for i in range(fpn_num): 88 | bboxhead.append(BboxHead(inchannels, anchor_num)) 89 | return bboxhead 90 | 91 | def _make_landmark_head(self, fpn_num=3, inchannels=64, anchor_num=2): 92 | landmarkhead = nn.ModuleList() 93 | for i in range(fpn_num): 94 | landmarkhead.append(LandmarkHead(inchannels, anchor_num)) 95 | return landmarkhead 96 | 97 | def forward(self, inputs): 98 | out = self.body(inputs) 99 | 100 | # FPN 101 | fpn = self.fpn(out) 102 | 103 | # SSH 104 | feature1 = self.ssh1(fpn[0]) 105 | feature2 = self.ssh2(fpn[1]) 106 | feature3 = self.ssh3(fpn[2]) 107 | features = [feature1, feature2, feature3] 108 | 109 | bbox_regressions = torch.cat([self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1) 110 | classifications = torch.cat([self.ClassHead[i](feature) for i, feature in enumerate(features)], dim=1) 111 | ldm_regressions = torch.cat([self.LandmarkHead[i](feature) for i, feature in enumerate(features)], dim=1) 112 | 113 | if self.phase == 'train': 114 | output = (bbox_regressions, classifications, ldm_regressions) 115 | else: 116 | # output = (bbox_regressions, F.softmax(classifications, dim=-1), ldm_regressions) 117 | output = (bbox_regressions, F.softmax(classifications, dim=-1)[...,1], ldm_regressions) 118 | return output 119 | -------------------------------------------------------------------------------- /ibug/face_detection/retina_face/retina_face_net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def conv_bn(inp, oup, stride = 1, leaky = 0): 7 | return nn.Sequential( 8 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 9 | nn.BatchNorm2d(oup), 10 | nn.LeakyReLU(negative_slope=leaky, inplace=True) 11 | ) 12 | 13 | 14 | def conv_bn_no_relu(inp, oup, stride): 15 | return nn.Sequential( 16 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 17 | nn.BatchNorm2d(oup), 18 | ) 19 | 
20 | 21 | def conv_bn1X1(inp, oup, stride, leaky=0): 22 | return nn.Sequential( 23 | nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False), 24 | nn.BatchNorm2d(oup), 25 | nn.LeakyReLU(negative_slope=leaky, inplace=True) 26 | ) 27 | 28 | 29 | def conv_dw(inp, oup, stride, leaky=0.1): 30 | return nn.Sequential( 31 | nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), 32 | nn.BatchNorm2d(inp), 33 | nn.LeakyReLU(negative_slope=leaky, inplace=True), 34 | 35 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 36 | nn.BatchNorm2d(oup), 37 | nn.LeakyReLU(negative_slope=leaky, inplace=True), 38 | ) 39 | 40 | 41 | class SSH(nn.Module): 42 | def __init__(self, in_channel, out_channel): 43 | super(SSH, self).__init__() 44 | assert out_channel % 4 == 0 45 | leaky = 0 46 | if out_channel <= 64: 47 | leaky = 0.1 48 | self.conv3X3 = conv_bn_no_relu(in_channel, out_channel//2, stride=1) 49 | 50 | self.conv5X5_1 = conv_bn(in_channel, out_channel//4, stride=1, leaky = leaky) 51 | self.conv5X5_2 = conv_bn_no_relu(out_channel//4, out_channel//4, stride=1) 52 | 53 | self.conv7X7_2 = conv_bn(out_channel//4, out_channel//4, stride=1, leaky = leaky) 54 | self.conv7x7_3 = conv_bn_no_relu(out_channel//4, out_channel//4, stride=1) 55 | 56 | def forward(self, input): 57 | conv3X3 = self.conv3X3(input) 58 | 59 | conv5X5_1 = self.conv5X5_1(input) 60 | conv5X5 = self.conv5X5_2(conv5X5_1) 61 | 62 | conv7X7_2 = self.conv7X7_2(conv5X5_1) 63 | conv7X7 = self.conv7x7_3(conv7X7_2) 64 | 65 | out = torch.cat([conv3X3, conv5X5, conv7X7], dim=1) 66 | out = F.relu(out) 67 | return out 68 | 69 | 70 | class FPN(nn.Module): 71 | def __init__(self,in_channels_list,out_channels): 72 | super(FPN,self).__init__() 73 | leaky = 0 74 | if out_channels <= 64: 75 | leaky = 0.1 76 | self.output1 = conv_bn1X1(in_channels_list[0], out_channels, stride=1, leaky=leaky) 77 | self.output2 = conv_bn1X1(in_channels_list[1], out_channels, stride=1, leaky=leaky) 78 | self.output3 = conv_bn1X1(in_channels_list[2], out_channels, stride=1, leaky=leaky) 79 | 80 | self.merge1 = conv_bn(out_channels, out_channels, leaky=leaky) 81 | self.merge2 = conv_bn(out_channels, out_channels, leaky=leaky) 82 | 83 | def forward(self, input): 84 | # names = list(input.keys()) 85 | input = list(input.values()) 86 | 87 | output1 = self.output1(input[0]) 88 | output2 = self.output2(input[1]) 89 | output3 = self.output3(input[2]) 90 | 91 | up3 = F.interpolate(output3, size=[output2.size(2), output2.size(3)], mode="nearest") 92 | output2 = output2 + up3 93 | output2 = self.merge2(output2) 94 | 95 | up2 = F.interpolate(output2, size=[output1.size(2), output1.size(3)], mode="nearest") 96 | output1 = output1 + up2 97 | output1 = self.merge1(output1) 98 | 99 | out = [output1, output2, output3] 100 | return out 101 | 102 | 103 | class MobileNetV1(nn.Module): 104 | def __init__(self): 105 | super(MobileNetV1, self).__init__() 106 | self.stage1 = nn.Sequential( 107 | conv_bn(3, 8, 2, leaky=0.1), # 3 108 | conv_dw(8, 16, 1), # 7 109 | conv_dw(16, 32, 2), # 11 110 | conv_dw(32, 32, 1), # 19 111 | conv_dw(32, 64, 2), # 27 112 | conv_dw(64, 64, 1), # 43 113 | ) 114 | self.stage2 = nn.Sequential( 115 | conv_dw(64, 128, 2), # 43 + 16 = 59 116 | conv_dw(128, 128, 1), # 59 + 32 = 91 117 | conv_dw(128, 128, 1), # 91 + 32 = 123 118 | conv_dw(128, 128, 1), # 123 + 32 = 155 119 | conv_dw(128, 128, 1), # 155 + 32 = 187 120 | conv_dw(128, 128, 1), # 187 + 32 = 219 121 | ) 122 | self.stage3 = nn.Sequential( 123 | conv_dw(128, 256, 2), # 219 +3 2 = 241 124 | conv_dw(256, 256, 1), # 241 + 64 = 301 125 | 
) 126 | self.avg = nn.AdaptiveAvgPool2d((1,1)) 127 | self.fc = nn.Linear(256, 1000) 128 | 129 | def forward(self, x): 130 | x = self.stage1(x) 131 | x = self.stage2(x) 132 | x = self.stage3(x) 133 | x = self.avg(x) 134 | # x = self.model(x) 135 | x = x.view(-1, 256) 136 | x = self.fc(x) 137 | return x 138 | -------------------------------------------------------------------------------- /FaceMasking.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import pickle 4 | import numpy as np 5 | from PIL import Image 6 | import random 7 | random.seed(0) 8 | 9 | MASK_PTS_FILE = './mask_images/mask_pts.pkl' 10 | TRI_MASK_IDX = [ 11 | [0, 1, 3], [3, 1, 4], [3, 4, 6], [6, 4, 7], 12 | [4, 7, 8], [4, 5, 8], [1, 5, 4], [1, 2, 5] 13 | ] 14 | DEFAULT_TRI_FACE_IDX = [ 15 | [1, 28, 3], [3, 28, 30], [3, 30, 5], [5, 30, 8], 16 | [30, 8, 11], [30, 13, 11], [28, 13, 30], [28, 15, 13] 17 | ] 18 | DEFAULT_MASK_PTS = np.array([ 19 | (30, 12), (125, 5), (220, 12), (20, 80), (125, 80), 20 | (230, 80), (65, 140), (125, 160), (185, 140) 21 | ]) 22 | 23 | 24 | def get_tri_mask_points(pts_mask, tri_mask_idx): 25 | tri_mask_pts = np.zeros((len(tri_mask_idx), 6), dtype=np.float32) 26 | for i in range(len(tri_mask_idx)): 27 | tri_mask_pts[i] = pts_mask[tri_mask_idx[i]].ravel() 28 | return tri_mask_pts 29 | 30 | 31 | def closest_point(pt, pts): 32 | dist = np.sum((pts - pt) ** 2, axis=1) 33 | return np.argmin(dist), np.min(dist) 34 | 35 | 36 | def create_mask_mark(png_image): 37 | create_mask_mark.done = False 38 | create_mask_mark.current = (0, 0) 39 | create_mask_mark.pts = DEFAULT_MASK_PTS 40 | create_mask_mark.sel_idx = None 41 | window = 'Adjust points' 42 | 43 | def on_mouse(event, x, y, flags, param): 44 | if create_mask_mark.done: 45 | return 46 | 47 | if event == cv2.EVENT_MOUSEMOVE: 48 | if create_mask_mark.sel_idx is not None: 49 | create_mask_mark.pts[create_mask_mark.sel_idx] = (x, y) 50 | elif event == cv2.EVENT_LBUTTONDOWN: 51 | idx, dist = closest_point(np.array((x, y)), create_mask_mark.pts) 52 | if dist < 10: 53 | create_mask_mark.sel_idx = idx 54 | elif event == cv2.EVENT_LBUTTONUP: 55 | create_mask_mark.sel_idx = None 56 | 57 | masks = [] 58 | idx = [] 59 | if os.path.exists(MASK_PTS_FILE): 60 | masks = pickle.load(open(MASK_PTS_FILE, 'rb')) 61 | idx = [i for (i, d) in enumerate(masks) if d['file'] == png_image] 62 | if len(idx) > 0: 63 | create_mask_mark.pts = masks[idx[0]]['pts'] 64 | else: 65 | pass 66 | 67 | img = cv2.imread(png_image, cv2.IMREAD_UNCHANGED) 68 | cv2.imshow(window, img) 69 | cv2.waitKey(1) 70 | cv2.setMouseCallback(window, on_mouse) 71 | print('Press ESC to finish Adjust.') 72 | 73 | while not create_mask_mark.done: 74 | canvas = np.copy(img) 75 | for pt in create_mask_mark.pts: 76 | canvas = cv2.circle(canvas, (pt[0], pt[1]), 4, (0, 255, 0), -1) 77 | 78 | tri_mask_pts = get_tri_mask_points(create_mask_mark.pts, TRI_MASK_IDX) 79 | for tri in tri_mask_pts: 80 | tri = tri.reshape(3, 2) 81 | canvas = cv2.polylines(canvas, [tri.astype(np.int32)], True, (0, 255, 0), 2) 82 | 83 | cv2.imshow(window, canvas) 84 | if cv2.waitKey(50) == 27: 85 | create_mask_mark.done = True 86 | 87 | print('Any KEY to continue.') 88 | cv2.imshow(window, canvas) 89 | cv2.waitKey(0) 90 | cv2.destroyAllWindows() 91 | 92 | if len(idx) > 0: 93 | masks[idx[0]]['pts'] = create_mask_mark.pts 94 | else: 95 | masks.append({'file': png_image, 'pts': create_mask_mark.pts}) 96 | pickle.dump(masks, open(MASK_PTS_FILE, 'wb')) 97 | 98 | 99 | class FaceMasker: 
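    # Overlays a mask PNG onto a face image: each triangle of the mask (TRI_MASK_IDX over the
    # 9 points stored in mask_pts.pkl) is affine-warped onto the matching triangle of the
    # 68-point face landmarks (DEFAULT_TRI_FACE_IDX) inside wear_mask_to_face().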
100 | def __init__(self, mask_pts_file=MASK_PTS_FILE): 101 | self.masks_pts_file = mask_pts_file 102 | 103 | self.num_pts = 9 104 | self.tri_mask_idx = TRI_MASK_IDX 105 | self.tri_face_idx = DEFAULT_TRI_FACE_IDX 106 | self.masks = None 107 | self.load_mask() 108 | 109 | def load_mask(self): 110 | masks = pickle.load(open(self.masks_pts_file, 'rb')) 111 | 112 | self.masks = [] 113 | for m in masks: 114 | img = cv2.imread(m['file'], cv2.IMREAD_UNCHANGED) 115 | img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA) 116 | self.masks.append( 117 | { 118 | 'img': img, 119 | 'pts': m['pts'], 120 | 'tri': get_tri_mask_points(m['pts'], self.tri_mask_idx) 121 | } 122 | ) 123 | 124 | def get_tri_face_points(self, shape_pts): 125 | tri_face = np.zeros((len(self.tri_face_idx), 6), dtype=np.float32) 126 | for i in range(len(self.tri_face_idx)): 127 | for j in range(3): 128 | pt = shape_pts[self.tri_face_idx[i][j]] 129 | if hasattr(pt, 'x') and hasattr(pt, 'y'): 130 | tri_face[i, [j+j, j+j+1]] = pt.x, pt.y 131 | else: 132 | tri_face[i, [j+j, j+j+1]] = pt[0], pt[1] 133 | return tri_face 134 | 135 | def wear_mask_to_face(self, image, face_shape, mask_idx=None): 136 | if mask_idx is None: 137 | mask_idx = random.randint(0, len(self.masks)-1) 138 | 139 | image_mask = self.masks[mask_idx]['img'] 140 | tri_mask_pts = self.masks[mask_idx]['tri'] 141 | tri_face = self.get_tri_face_points(face_shape) 142 | 143 | image_face = Image.fromarray(image) 144 | for pts1, pts2 in zip(tri_mask_pts, tri_face): 145 | pts1 = pts1.copy().reshape(3, 2) 146 | pts2 = pts2.copy().reshape(3, 2) 147 | 148 | rect1 = cv2.boundingRect(pts1) 149 | pts1[:, 0] = pts1[:, 0] - rect1[0] 150 | pts1[:, 1] = pts1[:, 1] - rect1[1] 151 | 152 | croped_tri_mask = image_mask[rect1[1]:rect1[1]+rect1[3], rect1[0]:rect1[0]+rect1[2]] 153 | 154 | rect2 = cv2.boundingRect(pts2) 155 | pts2[:, 0] = pts2[:, 0] - rect2[0] 156 | pts2[:, 1] = pts2[:, 1] - rect2[1] 157 | 158 | mask_croped = np.zeros((rect2[3], rect2[2]), np.uint8) 159 | cv2.fillConvexPoly(mask_croped, pts2.astype(np.int32), 255) 160 | 161 | M = cv2.getAffineTransform(pts1, pts2) 162 | warped = cv2.warpAffine(croped_tri_mask, M, (rect2[2], rect2[3])) 163 | warped = cv2.bitwise_and(warped, warped, mask=mask_croped) 164 | 165 | warped = Image.fromarray(warped) 166 | image_face.paste(warped, (rect2[0], rect2[1]), warped) 167 | 168 | return np.array(image_face) 169 | 170 | 171 | if __name__ == '__main__': 172 | create_mask_mark('./mask_images/01_surgical_light_blue.png') 173 | create_mask_mark('./mask_images/02_cloth.png') 174 | create_mask_mark('./mask_images/03_surgical_white.png') 175 | create_mask_mark('./mask_images/04_surgical_blue.png') 176 | 177 | masker = FaceMasker() 178 | -------------------------------------------------------------------------------- /ibug/face_detection/retina_face/retina_face_predictor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | from copy import deepcopy 5 | from types import SimpleNamespace 6 | from typing import Union, Optional 7 | from .prior_box import PriorBox 8 | from .py_cpu_nms import py_cpu_nms 9 | from .retina_face import RetinaFace 10 | from .config import cfg_mnet, cfg_re50 11 | from .box_utils import decode, decode_landm 12 | 13 | 14 | __all__ = ['RetinaFacePredictor'] 15 | 16 | 17 | class RetinaFacePredictor(object): 18 | def __init__(self, threshold: float = 0.8, device: Union[str, torch.device] = 'cuda:0', model: Optional[SimpleNamespace] = None, config: 
Optional[SimpleNamespace] = None) -> None: 19 | self.threshold = threshold 20 | self.device = device 21 | if model is None: 22 | model = RetinaFacePredictor.get_model() 23 | if config is None: 24 | config = RetinaFacePredictor.create_config() 25 | self.config = SimpleNamespace(**model.config.__dict__, **config.__dict__) 26 | self.net = RetinaFace(cfg=self.config.__dict__, phase='test').to(self.device) 27 | pretrained_dict = torch.load(model.weights, map_location=self.device) 28 | if 'state_dict' in pretrained_dict.keys(): 29 | pretrained_dict = {key.split('module.', 1)[-1] if key.startswith('module.') else key: value for key, value in pretrained_dict['state_dict'].items()} 30 | else: 31 | pretrained_dict = {key.split('module.', 1)[-1] if key.startswith('module.') else key: value for key, value in pretrained_dict.items()} 32 | self.net.load_state_dict(pretrained_dict, strict=False) 33 | self.net.eval() 34 | self.priors = None 35 | self.previous_size = None 36 | 37 | @staticmethod 38 | def get_model(name: str = 'resnet50') -> SimpleNamespace: 39 | name = name.lower().strip() 40 | if name == 'resnet50': 41 | return SimpleNamespace(weights=os.path.realpath(os.path.join(os.path.dirname(__file__), 'weights', 'Resnet50_Final.pth')), config=SimpleNamespace(**deepcopy(cfg_re50))) 42 | elif name == 'mobilenet0.25': 43 | return SimpleNamespace(weights=os.path.realpath(os.path.join(os.path.dirname(__file__), 'weights', 'mobilenet0.25_Final.pth')), config=SimpleNamespace(**deepcopy(cfg_mnet))) 44 | else: 45 | raise ValueError('name must be set to either resnet50 or mobilenet0.25') 46 | 47 | @staticmethod 48 | def create_config(top_k: int = 750, conf_thresh: float = 0.02, nms_thresh: float = 0.4, nms_top_k: int = 5000) -> SimpleNamespace: 49 | return SimpleNamespace(top_k=top_k, conf_thresh=conf_thresh, nms_thresh=nms_thresh, nms_top_k=nms_top_k) 50 | 51 | @torch.no_grad() 52 | def __call__(self, image: np.ndarray, rgb: bool = True) -> np.ndarray: 53 | im_height, im_width, _ = image.shape 54 | if rgb: 55 | image = image[..., ::-1] 56 | image = image.astype(int) - np.array([104, 117, 123]) 57 | image = image.transpose(2, 0, 1) 58 | image = torch.from_numpy(image).unsqueeze(0).float().to(self.device) 59 | scale = torch.Tensor([im_width, im_height, im_width, im_height]).to(self.device) 60 | loc, conf, landms = self.net(image) 61 | 62 | # ############################################################################# 63 | # import onnx 64 | # from onnxsim import simplify 65 | # RESOLUTION = [ 66 | # # [192,320], 67 | # # [240,320], 68 | # # [320,480], 69 | # # [360,640], 70 | # [480,640], 71 | # # [720,1280], 72 | # ] 73 | # MODEL = f'retinaface' 74 | # for H, W in RESOLUTION: 75 | # onnx_file = f"{MODEL}_{H}x{W}.onnx" 76 | # x = torch.randn(1, 3, H, W).cuda() 77 | # torch.onnx.export( 78 | # self.net, 79 | # args=(x), 80 | # f=onnx_file, 81 | # opset_version=11, 82 | # input_names = ['input'], 83 | # output_names=['boxes','scores','landmarks'], 84 | # ) 85 | # model_onnx1 = onnx.load(onnx_file) 86 | # model_onnx1 = onnx.shape_inference.infer_shapes(model_onnx1) 87 | # onnx.save(model_onnx1, onnx_file) 88 | 89 | # model_onnx2 = onnx.load(onnx_file) 90 | # model_simp, check = simplify(model_onnx2) 91 | # onnx.save(model_simp, onnx_file) 92 | 93 | # # onnx_file = f"{MODEL}_HxW.onnx" 94 | # # x = torch.randn(1, 3, 192, 320).cuda() 95 | # # torch.onnx.export( 96 | # # self.model.module, 97 | # # args=(x), 98 | # # f=onnx_file, 99 | # # opset_version=11, 100 | # # input_names = ['input'], 101 | # # 
#output_names=['lines','scores'], 102 | # # dynamic_axes={ 103 | # # 'input' : {2: 'height', 3: 'width'}, 104 | # # } 105 | # # ) 106 | # # model_onnx1 = onnx.load(onnx_file) 107 | # # model_onnx1 = onnx.shape_inference.infer_shapes(model_onnx1) 108 | # # onnx.save(model_onnx1, onnx_file) 109 | 110 | # import sys 111 | # sys.exit(0) 112 | # ############################################################################# 113 | 114 | 115 | image_size = (im_height, im_width) 116 | if self.priors is None or self.previous_size != image_size: 117 | self.priors = PriorBox(self.config.__dict__, image_size=image_size).forward().to(self.device) 118 | self.previous_size = image_size 119 | prior_data = self.priors.data 120 | boxes = decode(loc.data.squeeze(0), prior_data, self.config.variance) 121 | boxes = boxes * scale 122 | boxes = boxes.cpu().numpy() 123 | # scores = conf.squeeze(0).data.cpu().numpy()[:, 1] 124 | scores = conf.squeeze(0).data.cpu().numpy() 125 | landms = decode_landm(landms.data.squeeze(0), prior_data, self.config.variance) 126 | scale1 = torch.Tensor( 127 | [ 128 | image.shape[3], 129 | image.shape[2], 130 | image.shape[3], 131 | image.shape[2], 132 | image.shape[3], 133 | image.shape[2], 134 | image.shape[3], 135 | image.shape[2], 136 | image.shape[3], 137 | image.shape[2] 138 | ] 139 | ).to(self.device) 140 | landms = landms * scale1 141 | landms = landms.cpu().numpy() 142 | 143 | # ignore low scores 144 | inds = np.where(scores > self.config.conf_thresh)[0] 145 | if len(inds) == 0: 146 | return np.empty(shape=(0, 15), dtype=np.float32) 147 | boxes = boxes[inds] 148 | landms = landms[inds] 149 | scores = scores[inds] 150 | 151 | # do NMS 152 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) 153 | keep = py_cpu_nms(dets, self.config.nms_thresh, self.config.nms_top_k) 154 | dets = dets[keep, :] 155 | landms = landms[keep] 156 | 157 | """ 158 | dets.shape (1, 5) 159 | x1,y1,x2,y2,score 160 | 161 | landms.shape (1, 10) 162 | """ 163 | 164 | # keep top-K 165 | dets = dets[:self.config.top_k, :] 166 | landms = landms[:self.config.top_k, :] 167 | dets = np.concatenate((dets, landms), axis=1) 168 | 169 | # further filter by confidence 170 | inds = np.where(dets[:, 4] >= self.threshold)[0] 171 | if len(inds) == 0: 172 | return np.empty(shape=(0, 15), dtype=np.float32) 173 | else: 174 | return dets[inds] 175 | -------------------------------------------------------------------------------- /ibug/face_alignment/fan/fan.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def conv3x3(in_planes, out_planes, strd=1, padding=1, bias=False): 7 | "3x3 convolution with padding" 8 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=strd, padding=padding, bias=bias) 9 | 10 | 11 | class ConvBlock(nn.Module): 12 | def __init__(self, in_planes, out_planes, use_instance_norm): 13 | super(ConvBlock, self).__init__() 14 | self.bn1 = nn.InstanceNorm2d(in_planes) if use_instance_norm else nn.BatchNorm2d(in_planes) 15 | self.conv1 = conv3x3(in_planes, int(out_planes / 2)) 16 | self.bn2 = (nn.InstanceNorm2d(int(out_planes / 2)) if use_instance_norm 17 | else nn.BatchNorm2d(int(out_planes / 2))) 18 | self.conv2 = conv3x3(int(out_planes / 2), int(out_planes / 4)) 19 | self.bn3 = (nn.InstanceNorm2d(int(out_planes / 4)) if use_instance_norm 20 | else nn.BatchNorm2d(int(out_planes / 4))) 21 | self.conv3 = conv3x3(int(out_planes / 4), int(out_planes / 
4)) 22 | 23 | if in_planes != out_planes: 24 | self.downsample = nn.Sequential(nn.InstanceNorm2d(in_planes) if use_instance_norm 25 | else nn.BatchNorm2d(in_planes), 26 | nn.ReLU(True), 27 | nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, bias=False)) 28 | else: 29 | self.downsample = None 30 | 31 | def forward(self, x): 32 | residual = x 33 | 34 | out1 = self.bn1(x) 35 | out1 = F.relu(out1, True) 36 | out1 = self.conv1(out1) 37 | 38 | out2 = self.bn2(out1) 39 | out2 = F.relu(out2, True) 40 | out2 = self.conv2(out2) 41 | 42 | out3 = self.bn3(out2) 43 | out3 = F.relu(out3, True) 44 | out3 = self.conv3(out3) 45 | 46 | out3 = torch.cat((out1, out2, out3), 1) 47 | 48 | if self.downsample is not None: 49 | residual = self.downsample(residual) 50 | 51 | out3 += residual 52 | 53 | return out3 54 | 55 | 56 | class HourGlass(nn.Module): 57 | def __init__(self, config): 58 | super(HourGlass, self).__init__() 59 | self.config = config 60 | 61 | self._generate_network(self.config.hg_depth) 62 | 63 | def _generate_network(self, level): 64 | self.add_module('b1_' + str(level), ConvBlock(self.config.hg_num_features, 65 | self.config.hg_num_features, 66 | self.config.use_instance_norm)) 67 | 68 | self.add_module('b2_' + str(level), ConvBlock(self.config.hg_num_features, 69 | self.config.hg_num_features, 70 | self.config.use_instance_norm)) 71 | 72 | if level > 1: 73 | self._generate_network(level - 1) 74 | else: 75 | self.add_module('b2_plus_' + str(level),ConvBlock(self.config.hg_num_features, 76 | self.config.hg_num_features, 77 | self.config.use_instance_norm)) 78 | 79 | self.add_module('b3_' + str(level), ConvBlock(self.config.hg_num_features, 80 | self.config.hg_num_features, 81 | self.config.use_instance_norm)) 82 | 83 | def _forward(self, level, inp): 84 | up1 = inp 85 | up1 = self._modules['b1_' + str(level)](up1) 86 | 87 | if self.config.use_avg_pool: 88 | low1 = F.avg_pool2d(inp, 2) 89 | else: 90 | low1 = F.max_pool2d(inp, 2) 91 | low1 = self._modules['b2_' + str(level)](low1) 92 | 93 | if level > 1: 94 | low2 = self._forward(level - 1, low1) 95 | else: 96 | low2 = low1 97 | low2 = self._modules['b2_plus_' + str(level)](low2) 98 | 99 | low3 = low2 100 | low3 = self._modules['b3_' + str(level)](low3) 101 | 102 | up2 = F.interpolate(low3, scale_factor=2, mode='nearest') 103 | 104 | return up1 + up2 105 | 106 | def forward(self, x): 107 | return self._forward(self.config.hg_depth, x) 108 | 109 | 110 | class FAN(nn.Module): 111 | def __init__(self, config): 112 | super(FAN, self).__init__() 113 | self.config = config 114 | 115 | # Stem 116 | self.conv1 = nn.Conv2d(3, 64, kernel_size=self.config.stem_conv_kernel_size, 117 | stride=self.config.stem_conv_stride, 118 | padding=self.config.stem_conv_kernel_size // 2) 119 | self.bn1 = nn.InstanceNorm2d(64) if self.config.use_instance_norm else nn.BatchNorm2d(64) 120 | self.conv2 = ConvBlock(64, 128, self.config.use_instance_norm) 121 | self.conv3 = ConvBlock(128, 128, self.config.use_instance_norm) 122 | self.conv4 = ConvBlock(128, self.config.hg_num_features, self.config.use_instance_norm) 123 | 124 | # Hourglasses 125 | for hg_module in range(self.config.num_modules): 126 | self.add_module('m' + str(hg_module), HourGlass(self.config)) 127 | self.add_module('top_m_' + str(hg_module), ConvBlock(self.config.hg_num_features, 128 | self.config.hg_num_features, 129 | self.config.use_instance_norm)) 130 | self.add_module('conv_last' + str(hg_module), nn.Conv2d(self.config.hg_num_features, 131 | self.config.hg_num_features, 132 | kernel_size=1, 
stride=1, padding=0)) 133 | self.add_module('bn_end' + str(hg_module), 134 | nn.InstanceNorm2d(self.config.hg_num_features) if self.config.use_instance_norm 135 | else nn.BatchNorm2d(self.config.hg_num_features)) 136 | self.add_module('l' + str(hg_module), nn.Conv2d(self.config.hg_num_features, 137 | self.config.num_landmarks, 138 | kernel_size=1, stride=1, padding=0)) 139 | 140 | if hg_module < self.config.num_modules - 1: 141 | self.add_module('bl' + str(hg_module), nn.Conv2d(self.config.hg_num_features, 142 | self.config.hg_num_features, 143 | kernel_size=1, stride=1, padding=0)) 144 | self.add_module('al' + str(hg_module), nn.Conv2d(self.config.num_landmarks, 145 | self.config.hg_num_features, 146 | kernel_size=1, stride=1, padding=0)) 147 | 148 | def forward(self, x): 149 | x = self.conv2(F.relu(self.bn1(self.conv1(x)), True)) 150 | if self.config.stem_pool_kernel_size > 1: 151 | if self.config.use_avg_pool: 152 | x = F.avg_pool2d(x, self.config.stem_pool_kernel_size) 153 | else: 154 | x = F.max_pool2d(x, self.config.stem_pool_kernel_size) 155 | x = self.conv3(x) 156 | x = self.conv4(x) 157 | 158 | previous = x 159 | hg_feats = [] 160 | tmp_out = None 161 | for i in range(self.config.num_modules): 162 | hg = self._modules['m' + str(i)](previous) 163 | 164 | ll = hg 165 | ll = self._modules['top_m_' + str(i)](ll) 166 | 167 | ll = F.relu(self._modules['bn_end' + str(i)](self._modules['conv_last' + str(i)](ll)), True) 168 | 169 | # Predict heatmaps 170 | tmp_out = self._modules['l' + str(i)](ll) 171 | 172 | if i < self.config.num_modules - 1: 173 | ll = self._modules['bl' + str(i)](ll) 174 | tmp_out_ = self._modules['al' + str(i)](tmp_out) 175 | previous = previous + ll + tmp_out_ 176 | 177 | hg_feats.append(ll) 178 | 179 | return tmp_out, x, tuple(hg_feats) 180 | -------------------------------------------------------------------------------- /ibug/face_alignment/fan/fan_custom.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def conv3x3(in_planes, out_planes, strd=1, padding=1, bias=False): 7 | "3x3 convolution with padding" 8 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=strd, padding=padding, bias=bias) 9 | 10 | 11 | class ConvBlock(nn.Module): 12 | def __init__(self, in_planes, out_planes, use_instance_norm): 13 | super(ConvBlock, self).__init__() 14 | self.bn1 = nn.InstanceNorm2d(in_planes) if use_instance_norm else nn.BatchNorm2d(in_planes) 15 | self.conv1 = conv3x3(in_planes, int(out_planes / 2)) 16 | self.bn2 = (nn.InstanceNorm2d(int(out_planes / 2)) if use_instance_norm 17 | else nn.BatchNorm2d(int(out_planes / 2))) 18 | self.conv2 = conv3x3(int(out_planes / 2), int(out_planes / 4)) 19 | self.bn3 = (nn.InstanceNorm2d(int(out_planes / 4)) if use_instance_norm 20 | else nn.BatchNorm2d(int(out_planes / 4))) 21 | self.conv3 = conv3x3(int(out_planes / 4), int(out_planes / 4)) 22 | 23 | if in_planes != out_planes: 24 | self.downsample = nn.Sequential(nn.InstanceNorm2d(in_planes) if use_instance_norm 25 | else nn.BatchNorm2d(in_planes), 26 | nn.ReLU(True), 27 | nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, bias=False)) 28 | else: 29 | self.downsample = None 30 | 31 | def forward(self, x): 32 | residual = x 33 | 34 | out1 = self.bn1(x) 35 | out1 = F.relu(out1, True) 36 | out1 = self.conv1(out1) 37 | 38 | out2 = self.bn2(out1) 39 | out2 = F.relu(out2, True) 40 | out2 = self.conv2(out2) 41 | 42 | out3 = self.bn3(out2) 43 | out3 = 
F.relu(out3, True) 44 | out3 = self.conv3(out3) 45 | 46 | out3 = torch.cat((out1, out2, out3), 1) 47 | 48 | if self.downsample is not None: 49 | residual = self.downsample(residual) 50 | 51 | out3 += residual 52 | 53 | return out3 54 | 55 | 56 | class HourGlass(nn.Module): 57 | def __init__(self, config): 58 | super(HourGlass, self).__init__() 59 | self.config = config 60 | 61 | self._generate_network(self.config.hg_depth) 62 | 63 | def _generate_network(self, level): 64 | self.add_module('b1_' + str(level), ConvBlock(self.config.hg_num_features, 65 | self.config.hg_num_features, 66 | self.config.use_instance_norm)) 67 | 68 | self.add_module('b2_' + str(level), ConvBlock(self.config.hg_num_features, 69 | self.config.hg_num_features, 70 | self.config.use_instance_norm)) 71 | 72 | if level > 1: 73 | self._generate_network(level - 1) 74 | else: 75 | self.add_module('b2_plus_' + str(level),ConvBlock(self.config.hg_num_features, 76 | self.config.hg_num_features, 77 | self.config.use_instance_norm)) 78 | 79 | self.add_module('b3_' + str(level), ConvBlock(self.config.hg_num_features, 80 | self.config.hg_num_features, 81 | self.config.use_instance_norm)) 82 | 83 | def _forward(self, level, inp): 84 | up1 = inp 85 | up1 = self._modules['b1_' + str(level)](up1) 86 | 87 | if self.config.use_avg_pool: 88 | low1 = F.avg_pool2d(inp, 2) 89 | else: 90 | low1 = F.max_pool2d(inp, 2) 91 | low1 = self._modules['b2_' + str(level)](low1) 92 | 93 | if level > 1: 94 | low2 = self._forward(level - 1, low1) 95 | else: 96 | low2 = low1 97 | low2 = self._modules['b2_plus_' + str(level)](low2) 98 | 99 | low3 = low2 100 | low3 = self._modules['b3_' + str(level)](low3) 101 | 102 | up2 = F.interpolate(low3, scale_factor=2, mode='nearest') 103 | 104 | return up1 + up2 105 | 106 | def forward(self, x): 107 | return self._forward(self.config.hg_depth, x) 108 | 109 | 110 | class FAN(nn.Module): 111 | def __init__(self, config): 112 | super(FAN, self).__init__() 113 | self.config = config 114 | 115 | # Stem 116 | self.conv1 = nn.Conv2d(3, 64, kernel_size=self.config.stem_conv_kernel_size, 117 | stride=self.config.stem_conv_stride, 118 | padding=self.config.stem_conv_kernel_size // 2) 119 | self.bn1 = nn.InstanceNorm2d(64) if self.config.use_instance_norm else nn.BatchNorm2d(64) 120 | self.conv2 = ConvBlock(64, 128, self.config.use_instance_norm) 121 | self.conv3 = ConvBlock(128, 128, self.config.use_instance_norm) 122 | self.conv4 = ConvBlock(128, self.config.hg_num_features, self.config.use_instance_norm) 123 | 124 | # Hourglasses 125 | for hg_module in range(self.config.num_modules): 126 | self.add_module('m' + str(hg_module), HourGlass(self.config)) 127 | self.add_module('top_m_' + str(hg_module), ConvBlock(self.config.hg_num_features, 128 | self.config.hg_num_features, 129 | self.config.use_instance_norm)) 130 | self.add_module('conv_last' + str(hg_module), nn.Conv2d(self.config.hg_num_features, 131 | self.config.hg_num_features, 132 | kernel_size=1, stride=1, padding=0)) 133 | self.add_module('bn_end' + str(hg_module), 134 | nn.InstanceNorm2d(self.config.hg_num_features) if self.config.use_instance_norm 135 | else nn.BatchNorm2d(self.config.hg_num_features)) 136 | self.add_module('l' + str(hg_module), nn.Conv2d(self.config.hg_num_features, 137 | self.config.num_landmarks, 138 | kernel_size=1, stride=1, padding=0)) 139 | 140 | if hg_module < self.config.num_modules - 1: 141 | self.add_module('bl' + str(hg_module), nn.Conv2d(self.config.hg_num_features, 142 | self.config.hg_num_features, 143 | kernel_size=1, stride=1, 
padding=0)) 144 | self.add_module('al' + str(hg_module), nn.Conv2d(self.config.num_landmarks, 145 | self.config.hg_num_features, 146 | kernel_size=1, stride=1, padding=0)) 147 | 148 | def forward(self, x): 149 | x = self.conv2(F.relu(self.bn1(self.conv1(x)), True)) 150 | if self.config.stem_pool_kernel_size > 1: 151 | if self.config.use_avg_pool: 152 | x = F.avg_pool2d(x, self.config.stem_pool_kernel_size) 153 | else: 154 | x = F.max_pool2d(x, self.config.stem_pool_kernel_size) 155 | x = self.conv3(x) 156 | x = self.conv4(x) 157 | 158 | previous = x 159 | hg_feats = [] 160 | tmp_out = None 161 | for i in range(self.config.num_modules): 162 | hg = self._modules['m' + str(i)](previous) 163 | 164 | ll = hg 165 | ll = self._modules['top_m_' + str(i)](ll) 166 | 167 | ll = F.relu(self._modules['bn_end' + str(i)](self._modules['conv_last' + str(i)](ll)), True) 168 | 169 | # Predict heatmaps 170 | tmp_out = self._modules['l' + str(i)](ll) 171 | 172 | if i < self.config.num_modules - 1: 173 | ll = self._modules['bl' + str(i)](ll) 174 | tmp_out_ = self._modules['al' + str(i)](tmp_out) 175 | previous = previous + ll + tmp_out_ 176 | 177 | hg_feats.append(ll) 178 | 179 | # return tmp_out, x, tuple(hg_feats) 180 | return self._decode(tmp_out), tmp_out 181 | 182 | 183 | 184 | def _decode(self, heatmaps: torch.Tensor): 185 | heatmaps = heatmaps.contiguous() 186 | scores = heatmaps.max(dim=3)[0].max(dim=2)[0] 187 | 188 | if (self.config.radius ** 2 * heatmaps.shape[2] * heatmaps.shape[3] < 189 | heatmaps.shape[2] ** 2 + heatmaps.shape[3] ** 2): 190 | # Find peaks in all heatmaps 191 | m = heatmaps.view(heatmaps.shape[0] * heatmaps.shape[1], -1).argmax(1) 192 | # all_peaks = torch.cat( 193 | # [(m / heatmaps.shape[3]).trunc().view(-1, 1), (m % heatmaps.shape[3]).view(-1, 1)], dim=1 194 | # ).reshape((heatmaps.shape[0], heatmaps.shape[1], 1, 1, 2)).repeat( 195 | # 1, 1, heatmaps.shape[2], heatmaps.shape[3], 1).float() 196 | all_peaks = torch.cat( 197 | [torch.div(m, heatmaps.shape[3], rounding_mode="trunc").view(-1, 1), (m % heatmaps.shape[3]).view(-1, 1)], dim=1 198 | ).reshape((heatmaps.shape[0], heatmaps.shape[1], 1, 1, 2)).repeat( 199 | 1, 1, heatmaps.shape[2], heatmaps.shape[3], 1).float() 200 | 201 | 202 | # Apply masks created from the peaks 203 | all_indices = torch.zeros_like(all_peaks) + torch.stack( 204 | [ 205 | torch.arange(0.0, all_peaks.shape[2], device=all_peaks.device).unsqueeze(-1).repeat(1, all_peaks.shape[3]), 206 | torch.arange(0.0, all_peaks.shape[3], device=all_peaks.device).unsqueeze(0).repeat(all_peaks.shape[2], 1) 207 | ], dim=-1) 208 | heatmaps = heatmaps * ((all_indices - all_peaks).norm(dim=-1) <= self.config.radius * 209 | (heatmaps.shape[2] * heatmaps.shape[3]) ** 0.5).float() 210 | 211 | # Prepare the indices for calculating centroids 212 | x_indices = (torch.zeros((*heatmaps.shape[:2], heatmaps.shape[3]), device=heatmaps.device) + torch.arange(0.5, heatmaps.shape[3], device=heatmaps.device)) 213 | y_indices = (torch.zeros(heatmaps.shape[:3], device=heatmaps.device) + torch.arange(0.5, heatmaps.shape[2], device=heatmaps.device)) 214 | 215 | # Finally, find centroids as landmark locations 216 | heatmaps = heatmaps.clamp_min(0.0) 217 | if self.config.gamma != 1.0: 218 | heatmaps = heatmaps.pow(self.config.gamma) 219 | m00s = heatmaps.sum(dim=(2, 3)).clamp_min(torch.finfo(heatmaps.dtype).eps) 220 | xs = heatmaps.sum(dim=2).mul(x_indices).sum(dim=2).div(m00s) 221 | ys = heatmaps.sum(dim=3).mul(y_indices).sum(dim=2).div(m00s) 222 | 223 | lm_info = torch.stack((xs, ys, scores), 
dim=-1)#.cpu().numpy() 224 | # return lm_info[..., :-1], lm_info[..., -1] 225 | return lm_info -------------------------------------------------------------------------------- /ibug/face_alignment/fan/fan_predictor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import torch 4 | import numpy as np 5 | from types import SimpleNamespace 6 | from typing import Union, Optional, Tuple 7 | # from .fan_custom import FAN 8 | from ibug.face_alignment.fan.fan import FAN 9 | 10 | 11 | __all__ = ['FANPredictor'] 12 | 13 | 14 | class FANPredictor(object): 15 | def __init__(self, device: Union[str, torch.device] = 'cuda:0', model: Optional[SimpleNamespace] = None, 16 | config: Optional[SimpleNamespace] = None) -> None: 17 | self.device = device 18 | if model is None: 19 | model = FANPredictor.get_model() 20 | if config is None: 21 | config = FANPredictor.create_config() 22 | self.config = SimpleNamespace(**model.config.__dict__, **config.__dict__) 23 | self.net = FAN(config=self.config).to(self.device) 24 | self.net.load_state_dict(torch.load(model.weights, map_location=self.device)) 25 | self.net.eval() 26 | if self.config.use_jit: 27 | self.net = torch.jit.trace(self.net, torch.rand(1, 3, self.config.input_size, 28 | self.config.input_size).to(self.device)) 29 | 30 | @staticmethod 31 | def get_model(name: str = '2dfan2') -> SimpleNamespace: 32 | name = name.lower() 33 | if name == '2dfan2': 34 | return SimpleNamespace(weights=os.path.join(os.path.dirname(__file__), 'weights', '2dfan2.pth'), 35 | config=SimpleNamespace(crop_ratio=0.55, input_size=256, num_modules=2, 36 | hg_num_features=256, hg_depth=4, use_avg_pool=False, 37 | use_instance_norm=False, stem_conv_kernel_size=7, 38 | stem_conv_stride=2, stem_pool_kernel_size=2, 39 | num_landmarks=68)) 40 | elif name == '2dfan4': 41 | return SimpleNamespace(weights=os.path.join(os.path.dirname(__file__), 'weights', '2dfan4.pth'), 42 | config=SimpleNamespace(crop_ratio=0.55, input_size=256, num_modules=4, 43 | hg_num_features=256, hg_depth=4, use_avg_pool=True, 44 | use_instance_norm=False, stem_conv_kernel_size=7, 45 | stem_conv_stride=2, stem_pool_kernel_size=2, 46 | num_landmarks=68)) 47 | elif name == '2dfan2_alt': 48 | return SimpleNamespace(weights=os.path.join(os.path.dirname(__file__), 'weights', '2dfan2_alt.pth'), 49 | config=SimpleNamespace(crop_ratio=0.55, input_size=256, num_modules=2, 50 | hg_num_features=256, hg_depth=4, use_avg_pool=False, 51 | use_instance_norm=False, stem_conv_kernel_size=7, 52 | stem_conv_stride=2, stem_pool_kernel_size=2, 53 | num_landmarks=68)) 54 | else: 55 | raise ValueError('name must be set to either 2dfan2, 2dfan4, or 2dfan2_alt') 56 | 57 | @staticmethod 58 | def create_config(gamma: float = 1.0, radius: float = 0.1, use_jit: bool = True) -> SimpleNamespace: 59 | return SimpleNamespace(gamma=gamma, radius=radius, use_jit=use_jit) 60 | 61 | @torch.no_grad() 62 | def __call__(self, image: np.ndarray, face_boxes: np.ndarray, rgb: bool = True, 63 | return_features: bool = False) -> Union[Tuple[np.ndarray, np.ndarray], 64 | Tuple[np.ndarray, np.ndarray, torch.Tensor]]: 65 | if face_boxes.size > 0: 66 | if not rgb: 67 | image = image[..., ::-1] 68 | if face_boxes.ndim == 1: 69 | face_boxes = face_boxes[np.newaxis, ...] 
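            # face_boxes is now always 2-D (one row per face), so a single box
            # passed as a 1-D array is handled like a batch of boxes. Below,
            # each box is enlarged to face_size / crop_ratio (about 1.8x with
            # the default crop_ratio of 0.55) around its centre before being
            # cropped and resized to input_size x input_size for the network.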
70 | 71 | # Crop the faces 72 | face_patches = [] 73 | centres = (face_boxes[:, [0, 1]] + face_boxes[:, [2, 3]]) / 2.0 74 | face_sizes = (face_boxes[:, [3, 2]] - face_boxes[:, [1, 0]]).mean(axis=1) 75 | enlarged_face_box_sizes = (face_sizes / self.config.crop_ratio)[:, np.newaxis].repeat(2, axis=1) 76 | enlarged_face_boxes = np.zeros_like(face_boxes[:, :4]) 77 | enlarged_face_boxes[:, :2] = np.round(centres - enlarged_face_box_sizes / 2.0) 78 | enlarged_face_boxes[:, 2:] = np.round(enlarged_face_boxes[:, :2] + enlarged_face_box_sizes) + 1 79 | enlarged_face_boxes = enlarged_face_boxes.astype(int) 80 | outer_bounding_box = np.hstack((enlarged_face_boxes[:, :2].min(axis=0), 81 | enlarged_face_boxes[:, 2:].max(axis=0))) 82 | pad_widths = np.zeros(shape=(3, 2), dtype=int) 83 | if outer_bounding_box[0] < 0: 84 | pad_widths[1][0] = -outer_bounding_box[0] 85 | if outer_bounding_box[1] < 0: 86 | pad_widths[0][0] = -outer_bounding_box[1] 87 | if outer_bounding_box[2] > image.shape[1]: 88 | pad_widths[1][1] = outer_bounding_box[2] - image.shape[1] 89 | if outer_bounding_box[3] > image.shape[0]: 90 | pad_widths[0][1] = outer_bounding_box[3] - image.shape[0] 91 | if np.any(pad_widths > 0): 92 | image = np.pad(image, pad_widths) 93 | for left, top, right, bottom in enlarged_face_boxes: 94 | left += pad_widths[1][0] 95 | top += pad_widths[0][0] 96 | right += pad_widths[1][0] 97 | bottom += pad_widths[0][0] 98 | face_patches.append(cv2.resize(image[top: bottom, left: right, :], 99 | (self.config.input_size, self.config.input_size))) 100 | face_patches = torch.from_numpy(np.array(face_patches).transpose( 101 | (0, 3, 1, 2)).astype(np.float32)).to(self.device) / 255.0 102 | 103 | # Get heatmaps 104 | heatmaps, stem_feats, hg_feats = self.net(face_patches) 105 | # landmarks, landmark_scores = self.net(face_patches) 106 | 107 | # import onnx 108 | # from onnxsim import simplify 109 | # RESOLUTION = [ 110 | # [256,256], 111 | # ] 112 | 113 | # # MODEL = f'2dfan2_alt' 114 | # MODEL = f'2dfan4' 115 | 116 | # for H, W in RESOLUTION: 117 | # onnx_file = f"{MODEL}_1x3x{H}x{W}.onnx" 118 | # x = torch.randn(1, 3, H, W).cuda() 119 | # torch.onnx.export( 120 | # self.net, 121 | # args=(x), 122 | # f=onnx_file, 123 | # opset_version=11, 124 | # input_names = ['input'], 125 | # output_names=['landmarks_xyscore','heatmaps'], 126 | # ) 127 | # model_onnx1 = onnx.load(onnx_file) 128 | # model_onnx1 = onnx.shape_inference.infer_shapes(model_onnx1) 129 | # onnx.save(model_onnx1, onnx_file) 130 | 131 | # model_onnx2 = onnx.load(onnx_file) 132 | # model_simp, check = simplify(model_onnx2) 133 | # onnx.save(model_simp, onnx_file) 134 | 135 | # onnx_file = f"{MODEL}_Nx3x{H}x{W}.onnx" 136 | # x = torch.randn(1, 3, H, W).cuda() 137 | # torch.onnx.export( 138 | # self.net, 139 | # args=(x), 140 | # f=onnx_file, 141 | # opset_version=11, 142 | # input_names = ['input'], 143 | # output_names=['landmarks_xyscore','heatmaps'], 144 | # dynamic_axes={ 145 | # # 'input' : {2: 'height', 3: 'width'}, 146 | # 'input' : {0: 'N'}, 147 | # # 'input' : {0: 'N', 2: 'height', 3: 'width'}, 148 | # 'landmarks_xyscore': {0: 'N'}, 149 | # 'heatmaps': {0: 'N'}, 150 | # } 151 | # ) 152 | # model_onnx1 = onnx.load(onnx_file) 153 | # model_onnx1 = onnx.shape_inference.infer_shapes(model_onnx1) 154 | # onnx.save(model_onnx1, onnx_file) 155 | 156 | # import sys 157 | # sys.exit(0) 158 | 159 | 160 | 161 | # Get landmark coordinates and scores 162 | landmarks, landmark_scores = self._decode(heatmaps) 163 | 164 | # Rectify landmark coordinates 165 | hh, hw = 
heatmaps.size(2), heatmaps.size(3) 166 | for landmark, (left, top, right, bottom) in zip(landmarks, enlarged_face_boxes): 167 | landmark[:, 0] = landmark[:, 0] * (right - left) / hw + left 168 | landmark[:, 1] = landmark[:, 1] * (bottom - top) / hh + top 169 | 170 | if return_features: 171 | return landmarks, landmark_scores, torch.cat((stem_feats, torch.cat(hg_feats, dim=1) * torch.sum(heatmaps, dim=1, keepdim=True)), dim=1) 172 | else: 173 | return landmarks, landmark_scores 174 | else: 175 | landmarks = np.empty(shape=(0, 68, 2), dtype=np.float32) 176 | landmark_scores = np.empty(shape=(0, 68), dtype=np.float32) 177 | if return_features: 178 | return landmarks, landmark_scores, torch.Tensor([]) 179 | else: 180 | return landmarks, landmark_scores 181 | 182 | 183 | 184 | def _decode(self, heatmaps: torch.Tensor) -> Tuple[np.ndarray, np.ndarray]: 185 | heatmaps = heatmaps.contiguous() 186 | scores = heatmaps.max(dim=3)[0].max(dim=2)[0] 187 | 188 | if (self.config.radius ** 2 * heatmaps.shape[2] * heatmaps.shape[3] < 189 | heatmaps.shape[2] ** 2 + heatmaps.shape[3] ** 2): 190 | # Find peaks in all heatmaps 191 | m = heatmaps.view(heatmaps.shape[0] * heatmaps.shape[1], -1).argmax(1) 192 | all_peaks = torch.cat( 193 | [(m / heatmaps.shape[3]).trunc().view(-1, 1), (m % heatmaps.shape[3]).view(-1, 1)], dim=1 194 | ).reshape((heatmaps.shape[0], heatmaps.shape[1], 1, 1, 2)).repeat( 195 | 1, 1, heatmaps.shape[2], heatmaps.shape[3], 1).float() 196 | 197 | # Apply masks created from the peaks 198 | all_indices = torch.zeros_like(all_peaks) + torch.stack( 199 | [torch.arange(0.0, all_peaks.shape[2], 200 | device=all_peaks.device).unsqueeze(-1).repeat(1, all_peaks.shape[3]), 201 | torch.arange(0.0, all_peaks.shape[3], 202 | device=all_peaks.device).unsqueeze(0).repeat(all_peaks.shape[2], 1)], dim=-1) 203 | heatmaps = heatmaps * ((all_indices - all_peaks).norm(dim=-1) <= self.config.radius * 204 | (heatmaps.shape[2] * heatmaps.shape[3]) ** 0.5).float() 205 | 206 | # Prepare the indices for calculating centroids 207 | x_indices = (torch.zeros((*heatmaps.shape[:2], heatmaps.shape[3]), device=heatmaps.device) + 208 | torch.arange(0.5, heatmaps.shape[3], device=heatmaps.device)) 209 | y_indices = (torch.zeros(heatmaps.shape[:3], device=heatmaps.device) + 210 | torch.arange(0.5, heatmaps.shape[2], device=heatmaps.device)) 211 | 212 | # Finally, find centroids as landmark locations 213 | heatmaps = heatmaps.clamp_min(0.0) 214 | if self.config.gamma != 1.0: 215 | heatmaps = heatmaps.pow(self.config.gamma) 216 | m00s = heatmaps.sum(dim=(2, 3)).clamp_min(torch.finfo(heatmaps.dtype).eps) 217 | xs = heatmaps.sum(dim=2).mul(x_indices).sum(dim=2).div(m00s) 218 | ys = heatmaps.sum(dim=3).mul(y_indices).sum(dim=2).div(m00s) 219 | 220 | lm_info = torch.stack((xs, ys, scores), dim=-1).cpu().numpy() 221 | return lm_info[..., :-1], lm_info[..., -1] 222 | -------------------------------------------------------------------------------- /ibug/face_detection/retina_face/box_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def point_form(boxes): 6 | """ Convert prior_boxes to (xmin, ymin, xmax, ymax) 7 | representation for comparison to point form ground truth data. 8 | Args: 9 | boxes: (tensor) center-size default boxes from priorbox layers. 10 | Return: 11 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 
12 | """ 13 | return torch.cat( 14 | ( 15 | boxes[:, :2] - boxes[:, 2:]/2, # xmin, ymin 16 | boxes[:, :2] + boxes[:, 2:]/2, # xmax, ymax 17 | ), 18 | 1, 19 | ) 20 | 21 | 22 | def center_size(boxes): 23 | """ Convert prior_boxes to (cx, cy, w, h) 24 | representation for comparison to center-size form ground truth data. 25 | Args: 26 | boxes: (tensor) point_form boxes 27 | Return: 28 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 29 | """ 30 | return torch.cat( 31 | ( 32 | boxes[:, 2:] + boxes[:, :2])/2, # cx, cy 33 | boxes[:, 2:] - boxes[:, :2], # w, h 34 | 1, 35 | ) 36 | 37 | 38 | def intersect(box_a, box_b): 39 | """ We resize both tensors to [A,B,2] without new malloc: 40 | [A,2] -> [A,1,2] -> [A,B,2] 41 | [B,2] -> [1,B,2] -> [A,B,2] 42 | Then we compute the area of intersect between box_a and box_b. 43 | Args: 44 | box_a: (tensor) bounding boxes, Shape: [A,4]. 45 | box_b: (tensor) bounding boxes, Shape: [B,4]. 46 | Return: 47 | (tensor) intersection area, Shape: [A,B]. 48 | """ 49 | A = box_a.size(0) 50 | B = box_b.size(0) 51 | max_xy = torch.min( 52 | box_a[:, 2:].unsqueeze(1).expand(A, B, 2), 53 | box_b[:, 2:].unsqueeze(0).expand(A, B, 2) 54 | ) 55 | min_xy = torch.max( 56 | box_a[:, :2].unsqueeze(1).expand(A, B, 2), 57 | box_b[:, :2].unsqueeze(0).expand(A, B, 2) 58 | ) 59 | inter = torch.clamp((max_xy - min_xy), min=0) 60 | return inter[:, :, 0] * inter[:, :, 1] 61 | 62 | 63 | def jaccard(box_a, box_b): 64 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap 65 | is simply the intersection over union of two boxes. Here we operate on 66 | ground truth boxes and default boxes. 67 | E.g.: 68 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) 69 | Args: 70 | box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] 71 | box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] 72 | Return: 73 | jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] 74 | """ 75 | inter = intersect(box_a, box_b) 76 | area_a = ( 77 | (box_a[:, 2]-box_a[:, 0]) * 78 | (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B] 79 | area_b = ( 80 | (box_b[:, 2]-box_b[:, 0]) * 81 | (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B] 82 | union = area_a + area_b - inter 83 | return inter / union # [A,B] 84 | 85 | 86 | def matrix_iou(a, b): 87 | """ 88 | return iou of a and b, numpy version for data augenmentation 89 | """ 90 | lt = np.maximum(a[:, np.newaxis, :2], b[:, :2]) 91 | rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) 92 | 93 | area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2) 94 | area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) 95 | area_b = np.prod(b[:, 2:] - b[:, :2], axis=1) 96 | return area_i / (area_a[:, np.newaxis] + area_b - area_i) 97 | 98 | 99 | def matrix_iof(a, b): 100 | """ 101 | return iof of a and b, numpy version for data augenmentation 102 | """ 103 | lt = np.maximum(a[:, np.newaxis, :2], b[:, :2]) 104 | rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) 105 | 106 | area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2) 107 | area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) 108 | return area_i / np.maximum(area_a[:, np.newaxis], 1) 109 | 110 | 111 | def match(threshold, truths, priors, variances, labels, landms, loc_t, conf_t, landm_t, idx): 112 | """Match each prior box with the ground truth box of the highest jaccard 113 | overlap, encode the bounding boxes, then return the matched indices 114 | corresponding to both confidence and location preds. 
115 | Args: 116 | threshold: (float) The overlap threshold used when mathing boxes. 117 | truths: (tensor) Ground truth boxes, Shape: [num_obj, 4]. 118 | priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4]. 119 | variances: (tensor) Variances corresponding to each prior coord, 120 | Shape: [num_priors, 4]. 121 | labels: (tensor) All the class labels for the image, Shape: [num_obj]. 122 | landms: (tensor) Ground truth landms, Shape [num_obj, 10]. 123 | loc_t: (tensor) Tensor to be filled w/ endcoded location targets. 124 | conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds. 125 | landm_t: (tensor) Tensor to be filled w/ endcoded landm targets. 126 | idx: (int) current batch index 127 | Return: 128 | The matched indices corresponding to 1)location 2)confidence 3)landm preds. 129 | """ 130 | # jaccard index 131 | overlaps = jaccard( 132 | truths, 133 | point_form(priors) 134 | ) 135 | # (Bipartite Matching) 136 | # [1,num_objects] best prior for each ground truth 137 | best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True) 138 | 139 | # ignore hard gt 140 | valid_gt_idx = best_prior_overlap[:, 0] >= 0.2 141 | best_prior_idx_filter = best_prior_idx[valid_gt_idx, :] 142 | if best_prior_idx_filter.shape[0] <= 0: 143 | loc_t[idx] = 0 144 | conf_t[idx] = 0 145 | return 146 | 147 | # [1,num_priors] best ground truth for each prior 148 | best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True) 149 | best_truth_idx.squeeze_(0) 150 | best_truth_overlap.squeeze_(0) 151 | best_prior_idx.squeeze_(1) 152 | best_prior_idx_filter.squeeze_(1) 153 | best_prior_overlap.squeeze_(1) 154 | best_truth_overlap.index_fill_(0, best_prior_idx_filter, 2) # ensure best prior 155 | # TODO refactor: index best_prior_idx with long tensor 156 | # ensure every gt matches with its prior of max overlap 157 | for j in range(best_prior_idx.size(0)): # 判别此anchor是预测哪一个boxes 158 | best_truth_idx[best_prior_idx[j]] = j 159 | matches = truths[best_truth_idx] # Shape: [num_priors,4] 此处为每一个anchor对应的bbox取出来 160 | conf = labels[best_truth_idx] # Shape: [num_priors] 此处为每一个anchor对应的label取出来 161 | conf[best_truth_overlap < threshold] = 0 # label as background overlap<0.35的全部作为负样本 162 | loc = encode(matches, priors, variances) 163 | 164 | matches_landm = landms[best_truth_idx] 165 | landm = encode_landm(matches_landm, priors, variances) 166 | loc_t[idx] = loc # [num_priors,4] encoded offsets to learn 167 | conf_t[idx] = conf # [num_priors] top class label for each prior 168 | landm_t[idx] = landm 169 | 170 | 171 | def encode(matched, priors, variances): 172 | """Encode the variances from the priorbox layers into the ground truth boxes 173 | we have matched (based on jaccard overlap) with the prior boxes. 174 | Args: 175 | matched: (tensor) Coords of ground truth for each prior in point-form 176 | Shape: [num_priors, 4]. 177 | priors: (tensor) Prior boxes in center-offset form 178 | Shape: [num_priors,4]. 
179 | variances: (list[float]) Variances of priorboxes 180 | Return: 181 | encoded boxes (tensor), Shape: [num_priors, 4] 182 | """ 183 | 184 | # dist b/t match center and prior's center 185 | g_cxcy = (matched[:, :2] + matched[:, 2:])/2 - priors[:, :2] 186 | # encode variance 187 | g_cxcy /= (variances[0] * priors[:, 2:]) 188 | # match wh / prior wh 189 | g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:] 190 | g_wh = torch.log(g_wh) / variances[1] 191 | # return target for smooth_l1_loss 192 | return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4] 193 | 194 | 195 | def encode_landm(matched, priors, variances): 196 | """Encode the variances from the priorbox layers into the ground truth boxes 197 | we have matched (based on jaccard overlap) with the prior boxes. 198 | Args: 199 | matched: (tensor) Coords of ground truth for each prior in point-form 200 | Shape: [num_priors, 10]. 201 | priors: (tensor) Prior boxes in center-offset form 202 | Shape: [num_priors,4]. 203 | variances: (list[float]) Variances of priorboxes 204 | Return: 205 | encoded landm (tensor), Shape: [num_priors, 10] 206 | """ 207 | 208 | # dist b/t match center and prior's center 209 | matched = torch.reshape(matched, (matched.size(0), 5, 2)) 210 | priors_cx = priors[:, 0].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2) 211 | priors_cy = priors[:, 1].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2) 212 | priors_w = priors[:, 2].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2) 213 | priors_h = priors[:, 3].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2) 214 | priors = torch.cat([priors_cx, priors_cy, priors_w, priors_h], dim=2) 215 | g_cxcy = matched[:, :, :2] - priors[:, :, :2] 216 | # encode variance 217 | g_cxcy /= (variances[0] * priors[:, :, 2:]) 218 | # g_cxcy /= priors[:, :, 2:] 219 | g_cxcy = g_cxcy.reshape(g_cxcy.size(0), -1) 220 | # return target for smooth_l1_loss 221 | return g_cxcy 222 | 223 | 224 | # Adapted from https://github.com/Hakuyume/chainer-ssd 225 | def decode(loc, priors, variances): 226 | """Decode locations from predictions using priors to undo 227 | the encoding we did for offset regression at train time. 228 | Args: 229 | loc (tensor): location predictions for loc layers, 230 | Shape: [num_priors,4] 231 | priors (tensor): Prior boxes in center-offset form. 232 | Shape: [num_priors,4]. 233 | variances: (list[float]) Variances of priorboxes 234 | Return: 235 | decoded bounding box predictions 236 | """ 237 | 238 | boxes = torch.cat(( 239 | priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], 240 | priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) 241 | boxes[:, :2] -= boxes[:, 2:] / 2 242 | boxes[:, 2:] += boxes[:, :2] 243 | return boxes 244 | 245 | 246 | def decode_landm(pre, priors, variances): 247 | """Decode landm from predictions using priors to undo 248 | the encoding we did for offset regression at train time. 249 | Args: 250 | pre (tensor): landm predictions for loc layers, 251 | Shape: [num_priors,10] 252 | priors (tensor): Prior boxes in center-offset form. 253 | Shape: [num_priors,4]. 
254 | variances: (list[float]) Variances of priorboxes 255 | Return: 256 | decoded landm predictions 257 | """ 258 | landms = torch.cat((priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:], 259 | priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:], 260 | priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:], 261 | priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:], 262 | priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:], 263 | ), dim=1) 264 | return landms 265 | 266 | 267 | def log_sum_exp(x): 268 | """Utility function for computing log_sum_exp while determining 269 | This will be used to determine unaveraged confidence loss across 270 | all examples in a batch. 271 | Args: 272 | x (Variable(tensor)): conf_preds from conf layers 273 | """ 274 | x_max = x.data.max() 275 | return torch.log(torch.sum(torch.exp(x-x_max), 1, keepdim=True)) + x_max 276 | 277 | 278 | # Original author: Francisco Massa: 279 | # https://github.com/fmassa/object-detection.torch 280 | # Ported to PyTorch by Max deGroot (02/01/2017) 281 | def nms(boxes, scores, overlap=0.5, top_k=200): 282 | """Apply non-maximum suppression at test time to avoid detecting too many 283 | overlapping bounding boxes for a given object. 284 | Args: 285 | boxes: (tensor) The location preds for the img, Shape: [num_priors,4]. 286 | scores: (tensor) The class predscores for the img, Shape:[num_priors]. 287 | overlap: (float) The overlap thresh for suppressing unnecessary boxes. 288 | top_k: (int) The Maximum number of box preds to consider. 289 | Return: 290 | The indices of the kept boxes with respect to num_priors. 291 | """ 292 | 293 | keep = torch.Tensor(scores.size(0)).fill_(0).long() 294 | if boxes.numel() == 0: 295 | return keep 296 | x1 = boxes[:, 0] 297 | y1 = boxes[:, 1] 298 | x2 = boxes[:, 2] 299 | y2 = boxes[:, 3] 300 | area = torch.mul(x2 - x1, y2 - y1) 301 | v, idx = scores.sort(0) # sort in ascending order 302 | # I = I[v >= 0.01] 303 | idx = idx[-top_k:] # indices of the top-k largest vals 304 | xx1 = boxes.new() 305 | yy1 = boxes.new() 306 | xx2 = boxes.new() 307 | yy2 = boxes.new() 308 | w = boxes.new() 309 | h = boxes.new() 310 | 311 | # keep = torch.Tensor() 312 | count = 0 313 | while idx.numel() > 0: 314 | i = idx[-1] # index of current largest val 315 | # keep.append(i) 316 | keep[count] = i 317 | count += 1 318 | if idx.size(0) == 1: 319 | break 320 | idx = idx[:-1] # remove kept element from view 321 | # load bboxes of next highest vals 322 | torch.index_select(x1, 0, idx, out=xx1) 323 | torch.index_select(y1, 0, idx, out=yy1) 324 | torch.index_select(x2, 0, idx, out=xx2) 325 | torch.index_select(y2, 0, idx, out=yy2) 326 | # store element-wise max with next highest score 327 | xx1 = torch.clamp(xx1, min=x1[i]) 328 | yy1 = torch.clamp(yy1, min=y1[i]) 329 | xx2 = torch.clamp(xx2, max=x2[i]) 330 | yy2 = torch.clamp(yy2, max=y2[i]) 331 | w.resize_as_(xx2) 332 | h.resize_as_(yy2) 333 | w = xx2 - xx1 334 | h = yy2 - yy1 335 | # check sizes of xx1 and xx2.. 
after each iteration 336 | w = torch.clamp(w, min=0.0) 337 | h = torch.clamp(h, min=0.0) 338 | inter = w*h 339 | # IoU = i / (area(a) + area(b) - i) 340 | rem_areas = torch.index_select(area, 0, idx) # load remaining areas) 341 | union = (rem_areas - inter) + area[i] 342 | IoU = inter/union # store result in iou 343 | # keep only elements with an IoU <= overlap 344 | idx = idx[IoU.le(overlap)] 345 | return keep, count 346 | 347 | 348 | -------------------------------------------------------------------------------- /11_create_masked_face_dataset_yolo_test_yolov4_filter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import cv2 5 | import glob 6 | import copy 7 | import numpy as np 8 | import onnxruntime 9 | from tqdm import tqdm 10 | from natsort import natsorted 11 | from argparse import ArgumentParser 12 | from typing import Tuple, Optional, List 13 | 14 | 15 | class YOLOv4ONNX(object): 16 | def __init__( 17 | self, 18 | model_path: Optional[str] = 'yolov4_headdetection_480x640_post.onnx', 19 | input_shape: Optional[Tuple[int,int]] = (480, 640), 20 | class_score_th: Optional[float] = 0.20, 21 | providers: Optional[List] = [ 22 | ( 23 | 'TensorrtExecutionProvider', { 24 | 'trt_engine_cache_enable': True, 25 | 'trt_engine_cache_path': '.', 26 | 'trt_fp16_enable': True, 27 | } 28 | ), 29 | 'CUDAExecutionProvider', 30 | 'CPUExecutionProvider', 31 | ], 32 | ): 33 | """YOLOv4ONNX 34 | 35 | Parameters 36 | ---------- 37 | model_path: Optional[str] 38 | ONNX file path for YOLOv4 39 | 40 | input_shape: Optional[Tuple[int,int]] 41 | Model Input Resolution, Default: (480,640) 42 | 43 | class_score_th: Optional[float] 44 | 45 | class_score_th: Optional[float] 46 | Score threshold. 
Default: 0.20 47 | 48 | providers: Optional[List] 49 | Name of onnx execution providers 50 | Default: 51 | [ 52 | 'TensorrtExecutionProvider', 53 | 'CUDAExecutionProvider', 54 | 'CPUExecutionProvider', 55 | ] 56 | """ 57 | # Input size 58 | self.input_shape = input_shape 59 | 60 | # Threshold 61 | self.class_score_th = class_score_th 62 | 63 | # Model loading 64 | session_option = onnxruntime.SessionOptions() 65 | session_option.log_severity_level = 3 66 | self.onnx_session = onnxruntime.InferenceSession( 67 | model_path, 68 | sess_options=session_option, 69 | providers=providers, 70 | ) 71 | self.providers = self.onnx_session.get_providers() 72 | 73 | self.input_name = self.onnx_session.get_inputs()[0].name 74 | self.output_name = self.onnx_session.get_outputs()[0].name 75 | 76 | 77 | def __call__( 78 | self, 79 | image: np.ndarray, 80 | ) -> Tuple[np.ndarray, np.ndarray]: 81 | """YOLOV4ONNX 82 | 83 | Parameters 84 | ---------- 85 | image: np.ndarray 86 | Entire image 87 | 88 | Returns 89 | ------- 90 | faceboxes: np.ndarray 91 | Predicted face boxes: [facecount, x1, y1, x2, y2] 92 | 93 | facescores: np.ndarray 94 | Predicted face box confs: [facecount, conf] 95 | """ 96 | temp_image = copy.deepcopy(image) 97 | 98 | # PreProcess 99 | resized_image = self.__preprocess( 100 | temp_image, 101 | ) 102 | 103 | # Inference 104 | inferece_image = np.asarray([resized_image], dtype=np.float32) 105 | boxes = self.onnx_session.run( 106 | None, 107 | {self.input_name: inferece_image}, 108 | )[0] 109 | 110 | # PostProcess 111 | faceboxes, facescores = self.__postprocess( 112 | image= temp_image, 113 | boxes=boxes, 114 | ) 115 | 116 | return faceboxes, facescores 117 | 118 | 119 | def __preprocess( 120 | self, 121 | image: np.ndarray, 122 | swap: Optional[Tuple[int,int,int]] = (2, 0, 1), 123 | ) -> np.ndarray: 124 | """__preprocess 125 | 126 | Parameters 127 | ---------- 128 | image: np.ndarray 129 | Entire image 130 | 131 | swap: tuple 132 | HWC to CHW: (2,0,1) 133 | CHW to HWC: (1,2,0) 134 | HWC to HWC: (0,1,2) 135 | CHW to CHW: (0,1,2) 136 | 137 | Returns 138 | ------- 139 | resized_image: np.ndarray 140 | Resized and normalized image. 141 | """ 142 | # Normalization + BGR->RGB 143 | resized_image = cv2.resize( 144 | image, 145 | ( 146 | int(self.input_shape[1]), # type: ignore 147 | int(self.input_shape[0]), # type: ignore 148 | ) 149 | ) 150 | resized_image = np.divide(resized_image, 255.0) # type: ignore 151 | resized_image = resized_image.transpose(swap) 152 | resized_image = np.ascontiguousarray(resized_image, dtype=np.float32) 153 | return resized_image 154 | 155 | 156 | def __postprocess( 157 | self, 158 | image: np.ndarray, 159 | boxes: np.ndarray, 160 | ) -> Tuple[np.ndarray, np.ndarray]: 161 | """__postprocess 162 | 163 | Parameters 164 | ---------- 165 | image: np.ndarray 166 | Entire image. 
167 | 168 | boxes: np.ndarray 169 | (boxcount, 5) = (boxcount, x1y1x2y2score) 170 | 171 | Returns 172 | ------- 173 | faceboxes: np.ndarray 174 | Predicted face boxes: [facecount, x1, y1, x2, y2] 175 | 176 | facescores: np.ndarray 177 | Predicted face box confs: [facecount, score] 178 | """ 179 | image_height = image.shape[0] 180 | image_width = image.shape[1] 181 | 182 | scores = boxes[:,4] 183 | keep_idxs = scores > self.class_score_th 184 | boxes_keep = boxes[keep_idxs, :] 185 | 186 | faceboxes = [] 187 | facescores = [] 188 | 189 | if len(boxes_keep) > 0: 190 | boxes_keep[:, 0] = boxes_keep[:, 0] * image_width 191 | boxes_keep[:, 1] = boxes_keep[:, 1] * image_height 192 | boxes_keep[:, 2] = boxes_keep[:, 2] * image_width 193 | boxes_keep[:, 3] = boxes_keep[:, 3] * image_height 194 | 195 | for box in boxes_keep: 196 | x_min = int(box[0]) if int(box[0]) > 0 else 0 197 | y_min = int(box[1]) if int(box[1]) > 0 else 0 198 | x_max = int(box[2]) if int(box[2]) < image_width else image_width 199 | y_max = int(box[3]) if int(box[3]) < image_height else image_height 200 | score = box[4] 201 | 202 | faceboxes.append( 203 | [ 204 | x_min, 205 | y_min, 206 | x_max, 207 | y_max, 208 | ] 209 | ) 210 | facescores.append( 211 | [ 212 | score 213 | ] 214 | ) 215 | 216 | return np.asarray(faceboxes), np.asarray(facescores) # type: ignore 217 | 218 | 219 | class YOLOv7ONNX(object): 220 | def __init__( 221 | self, 222 | model_path: Optional[str] = 'yolov7_tiny_head_0.752_post_480x640.onnx', 223 | class_score_th: Optional[float] = 0.30, 224 | providers: Optional[List] = [ 225 | # ( 226 | # 'TensorrtExecutionProvider', { 227 | # 'trt_engine_cache_enable': True, 228 | # 'trt_engine_cache_path': '.', 229 | # 'trt_fp16_enable': True, 230 | # } 231 | # ), 232 | 'CUDAExecutionProvider', 233 | 'CPUExecutionProvider', 234 | ], 235 | ): 236 | """YOLOv7ONNX 237 | Parameters 238 | ---------- 239 | model_path: Optional[str] 240 | ONNX file path for YOLOv7 241 | class_score_th: Optional[float] 242 | class_score_th: Optional[float] 243 | Score threshold. 
Default: 0.30 244 | providers: Optional[List] 245 | Name of onnx execution providers 246 | Default: 247 | [ 248 | ( 249 | 'TensorrtExecutionProvider', { 250 | 'trt_engine_cache_enable': True, 251 | 'trt_engine_cache_path': '.', 252 | 'trt_fp16_enable': True, 253 | } 254 | ), 255 | 'CUDAExecutionProvider', 256 | 'CPUExecutionProvider', 257 | ] 258 | """ 259 | # Threshold 260 | self.class_score_th = class_score_th 261 | 262 | # Model loading 263 | session_option = onnxruntime.SessionOptions() 264 | session_option.log_severity_level = 3 265 | self.onnx_session = onnxruntime.InferenceSession( 266 | model_path, 267 | sess_options=session_option, 268 | providers=providers, 269 | ) 270 | self.providers = self.onnx_session.get_providers() 271 | 272 | self.input_shapes = [ 273 | input.shape for input in self.onnx_session.get_inputs() 274 | ] 275 | self.input_names = [ 276 | input.name for input in self.onnx_session.get_inputs() 277 | ] 278 | self.output_names = [ 279 | output.name for output in self.onnx_session.get_outputs() 280 | ] 281 | 282 | 283 | def __call__( 284 | self, 285 | image: np.ndarray, 286 | ) -> Tuple[np.ndarray, np.ndarray]: 287 | """YOLOv7ONNX 288 | Parameters 289 | ---------- 290 | image: np.ndarray 291 | Entire image 292 | Returns 293 | ------- 294 | face_boxes: np.ndarray 295 | Predicted face boxes: [facecount, y1, x1, y2, x2] 296 | face_scores: np.ndarray 297 | Predicted face box scores: [facecount, score] 298 | """ 299 | temp_image = copy.deepcopy(image) 300 | 301 | # PreProcess 302 | resized_image = self.__preprocess( 303 | temp_image, 304 | ) 305 | 306 | # Inference 307 | inferece_image = np.asarray([resized_image], dtype=np.float32) 308 | scores, boxes = self.onnx_session.run( 309 | self.output_names, 310 | {input_name: inferece_image for input_name in self.input_names}, 311 | ) 312 | 313 | # PostProcess 314 | face_boxes, face_scores = self.__postprocess( 315 | image=temp_image, 316 | scores=scores, 317 | boxes=boxes, 318 | ) 319 | 320 | return face_boxes, face_scores 321 | 322 | 323 | def __preprocess( 324 | self, 325 | image: np.ndarray, 326 | swap: Optional[Tuple[int,int,int]] = (2, 0, 1), 327 | ) -> np.ndarray: 328 | """__preprocess 329 | Parameters 330 | ---------- 331 | image: np.ndarray 332 | Entire image 333 | swap: tuple 334 | HWC to CHW: (2,0,1) 335 | CHW to HWC: (1,2,0) 336 | HWC to HWC: (0,1,2) 337 | CHW to CHW: (0,1,2) 338 | Returns 339 | ------- 340 | resized_image: np.ndarray 341 | Resized and normalized image. 342 | """ 343 | # Normalization + BGR->RGB 344 | resized_image = cv2.resize( 345 | image, 346 | ( 347 | int(self.input_shapes[0][3]), 348 | int(self.input_shapes[0][2]), 349 | ) 350 | ) 351 | resized_image = np.divide(resized_image, 255.0) 352 | resized_image = resized_image[..., ::-1] 353 | resized_image = resized_image.transpose(swap) 354 | resized_image = np.ascontiguousarray( 355 | resized_image, 356 | dtype=np.float32, 357 | ) 358 | return resized_image 359 | 360 | 361 | def __postprocess( 362 | self, 363 | image: np.ndarray, 364 | scores: np.ndarray, 365 | boxes: np.ndarray, 366 | ) -> Tuple[np.ndarray, np.ndarray]: 367 | """__postprocess 368 | Parameters 369 | ---------- 370 | image: np.ndarray 371 | Entire image. 
372 | scores: np.ndarray 373 | float32[N, 1] 374 | boxes: np.ndarray 375 | int64[N, 6] 376 | Returns 377 | ------- 378 | faceboxes: np.ndarray 379 | Predicted face boxes: [facecount, y1, x1, y2, x2] 380 | facescores: np.ndarray 381 | Predicted face box confs: [facecount, score] 382 | """ 383 | image_height = image.shape[0] 384 | image_width = image.shape[1] 385 | 386 | """ 387 | Head Detector is 388 | N -> Number of boxes detected 389 | batchno -> always 0: BatchNo.0 390 | classid -> always 0: "Head" 391 | scores: float32[N,1], 392 | batchno_classid_y1x1y2x2: int64[N,6], 393 | """ 394 | scores = scores 395 | keep_idxs = scores[:, 0] > self.class_score_th 396 | scores_keep = scores[keep_idxs, :] 397 | boxes_keep = boxes[keep_idxs, :] 398 | faceboxes = [] 399 | facescores = [] 400 | 401 | if len(boxes_keep) > 0: 402 | for box, score in zip(boxes_keep, scores_keep): 403 | x_min = max(int(box[3]), 0) 404 | y_min = max(int(box[2]), 0) 405 | x_max = min(int(box[5]), image_width) 406 | y_max = min(int(box[4]), image_height) 407 | 408 | faceboxes.append( 409 | [x_min, y_min, x_max, y_max] 410 | ) 411 | facescores.append( 412 | score 413 | ) 414 | 415 | return np.asarray(faceboxes), np.asarray(facescores) 416 | 417 | 418 | def main(): 419 | parser = ArgumentParser() 420 | parser.add_argument( 421 | '-y', 422 | '--yolo_mode', 423 | type=str, 424 | default='yolov4', 425 | choices=['yolov4', 'yolov7'] 426 | ) 427 | parser.add_argument( 428 | '-i', 429 | '--image_folder_path', 430 | type=str, 431 | default='300W_LP_onlyone_person', 432 | ) 433 | args = parser.parse_args() 434 | 435 | yolo_mode = args.yolo_mode 436 | 437 | model = None 438 | if yolo_mode == 'yolov4': 439 | model = YOLOv4ONNX( 440 | model_path='yolov4_headdetection_480x640_post.onnx', 441 | class_score_th=0.80, 442 | ) 443 | elif yolo_mode == 'yolov7': 444 | model = YOLOv7ONNX( 445 | model_path='yolov7_tiny_head_0.752_post_480x640.onnx', 446 | # class_score_th=0.90, 447 | ) 448 | 449 | image_files = glob.glob(f"{args.image_folder_path}/*/*.jpg") 450 | 451 | image_count = 0 452 | for image_file in tqdm(natsorted(image_files), dynamic_ncols=True): 453 | 454 | dirname = os.path.dirname(image_file) 455 | # print(f'@@@ dirname: {dirname} split: {dirname.split("/")}') 456 | new_dirname = f'{args.image_folder_path}_yolov4_filterd/{dirname.split("/")[1]}' 457 | os.makedirs(new_dirname, exist_ok=True) 458 | 459 | image = cv2.imread(image_file) 460 | 461 | debug_image = copy.deepcopy(image) 462 | face_boxes, face_scores = model(debug_image) 463 | 464 | if len(face_boxes) == 1: 465 | 466 | # for face_box, face_score in zip(face_boxes, face_scores): 467 | 468 | # x_min = int(face_box[0]) 469 | # y_min = int(face_box[1]) 470 | # x_max = int(face_box[2]) 471 | # y_max = int(face_box[3]) 472 | 473 | # # add margin 474 | # y_min = int(max(0, y_min - abs(y_min - y_max) / 17)) 475 | # y_max = int(min(image.shape[0], y_max + abs(y_min - y_max) / 17)) 476 | # x_min = int(max(0, x_min - abs(x_min - x_max) / 7)) 477 | # x_max = min(image.shape[1], x_max + abs(x_min - x_max) / 7) 478 | # x_max = int(min(x_max, image.shape[1])) 479 | 480 | # cv2.rectangle( 481 | # debug_image, 482 | # (x_min, y_min), 483 | # (x_max, y_max), 484 | # (255,255,255), 485 | # 2, 486 | # ) 487 | # cv2.rectangle( 488 | # debug_image, 489 | # (x_min, y_min), 490 | # (x_max, y_max), 491 | # (0,255,0), 492 | # 1, 493 | # ) 494 | # cv2.putText( 495 | # debug_image, 496 | # f'{face_score[0]:.2f}', 497 | # ( 498 | # x_min, 499 | # y_min-10 if y_min-10 > 0 else 20 500 | # ), 501 | # 
cv2.FONT_HERSHEY_SIMPLEX, 502 | # 0.7, 503 | # (255, 255, 255), 504 | # 2, 505 | # cv2.LINE_AA, 506 | # ) 507 | # cv2.putText( 508 | # debug_image, 509 | # f'{face_score[0]:.2f}', 510 | # ( 511 | # x_min, 512 | # y_min-10 if y_min-10 > 0 else 20 513 | # ), 514 | # cv2.FONT_HERSHEY_SIMPLEX, 515 | # 0.7, 516 | # (0, 255, 0), 517 | # 1, 518 | # cv2.LINE_AA, 519 | # ) 520 | 521 | # cv2.imshow("test", debug_image) 522 | 523 | # key = cv2.waitKey(0) 524 | # if key == 27: # ESC 525 | # break 526 | 527 | basename = os.path.basename(image_file) 528 | cv2.imwrite(f'{new_dirname}/{basename}', image) 529 | image_count += 1 530 | 531 | print(f'image_count: {image_count}') 532 | 533 | if __name__ == "__main__": 534 | main() -------------------------------------------------------------------------------- /12_create_masked_face_dataset_yolo_test_yolov4_annotation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import cv2 5 | import glob 6 | import copy 7 | import json 8 | import numpy as np 9 | import onnxruntime 10 | from tqdm import tqdm 11 | from pathlib import Path 12 | from natsort import natsorted 13 | from argparse import ArgumentParser 14 | from typing import Tuple, Optional, List 15 | from sklearn.model_selection import train_test_split 16 | 17 | # input image width/height of the yolov4 model, set by command-line argument 18 | INPUT_WIDTH = 0 19 | INPUT_HEIGHT = 0 20 | 21 | # Minimum width/height of objects for detection (don't learn from objects smaller than these) 22 | MIN_W = 5 23 | MIN_H = 5 24 | 25 | # Do K-Means clustering in order to determine "anchor" sizes 26 | DO_KMEANS = True 27 | KMEANS_CLUSTERS = 9 28 | BBOX_WHS = [] # keep track of bbox width/height with respect to 640x640 29 | 30 | 31 | class YOLOv4ONNX(object): 32 | def __init__( 33 | self, 34 | model_path: Optional[str] = 'yolov4_headdetection_480x640_post.onnx', 35 | input_shape: Optional[Tuple[int,int]] = (480, 640), 36 | class_score_th: Optional[float] = 0.20, 37 | providers: Optional[List] = [ 38 | ( 39 | 'TensorrtExecutionProvider', { 40 | 'trt_engine_cache_enable': True, 41 | 'trt_engine_cache_path': '.', 42 | 'trt_fp16_enable': True, 43 | } 44 | ), 45 | 'CUDAExecutionProvider', 46 | 'CPUExecutionProvider', 47 | ], 48 | ): 49 | """YOLOv4ONNX 50 | 51 | Parameters 52 | ---------- 53 | model_path: Optional[str] 54 | ONNX file path for YOLOv4 55 | 56 | input_shape: Optional[Tuple[int,int]] 57 | Model Input Resolution, Default: (480,640) 58 | 59 | class_score_th: Optional[float] 60 | 61 | class_score_th: Optional[float] 62 | Score threshold. 
Default: 0.20 63 | 64 | providers: Optional[List] 65 | Name of onnx execution providers 66 | Default: 67 | [ 68 | 'TensorrtExecutionProvider', 69 | 'CUDAExecutionProvider', 70 | 'CPUExecutionProvider', 71 | ] 72 | """ 73 | # Input size 74 | self.input_shape = input_shape 75 | 76 | # Threshold 77 | self.class_score_th = class_score_th 78 | 79 | # Model loading 80 | session_option = onnxruntime.SessionOptions() 81 | session_option.log_severity_level = 3 82 | self.onnx_session = onnxruntime.InferenceSession( 83 | model_path, 84 | sess_options=session_option, 85 | providers=providers, 86 | ) 87 | self.providers = self.onnx_session.get_providers() 88 | 89 | self.input_name = self.onnx_session.get_inputs()[0].name 90 | self.output_name = self.onnx_session.get_outputs()[0].name 91 | 92 | 93 | def __call__( 94 | self, 95 | image: np.ndarray, 96 | ) -> Tuple[np.ndarray, np.ndarray]: 97 | """YOLOV4ONNX 98 | 99 | Parameters 100 | ---------- 101 | image: np.ndarray 102 | Entire image 103 | 104 | Returns 105 | ------- 106 | faceboxes: np.ndarray 107 | Predicted face boxes: [facecount, x1, y1, x2, y2] 108 | 109 | facescores: np.ndarray 110 | Predicted face box confs: [facecount, conf] 111 | """ 112 | temp_image = copy.deepcopy(image) 113 | 114 | # PreProcess 115 | resized_image = self.__preprocess( 116 | temp_image, 117 | ) 118 | 119 | # Inference 120 | inferece_image = np.asarray([resized_image], dtype=np.float32) 121 | boxes = self.onnx_session.run( 122 | None, 123 | {self.input_name: inferece_image}, 124 | )[0] 125 | 126 | # PostProcess 127 | faceboxes, facescores = self.__postprocess( 128 | image= temp_image, 129 | boxes=boxes, 130 | ) 131 | 132 | return faceboxes, facescores 133 | 134 | 135 | def __preprocess( 136 | self, 137 | image: np.ndarray, 138 | swap: Optional[Tuple[int,int,int]] = (2, 0, 1), 139 | ) -> np.ndarray: 140 | """__preprocess 141 | 142 | Parameters 143 | ---------- 144 | image: np.ndarray 145 | Entire image 146 | 147 | swap: tuple 148 | HWC to CHW: (2,0,1) 149 | CHW to HWC: (1,2,0) 150 | HWC to HWC: (0,1,2) 151 | CHW to CHW: (0,1,2) 152 | 153 | Returns 154 | ------- 155 | resized_image: np.ndarray 156 | Resized and normalized image. 157 | """ 158 | # Normalization + BGR->RGB 159 | resized_image = cv2.resize( 160 | image, 161 | ( 162 | int(self.input_shape[1]), # type: ignore 163 | int(self.input_shape[0]), # type: ignore 164 | ) 165 | ) 166 | resized_image = np.divide(resized_image, 255.0) # type: ignore 167 | resized_image = resized_image.transpose(swap) 168 | resized_image = np.ascontiguousarray(resized_image, dtype=np.float32) 169 | return resized_image 170 | 171 | 172 | def __postprocess( 173 | self, 174 | image: np.ndarray, 175 | boxes: np.ndarray, 176 | ) -> Tuple[np.ndarray, np.ndarray]: 177 | """__postprocess 178 | 179 | Parameters 180 | ---------- 181 | image: np.ndarray 182 | Entire image. 
183 | 184 | boxes: np.ndarray 185 | (boxcount, 5) = (boxcount, x1y1x2y2score) 186 | 187 | Returns 188 | ------- 189 | faceboxes: np.ndarray 190 | Predicted face boxes: [facecount, x1, y1, x2, y2] 191 | 192 | facescores: np.ndarray 193 | Predicted face box confs: [facecount, score] 194 | """ 195 | image_height = image.shape[0] 196 | image_width = image.shape[1] 197 | 198 | scores = boxes[:,4] 199 | keep_idxs = scores > self.class_score_th 200 | boxes_keep = boxes[keep_idxs, :] 201 | 202 | faceboxes = [] 203 | facescores = [] 204 | 205 | if len(boxes_keep) > 0: 206 | boxes_keep[:, 0] = boxes_keep[:, 0] * image_width 207 | boxes_keep[:, 1] = boxes_keep[:, 1] * image_height 208 | boxes_keep[:, 2] = boxes_keep[:, 2] * image_width 209 | boxes_keep[:, 3] = boxes_keep[:, 3] * image_height 210 | 211 | for box in boxes_keep: 212 | x_min = int(box[0]) if int(box[0]) > 0 else 0 213 | y_min = int(box[1]) if int(box[1]) > 0 else 0 214 | x_max = int(box[2]) if int(box[2]) < image_width else image_width 215 | y_max = int(box[3]) if int(box[3]) < image_height else image_height 216 | score = box[4] 217 | 218 | faceboxes.append( 219 | [ 220 | x_min, 221 | y_min, 222 | x_max, 223 | y_max, 224 | ] 225 | ) 226 | facescores.append( 227 | [ 228 | score 229 | ] 230 | ) 231 | 232 | return np.asarray(faceboxes), np.asarray(facescores) # type: ignore 233 | 234 | 235 | class YOLOv7ONNX(object): 236 | def __init__( 237 | self, 238 | model_path: Optional[str] = 'yolov7_tiny_head_0.752_post_480x640.onnx', 239 | class_score_th: Optional[float] = 0.30, 240 | providers: Optional[List] = [ 241 | # ( 242 | # 'TensorrtExecutionProvider', { 243 | # 'trt_engine_cache_enable': True, 244 | # 'trt_engine_cache_path': '.', 245 | # 'trt_fp16_enable': True, 246 | # } 247 | # ), 248 | 'CUDAExecutionProvider', 249 | 'CPUExecutionProvider', 250 | ], 251 | ): 252 | """YOLOv7ONNX 253 | Parameters 254 | ---------- 255 | model_path: Optional[str] 256 | ONNX file path for YOLOv7 257 | 258 | class_score_th: Optional[float] 259 | Score threshold. Default: 0.30
260 | providers: Optional[List] 261 | Name of onnx execution providers 262 | Default: 263 | [ 264 | ( 265 | 'TensorrtExecutionProvider', { 266 | 'trt_engine_cache_enable': True, 267 | 'trt_engine_cache_path': '.', 268 | 'trt_fp16_enable': True, 269 | } 270 | ), 271 | 'CUDAExecutionProvider', 272 | 'CPUExecutionProvider', 273 | ] 274 | """ 275 | # Threshold 276 | self.class_score_th = class_score_th 277 | 278 | # Model loading 279 | session_option = onnxruntime.SessionOptions() 280 | session_option.log_severity_level = 3 281 | self.onnx_session = onnxruntime.InferenceSession( 282 | model_path, 283 | sess_options=session_option, 284 | providers=providers, 285 | ) 286 | self.providers = self.onnx_session.get_providers() 287 | 288 | self.input_shapes = [ 289 | input.shape for input in self.onnx_session.get_inputs() 290 | ] 291 | self.input_names = [ 292 | input.name for input in self.onnx_session.get_inputs() 293 | ] 294 | self.output_names = [ 295 | output.name for output in self.onnx_session.get_outputs() 296 | ] 297 | 298 | 299 | def __call__( 300 | self, 301 | image: np.ndarray, 302 | ) -> Tuple[np.ndarray, np.ndarray]: 303 | """YOLOv7ONNX 304 | Parameters 305 | ---------- 306 | image: np.ndarray 307 | Entire image 308 | Returns 309 | ------- 310 | face_boxes: np.ndarray 311 | Predicted face boxes: [facecount, x1, y1, x2, y2] 312 | face_scores: np.ndarray 313 | Predicted face box scores: [facecount, score] 314 | """ 315 | temp_image = copy.deepcopy(image) 316 | 317 | # PreProcess 318 | resized_image = self.__preprocess( 319 | temp_image, 320 | ) 321 | 322 | # Inference 323 | inference_image = np.asarray([resized_image], dtype=np.float32) 324 | scores, boxes = self.onnx_session.run( 325 | self.output_names, 326 | {input_name: inference_image for input_name in self.input_names}, 327 | ) 328 | 329 | # PostProcess 330 | face_boxes, face_scores = self.__postprocess( 331 | image=temp_image, 332 | scores=scores, 333 | boxes=boxes, 334 | ) 335 | 336 | return face_boxes, face_scores 337 | 338 | 339 | def __preprocess( 340 | self, 341 | image: np.ndarray, 342 | swap: Optional[Tuple[int,int,int]] = (2, 0, 1), 343 | ) -> np.ndarray: 344 | """__preprocess 345 | Parameters 346 | ---------- 347 | image: np.ndarray 348 | Entire image 349 | swap: tuple 350 | HWC to CHW: (2,0,1) 351 | CHW to HWC: (1,2,0) 352 | HWC to HWC: (0,1,2) 353 | CHW to CHW: (0,1,2) 354 | Returns 355 | ------- 356 | resized_image: np.ndarray 357 | Resized and normalized image. 358 | """ 359 | # Normalization + BGR->RGB 360 | resized_image = cv2.resize( 361 | image, 362 | ( 363 | int(self.input_shapes[0][3]), 364 | int(self.input_shapes[0][2]), 365 | ) 366 | ) 367 | resized_image = np.divide(resized_image, 255.0) 368 | resized_image = resized_image[..., ::-1] 369 | resized_image = resized_image.transpose(swap) 370 | resized_image = np.ascontiguousarray( 371 | resized_image, 372 | dtype=np.float32, 373 | ) 374 | return resized_image 375 | 376 | 377 | def __postprocess( 378 | self, 379 | image: np.ndarray, 380 | scores: np.ndarray, 381 | boxes: np.ndarray, 382 | ) -> Tuple[np.ndarray, np.ndarray]: 383 | """__postprocess 384 | Parameters 385 | ---------- 386 | image: np.ndarray 387 | Entire image.
388 | scores: np.ndarray 389 | float32[N, 1] 390 | boxes: np.ndarray 391 | int64[N, 6] 392 | Returns 393 | ------- 394 | faceboxes: np.ndarray 395 | Predicted face boxes: [facecount, x1, y1, x2, y2] 396 | facescores: np.ndarray 397 | Predicted face box confs: [facecount, score] 398 | """ 399 | image_height = image.shape[0] 400 | image_width = image.shape[1] 401 | 402 | """ 403 | Head Detector outputs: 404 | N -> Number of boxes detected 405 | batchno -> always 0: BatchNo.0 406 | classid -> always 0: "Head" 407 | scores: float32[N,1], 408 | batchno_classid_y1x1y2x2: int64[N,6], 409 | """ 410 | 411 | keep_idxs = scores[:, 0] > self.class_score_th 412 | scores_keep = scores[keep_idxs, :] 413 | boxes_keep = boxes[keep_idxs, :] 414 | faceboxes = [] 415 | facescores = [] 416 | 417 | if len(boxes_keep) > 0: 418 | for box, score in zip(boxes_keep, scores_keep): 419 | x_min = max(int(box[3]), 0) 420 | y_min = max(int(box[2]), 0) 421 | x_max = min(int(box[5]), image_width) 422 | y_max = min(int(box[4]), image_height) 423 | 424 | faceboxes.append( 425 | [x_min, y_min, x_max, y_max] 426 | ) 427 | facescores.append( 428 | score 429 | ) 430 | 431 | return np.asarray(faceboxes), np.asarray(facescores) 432 | 433 | 434 | def txt_line(cls, bbox, img_w, img_h): 435 | """Generate one line of a YOLO-format label txt file.""" 436 | x, y, w, h = bbox 437 | x = max(int(x), 0) 438 | y = max(int(y), 0) 439 | w = min(int(w), img_w - x) 440 | h = min(int(h), img_h - y) 441 | w_rescaled = float(w) * INPUT_WIDTH / img_w 442 | h_rescaled = float(h) * INPUT_HEIGHT / img_h 443 | if w_rescaled < MIN_W or h_rescaled < MIN_H: 444 | return '' 445 | else: 446 | if DO_KMEANS: 447 | global BBOX_WHS 448 | BBOX_WHS.append((w_rescaled, h_rescaled)) 449 | cx = (x + w / 2.) / img_w 450 | cy = (y + h / 2.)
/ img_h 451 | nw = float(w) / img_w 452 | nh = float(h) / img_h 453 | return f'{int(cls)} {cx:.6f} {cy:.6f} {nw:.6f} {nh:.6f}\n' 454 | 455 | 456 | def process(set_, data_list, output_dir, model): 457 | """Process either 'train' or 'test' set.""" 458 | jpgs = [] 459 | raw_anno_count = 0 460 | print(f'** Processing Sets: {set_}') 461 | for image_file_path in tqdm(data_list, dynamic_ncols=True): 462 | image = cv2.imread(image_file_path) 463 | img_h, img_w, img_c = image.shape 464 | basename = os.path.basename(image_file_path) 465 | basename_without_ext = os.path.splitext(basename)[0] 466 | txt_path = output_dir / (f'{basename_without_ext}.txt') 467 | 468 | # inference 469 | face_boxes, face_scores = model(image) 470 | if len(face_boxes) == 1: 471 | line_count = 0 472 | with open(txt_path.as_posix(), 'w') as ftxt: 473 | for face_box, face_score in zip(face_boxes, face_scores): 474 | x_min = int(face_box[0]) 475 | y_min = int(face_box[1]) 476 | x_max = int(face_box[2]) 477 | y_max = int(face_box[3]) 478 | 479 | # add margin 480 | y_min = int(max(0, y_min - abs(y_min - y_max) / 17)) 481 | y_max = int(min(img_h, y_max + abs(y_min - y_max) / 17)) 482 | x_min = int(max(0, x_min - abs(x_min - x_max) / 7)) 483 | x_max = min(img_w, x_max + abs(x_min - x_max) / 7) 484 | x_max = int(min(x_max, img_w)) 485 | w = int(x_max - x_min) 486 | h = int(y_max - y_min) 487 | bbox = [x_min, y_min, w, h] 488 | 489 | line = txt_line(0, bbox, img_w, img_h) 490 | if line: 491 | ftxt.write(line) 492 | line_count += 1 493 | 494 | if line_count > 0: 495 | jpgs.append(f'{output_dir}/{basename_without_ext}.jpg') 496 | cv2.imwrite(f'{output_dir}/{basename_without_ext}.jpg', image) 497 | raw_anno_count += 1 498 | 499 | 500 | print(f'** Processed Images: {raw_anno_count}') 501 | # write the 'data/300wlp-{args.dim}/train.txt' or 'data/300wlp-{args.dim}/test.txt' 502 | set_path = output_dir / (f'{set_}.txt') 503 | with open(set_path.as_posix(), 'w') as fset: 504 | for jpg in jpgs: 505 | fset.write(f'{jpg}\n') 506 | 507 | 508 | def rm_txts(output_dir): 509 | """Remove txt files in output_dir.""" 510 | for txt in output_dir.glob('*.txt'): 511 | if txt.is_file(): 512 | txt.unlink() 513 | 514 | 515 | def main(): 516 | global INPUT_WIDTH, INPUT_HEIGHT 517 | 518 | parser = ArgumentParser() 519 | parser.add_argument( 520 | '-y', 521 | '--yolo_mode', 522 | type=str, 523 | default='yolov4', 524 | choices=['yolov4', 'yolov7'] 525 | ) 526 | parser.add_argument( 527 | '-i', 528 | '--image_folder_path', 529 | type=str, 530 | default='300W_LP_onlyone_person_yolov4_filterd', 531 | ) 532 | parser.add_argument( 533 | '-d', 534 | '--dim', 535 | type=str, 536 | default='640x480', 537 | help='input width and height, e.g. 
640x480' 538 | ) 539 | args = parser.parse_args() 540 | 541 | yolo_mode = args.yolo_mode 542 | 543 | model = None 544 | if yolo_mode == 'yolov4': 545 | model = YOLOv4ONNX( 546 | model_path='yolov4_headdetection_480x640_post.onnx', 547 | class_score_th=0.80, 548 | ) 549 | elif yolo_mode == 'yolov7': 550 | model = YOLOv7ONNX( 551 | model_path='yolov7_tiny_head_0.752_post_480x640.onnx', 552 | # class_score_th=0.90, 553 | ) 554 | 555 | dim_split = args.dim.split('x') 556 | if len(dim_split) != 2: 557 | raise SystemExit(f'ERROR: bad spec of input dim ({args.dim})') 558 | INPUT_WIDTH, INPUT_HEIGHT = int(dim_split[0]), int(dim_split[1]) 559 | if INPUT_WIDTH % 32 != 0 or INPUT_HEIGHT % 32 != 0: 560 | raise SystemExit(f'ERROR: bad spec of input dim ({args.dim})') 561 | 562 | output_dir = Path(f'data/300wlp-{args.dim}') 563 | output_dir.mkdir(parents=True, exist_ok=True) 564 | rm_txts(output_dir) 565 | 566 | # Train:Test = 0.95:0.05 567 | image_files = natsorted(glob.glob(f"{args.image_folder_path}/*/*.jpg")) 568 | train_list, test_list = train_test_split( 569 | image_files, 570 | test_size=0.05, 571 | train_size=0.95, 572 | random_state=1, 573 | ) 574 | 575 | 576 | process(set_='test', data_list=test_list, output_dir=output_dir, model=model) 577 | process(set_='train', data_list=train_list, output_dir=output_dir, model=model) 578 | 579 | 580 | if __name__ == "__main__": 581 | main() --------------------------------------------------------------------------------
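Note: the label lines written by txt_line() in 12_create_masked_face_dataset_yolo_test_yolov4_annotation.py follow the standard YOLO (Darknet) annotation convention, one object per line: "<class> <cx> <cy> <w> <h>", with the box center and size normalized by the image width and height. The sketch below illustrates that conversion and its inverse; the helper names (to_yolo_line, from_yolo_line) and the example box values are illustrative only and are not part of the repository.

# Minimal sketch (not part of the repository) of the YOLO label format
# produced by txt_line(): "<class> <cx> <cy> <w> <h>", all normalized to [0, 1].

def to_yolo_line(cls, x, y, w, h, img_w, img_h):
    # (x, y) = top-left corner in pixels, (w, h) = box size in pixels
    cx = (x + w / 2.0) / img_w
    cy = (y + h / 2.0) / img_h
    return f'{int(cls)} {cx:.6f} {cy:.6f} {w / img_w:.6f} {h / img_h:.6f}'

def from_yolo_line(line, img_w, img_h):
    # Inverse mapping back to pixel corners (x_min, y_min, x_max, y_max)
    cls, cx, cy, nw, nh = line.split()
    cx, cy = float(cx) * img_w, float(cy) * img_h
    bw, bh = float(nw) * img_w, float(nh) * img_h
    return int(cls), int(cx - bw / 2), int(cy - bh / 2), int(cx + bw / 2), int(cy + bh / 2)

if __name__ == '__main__':
    # Example: a 200x300 head box with its top-left corner at (100, 50)
    # inside a 640x480 image.
    line = to_yolo_line(0, 100, 50, 200, 300, 640, 480)
    print(line)                            # 0 0.312500 0.416667 0.312500 0.625000
    print(from_yolo_line(line, 640, 480))  # (0, 100, 50, 300, 350)

Assuming the default arguments, 11_create_masked_face_dataset_yolo_test_yolov4_filter.py copies only the images in which exactly one head is detected into 300W_LP_onlyone_person_yolov4_filterd/, and 12_create_masked_face_dataset_yolo_test_yolov4_annotation.py then writes the per-image YOLO label txt files together with data/300wlp-640x480/train.txt and test.txt (95:5 split).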