├── LICENSE ├── README.md ├── common ├── __init__.py ├── landmark_helper.py └── landmark_utils.py ├── create_dataset.py ├── data_generator.py ├── demo ├── CED.png ├── ced_curve.py └── demo.py ├── detection ├── detector.py ├── model │ ├── mnet.25-0000.params │ └── mnet.25-symbol.json ├── rcnn │ └── processing │ │ ├── __init__.py │ │ ├── bbox_transform.py │ │ ├── generate_anchor.py │ │ └── nms.py └── retinaface.py ├── loss.py ├── model.py ├── train.py └── transformer.py /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | MNE (Mean Normalized Error) score:
2 | 0.0426 pfld
3 | 0.0348 mobilenetv3
4 | 
--------------------------------------------------------------------------------
/common/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ["LandmarkHelper", "LandmarkImageCrop"]
2 | 
--------------------------------------------------------------------------------
/common/landmark_helper.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | '''
3 | Jacky LUO, 2019-01-14
4 | '''
5 | import numpy as np
6 | import cv2
7 | import sys
8 | 
9 | 
10 | class LandmarkHelper(object):
11 |     '''
12 |     Helper for different landmark types
13 |     '''
14 | 
15 |     def __init__(self):
16 |         pass
17 | 
18 |     @classmethod
19 |     def parse(cls, line):
20 |         '''
21 |         Parse one txt line to get the file path, landmarks and pose
22 |         Args:
23 |             cls: this class
24 |             line: one line of the input txt
25 |         Returns:
26 |             see __landmark106_txt_parse
27 |         Raises:
28 |             unsupported landmark type
29 |         '''
30 |         return cls.__landmark106_txt_parse(line)
31 | 
32 |     @staticmethod
33 |     def __landmark106_txt_parse(line):
34 |         '''
35 |         [0] image path
36 |         [1:5] bounding box
37 |         [5:-3] 106 landmarks (212 values)
38 |         [-3:] 3 pose angles
39 |         '''
40 |         a = line.split()
41 |         landmarks = list(map(int, a[1:-3]))[4:]
42 |         pose = list(map(float, a[1:]))[-3:]
43 | 
44 |         return a[0], np.array(landmarks).reshape((-1, 2)), np.array(pose, dtype=np.float32)
45 | 
46 |     @staticmethod
47 |     def flip(a):
48 |         '''
49 |         Flip landmarks horizontally. The points have to be renumbered after the flip
50 |         Args:
51 |             a: original landmarks
52 |         Returns:
53 |             landmarks: new landmarks
54 |         Raises:
55 |             unsupported landmark type
56 |         '''
57 |         landmarks = np.concatenate((
58 |             a[0:1], a[17:33], a[1:17],
59 |             a[87:89], a[93:94], a[91:92], a[90:91], a[92:93], a[89:90],
60 |             a[94:95], a[96:97], a[95:96], a[99:102][::-1], a[97:99],
61 |             a[104:106][::-1], a[102:104],
62 |             a[61:62], a[53:54], a[57:60], a[54:57], a[60:61], a[52:53],
63 |             a[62:63], a[67:71], a[63:67], a[71:72],
64 |             a[72:75], a[81:86], a[80:81], a[75:80], a[86:87],
65 |             a[33:35], a[39:40], a[37:38], a[36:37], a[38:39], a[35:36],
66 |             a[40:41], a[42:43], a[41:42], a[46:47], a[47:48], a[45:46], a[44:45], a[43:44],
67 |             a[50:51], a[51:52], a[49:50], a[48:49]
68 |         ), axis=0)
69 | 
70 |         return landmarks.reshape((-1, 2))
71 | 
--------------------------------------------------------------------------------
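A note on the annotation format consumed by LandmarkHelper.parse above: judging from the slicing in __landmark106_txt_parse, each txt line carries an image path, four bounding-box integers, 212 landmark integers and three pose floats, all whitespace-separated. A minimal sketch (the path and values below are made up):

    from common.landmark_helper import LandmarkHelper

    # hypothetical line: path, 4 bbox ints, 106 * (x, y), 3 pose angles
    line = "imgs/0001.jpg 10 20 110 120 " + "30 40 " * 106 + "0.1 -2.3 5.0"
    path, landmarks, pose = LandmarkHelper.parse(line)
    assert landmarks.shape == (106, 2) and pose.shape == (3,)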
/common/landmark_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import os
4 | from tqdm import tqdm
5 | np.random.seed(2018)
6 | 
7 | 
8 | class LandmarkImageCrop(object):
9 |     '''
10 |     Facial 106 landmarks augmentation.
11 |     '''
12 | 
13 |     def __init__(self):
14 |         pass
15 | 
16 |     def __visualize(self, image, landmarks, output_size):
17 |         '''
18 |         Visualize images and corresponding landmarks
19 |         '''
20 |         try:
21 |             image.shape
22 |         except AttributeError:
23 |             raise ValueError("read image error...")
24 | 
25 |         for (x, y) in landmarks:
26 |             cv2.circle(image, (int(x * output_size), int(y * output_size)),
27 |                        1, (0, 0, 255), -1)
28 | 
29 |         cv2.imshow("image", image)
30 |         cv2.waitKey(0)
31 | 
32 |     def mini_crop_by_landmarks(self, sample_list, scale, output_size=112, is_vis=False):
33 |         '''
34 |         Crop the full image to a mini face patch. Only valid images are kept
35 |         Args:
36 |             sample_list: (image path, landmarks, pose) tuples
37 |             scale: upscale rate, a float or a (min, max) range
38 |             is_vis: whether to visualize the cropped result
39 |             output_size: output image size
40 |         Returns:
41 |             new sample list
42 |         '''
43 | 
44 |         boxes = np.empty((len(sample_list), output_size,
45 |                           output_size, 3), dtype=np.uint8)
46 |         ldmarks = np.empty((len(sample_list), 212))
47 |         poses = np.empty((len(sample_list), 3))
48 | 
49 |         for idx, sample in tqdm(enumerate(sample_list)):
50 |             image = cv2.imread(sample[0])
51 |             landmarks = sample[1]
52 |             pose = sample[2]
53 |             try:
54 |                 (x1, y1, x2, y2), new_size, need_pad, (p_x, p_y, p_w, p_h) = LandmarkImageCrop.get_bbox_of_landmarks(
55 |                     image, landmarks, scale, 0.5)
56 |             except Exception:
57 |                 raise ValueError("failed to crop sample: {}".format(sample[0]))
58 |             # Extract roi image
59 |             box_image = image[y1:y2, x1:x2]
60 |             if need_pad:
61 |                 box_image = np.lib.pad(
62 |                     box_image, ((p_y, p_h), (p_x, p_w), (0, 0)), 'constant')
63 |             box_image = cv2.resize(box_image, (output_size, output_size))
64 |             landmarks = (landmarks - (x1 - p_x, y1 - p_y)) / \
65 |                 (new_size, new_size)
66 |             if is_vis:
67 |                 self.__visualize(box_image, landmarks, output_size)
68 |             # Convert to (212,)
69 |             landmarks = landmarks.flatten()
70 | 
71 |             boxes[idx] = box_image
72 |             ldmarks[idx] = landmarks
73 |             poses[idx] = pose
74 | 
75 |         return boxes, ldmarks, poses
76 | 
77 |     @staticmethod
78 |     def get_bbox_of_landmarks(image, landmarks, scale, shift_rate=0.3):
79 |         '''
80 |         Expand the tight landmark box into a bigger bbox
81 |         Args:
82 |             image: a numpy array
83 |             landmarks: face landmarks with format [(x1, y1), ...]. range is 0-w or h in int
84 |             scale: scale bbox in (min, max). eg: (1.3, 1.5)
85 |             shift_rate: random shift range; >= 0.5 keeps the box centered
86 |         Returns:
87 |             return new bbox and other info
88 |         Raises:
89 |             No
90 |         '''
91 |         ori_h, ori_w = image.shape[:2]
92 | 
93 |         x = int(min(landmarks[:, 0]))
94 |         y = int(min(landmarks[:, 1]))
95 |         w = int(max(landmarks[:, 0]) - x)
96 |         h = int(max(landmarks[:, 1]) - y)
97 |         if not isinstance(scale, float):
98 |             # scale was given as a (min, max) range:
99 |             # sample a random rate from it
100 |             scale = np.random.randint(
101 |                 int(scale[0] * 100.0), int(scale[1] * 100.0)) / 100.0
102 |         new_size = int(max(w, h) * scale)
103 |         if shift_rate >= 0.5:
104 |             x1 = x - (new_size - w) // 2
105 |             y1 = y - (new_size - h) // 2
106 |         else:
107 |             x1 = x - np.random.randint(int((new_size-w) * shift_rate),
108 |                                        int((new_size-w) * (1.0-shift_rate)))
109 |             y1 = y - np.random.randint(int((new_size-h) * shift_rate),
110 |                                        int((new_size-h) * (1.0-shift_rate)))
111 |         x2 = x1 + new_size
112 |         y2 = y1 + new_size
113 |         need_pad = False
114 |         p_x, p_y, p_w, p_h = 0, 0, 0, 0
115 |         if x1 < 0:
116 |             p_x = -x1
117 |             x1 = 0
118 |             need_pad = True
119 |         if y1 < 0:
120 |             p_y = -y1
121 |             y1 = 0
122 |             need_pad = True
123 |         if x2 > ori_w:
124 |             p_w = x2 - ori_w
125 |             x2 = ori_w
126 |             need_pad = True
127 |         if y2 > ori_h:
128 |             p_h = y2 - ori_h
129 |             y2 = ori_h
130 |             need_pad = True
131 | 
132 |         return (x1, y1, x2, y2), new_size, need_pad, (p_x, p_y, p_w, p_h)
133 | 
--------------------------------------------------------------------------------
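To see how the crop and padding in get_bbox_of_landmarks fit together, here is a small hedged sketch on synthetic data; shift_rate=0.5 keeps the square centered, and the returned paddings are non-zero only when the square leaves the image:

    import numpy as np
    from common.landmark_utils import LandmarkImageCrop

    img = np.zeros((200, 200, 3), dtype=np.uint8)       # synthetic image
    pts = np.array([[80, 90], [120, 90], [100, 130]])   # toy landmarks
    (x1, y1, x2, y2), size, need_pad, (px, py, pw, ph) = \
        LandmarkImageCrop.get_bbox_of_landmarks(img, pts, 1.4, shift_rate=0.5)
    # Clamped box plus padding always restores the full square side
    assert (x2 - x1) + px + pw == size and (y2 - y1) + py + ph == size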
/create_dataset.py:
--------------------------------------------------------------------------------
1 | """
2 | Create the training dataset
3 | """
4 | import cv2
5 | import os
6 | import sys
7 | import glob
8 | import numpy as np
9 | from tqdm import tqdm
10 | import pandas as pd
11 | 
12 | from common.landmark_helper import LandmarkHelper
13 | from common.landmark_utils import LandmarkImageCrop
14 | import time
15 | from pprint import pprint
16 | 
17 | import argparse
18 | ap = argparse.ArgumentParser()
19 | ap.add_argument("-l", "--landmark_txt", type=str, default='./test_dataset/landmarks.txt',
20 |                 help="path to the landmarks txt")
21 | ap.add_argument("-b", "--base_dir", type=str, default='./test_dataset',
22 |                 help="base dataset dir")
23 | ap.add_argument("-s", "--output_size", type=int, default=64,
24 |                 help="output image size")
25 | ap.add_argument("-n", "--new_path", type=str, default='./demo_test_dataset',
26 |                 help="directory to save the new images")
27 | args = vars(ap.parse_args())
28 | 
29 | 
30 | def main():
31 | 
32 |     if not os.path.exists(args['new_path']):
33 |         os.mkdir(args['new_path'])
34 | 
35 |     with open(args['landmark_txt']) as f:
36 | 
37 |         samples_list = []
38 | 
39 |         for line in f.readlines():
40 |             # Parse the txt file
41 |             img_path, landmarks, poses = LandmarkHelper.parse(line)
42 |             image_path = os.path.join(args['base_dir'], img_path)
43 |             samples_list.append([image_path, landmarks, poses])
44 | 
45 |         boxes, ldmarks, poses = LandmarkImageCrop().mini_crop_by_landmarks(
46 |             samples_list, scale=(1.2, 1.5), output_size=args['output_size'], is_vis=False)
47 | 
48 |         # Save images, new landmarks and poses
49 |         mix_dict = dict()
50 | 
51 |         for box, ldmark, pose, num in tqdm(zip(boxes, ldmarks, poses, np.arange(len(samples_list)))):
52 |             cv2.imwrite("{}.png".format(
53 |                 os.path.join(args['new_path'], str(num).zfill(5))), box)
54 |             mix_dict["{}.png".format(str(num).zfill(5))] = np.concatenate(
55 |                 (ldmark, pose), axis=0)
56 |             # print(np.concatenate((ldmark, pose), axis=0))
57 | 
58 |         df = pd.DataFrame(mix_dict).T
59 |         df.to_csv("{}/face_mixed.csv".format(args['new_path']),
60 |                   encoding="utf-8", header=None)
61 | 
62 |         pprint("Conversion complete!")
63 | 
64 | 
65 | if __name__ == "__main__":
66 |     main()
67 | 
--------------------------------------------------------------------------------
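The face_mixed.csv written above is exactly what data_generator.py below reads back: one row per crop, the file name in column 0, the 212 normalized landmark coordinates next, and the three pose angles last (see the iloc slicing in __data_generation). A minimal hedged usage sketch, assuming the default paths from this script:

    from data_generator import DataGenerator

    # batch_size and output_size must match the crops created above
    gen = DataGenerator(32, './demo_test_dataset',
                        './demo_test_dataset/face_mixed.csv', output_size=64)
    X, (y_landmarks, y_poses) = gen[0]  # X: (32, 64, 64, 3), y_landmarks: (32, 212)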
/data_generator.py:
--------------------------------------------------------------------------------
1 | # https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly
2 | 
3 | from keras.utils import Sequence
4 | import numpy as np
5 | import pandas as pd
6 | import sys
7 | import os
8 | import cv2
9 | from pprint import pprint
10 | 
11 | from common.landmark_utils import LandmarkImageCrop
12 | from common.landmark_helper import LandmarkHelper
13 | 
14 | 
15 | class DataGenerator(Sequence):
16 |     '''
17 |     Generates data for Keras
18 |     '''
19 | 
20 |     def __init__(self, batch_size, root_dir, csv_file, output_size=112,
21 |                  shuffle=False, max_angle=45, transformer=None):
22 | 
23 |         self.landmarks_frame = pd.read_csv(csv_file, header=None)
24 |         self.batch_size = batch_size
25 |         self.root_dir = root_dir
26 |         self.shuffle = shuffle
27 |         self.max_angle = max_angle
28 | 
29 |         assert isinstance(output_size, int)
30 |         self.output_size = output_size
31 |         self.transformer = transformer
32 |         self.on_epoch_end()
33 | 
34 |     def __getitem__(self, index):
35 |         '''Generate one batch of data'''
36 | 
37 |         indexes = self.indexes[index *
38 |                                self.batch_size: (index+1) * self.batch_size]
39 |         list_frames = self.landmarks_frame.iloc[indexes, :]
40 |         X, y_ld, y_p = self.__data_generation(list_frames)
41 | 
42 |         if self.transformer:
43 | 
44 |             X_imgs = np.empty(
45 |                 (self.batch_size, self.output_size, self.output_size, 3), dtype=np.uint8)
46 |             y_ldmarks = np.empty((self.batch_size, 212), dtype=np.float32)
47 |             y_poses = np.empty((self.batch_size, 3), dtype=np.float32)
48 | 
49 |             for idx, img, ldmark, pose in zip(np.arange(len(indexes)), X, y_ld, y_p):
50 |                 ldmark = ldmark.reshape((-1, 2)) * \
51 |                     (self.output_size, self.output_size)
52 | 
53 |                 # Data augmentation; the parameters can be customized
54 |                 img, ldmark, pose = self.__flip(img, ldmark, pose)
55 |                 img, ldmark, pose = self.__rotate(
56 |                     img, ldmark, pose, max_angle=self.max_angle)
57 | 
58 |                 # No need to modify the pose here...
59 |                 img, ldmark, pose = self.__scale_and_shift(
60 |                     img, ldmark, pose, (1.1, 1.5), output_size=self.output_size)
61 |                 img, ldmark, pose = self.__occlusion(img, ldmark, pose)
62 | 
63 |                 X_imgs[idx] = img
64 |                 y_ldmarks[idx] = (ldmark / (self.output_size,
65 |                                             self.output_size)).flatten()
66 |                 y_poses[idx] = pose
67 |             # Image normalization
68 |             return X_imgs.astype(np.float32) / 255., [y_ldmarks, y_poses]
69 | 
70 |         return X.astype(np.float32) / 255., [y_ld, y_p]
71 | 
72 |     def __len__(self):
73 |         '''Denotes the number of batches per epoch'''
74 |         return int(np.floor(len(self.landmarks_frame) / self.batch_size))
75 | 
76 |     def on_epoch_end(self):
77 |         '''Updates indexes after each epoch'''
78 |         self.indexes = np.arange(len(self.landmarks_frame))
79 |         if self.shuffle:
80 |             np.random.shuffle(self.indexes)
81 | 
82 |     @property
83 |     def data_predict(self):
84 |         '''Predict a batch-size chunk of data'''
85 |         return self.__data_generation(self.landmarks_frame.iloc[:self.batch_size, :])  # first batch
86 | 
87 |     def __data_generation(self, list_frames):
88 |         '''Produce batches of data'''
89 | 
90 |         X_imgs = np.empty((self.batch_size, self.output_size,
91 |                            self.output_size, 3), dtype=np.uint8)
92 |         y_ldmarks = np.empty((self.batch_size, 212), dtype=np.float32)
93 |         y_poses = np.empty((self.batch_size, 3), dtype=np.float32)
94 | 
95 |         for i in range(len(list_frames)):
96 |             image_path = os.path.join(
97 |                 self.root_dir, list_frames.iloc[i, 0])
98 |             X_imgs[i] = cv2.imread(image_path)
99 |             y_ldmarks[i] = list_frames.iloc[i, 1:-3]
100 |             y_poses[i] = list_frames.iloc[i, -3:]
101 | 
102 |         return X_imgs, y_ldmarks, y_poses
103 | 
104 |     def __flip(self, image, landmarks, poses, run_prob=0.5):
105 |         '''
106 |         Flip the image. Horizontal only
107 | 
108 |         Args:
109 |             image: a numpy array
110 |             landmarks: face landmarks with format [(x1, y1), (x2, y2), ...]
111 |             run_prob: probability of skipping this operation. 0.0-1.0
112 |         Returns:
113 |             an image and landmarks will be returned
114 |         Raises:
115 |             Unsupported count of landmarks
116 |         '''
117 |         if np.random.rand() < run_prob:
118 |             return image, landmarks, poses
119 |         image = np.fliplr(image)
120 |         landmarks[:, 0] = image.shape[1] - landmarks[:, 0]
121 |         landmarks = LandmarkHelper.flip(landmarks)
122 | 
123 |         # pitch, roll, yaw...
124 |         poses[1] = -poses[1]
125 |         poses[2] = -poses[2]
126 | 
127 |         return image, landmarks, poses
128 | 
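# Note on __rotate below: the landmarks are transformed with the same
# affine matrix as the image, via homogeneous coordinates:
#   M = cv2.getRotationMatrix2D(center, angle, 1)  # 2x3 matrix
#   [x', y'] = [x, y, 1] @ M.T
# so the points stay aligned with the warped pixels.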
129 |     def __rotate(self, image, landmarks, poses, max_angle, run_prob=0.5):
130 |         '''
131 |         Rotate the image.
132 | 
133 |         Args:
134 |             image: a numpy array
135 |             landmarks: face landmarks with format [(x1, y1), ...]. range is 0-w or h in int
136 |             max_angle: rotate by a random angle in [-max_angle, max_angle]. range is 0-180
137 |         Returns:
138 |             an image and landmarks will be returned
139 |         Raises:
140 |             No
141 |         '''
142 |         if np.random.rand() < run_prob:
143 |             return image, landmarks, poses
144 | 
145 |         c_x = (min(landmarks[:, 0]) + max(landmarks[:, 0])) / 2
146 |         c_y = (min(landmarks[:, 1]) + max(landmarks[:, 1])) / 2
147 |         h, w = image.shape[:2]
148 |         angle = np.random.randint(-max_angle, max_angle)
149 |         M = cv2.getRotationMatrix2D((c_x, c_y), angle, 1)
150 |         image = cv2.warpAffine(image, M, (w, h))
151 |         b = np.ones((landmarks.shape[0], 1))
152 |         d = np.concatenate((landmarks, b), axis=1)
153 |         landmarks = np.dot(d, np.transpose(M))
154 | 
155 |         # Adjust the roll angle
156 |         poses[1] += angle
157 | 
158 |         return image, landmarks, poses
159 | 
160 |     def __occlusion(self, image, landmarks, poses, sl=0.05, sh=0.2, r1=0.3, mean=[0, 0, 0], run_prob=0.2):
161 |         '''
162 |         Occlude a random part of the image (random erasing)
163 | 
164 |         sl: min erasing area
165 |         sh: max erasing area
166 |         r1: min aspect ratio
167 |         mean: erasing value
168 |         https://github.com/zhunzhong07/Random-Erasing/blob/master/transforms.py
169 |         '''
170 | 
171 |         if np.random.rand() < run_prob:
172 |             return image, landmarks, poses
173 | 
174 |         for attempt in range(50):
175 |             area = image.shape[0] * image.shape[1]
176 |             target_area = np.random.uniform(sl, sh) * area
177 |             aspect_ratio = np.random.uniform(r1, 1/r1)
178 | 
179 |             h = int(round(np.sqrt(target_area * aspect_ratio)))
180 |             w = int(round(np.sqrt(target_area / aspect_ratio)))
181 | 
182 |             if w < image.shape[1] and h < image.shape[0]:
183 |                 x1 = np.random.randint(0, image.shape[0] - h)
184 |                 y1 = np.random.randint(0, image.shape[1] - w)
185 |                 image[x1: x1+h, y1: y1+w, 0] = mean[0]
186 |                 image[x1: x1+h, y1: y1+w, 1] = mean[1]
187 |                 image[x1: x1+h, y1: y1+w, 2] = mean[2]
188 | 
189 |                 return image, landmarks, poses
190 | 
191 |         return image, landmarks, poses
192 | 
193 |     def __scale_and_shift(self, image, landmarks, poses, scale_range, output_size, run_prob=0.5):
194 |         '''
195 |         Auto-generate a bbox, then randomly scale and shift it.
196 | 
197 |         Args:
198 |             image: a numpy array
199 |             landmarks: face landmarks with format [(x1, y1), ...]. range is 0-w or h in int
200 |             scale_range: scale bbox in (min, max).
eg: (1.3, 1.5) 201 | output_size: output size of image 202 | Returns: 203 | an image and landmarks will be returned 204 | Raises: 205 | No 206 | ''' 207 | if np.random.rand() < run_prob: 208 | return image, landmarks, poses 209 | 210 | (x1, y1, x2, y2), new_size, need_pad, (p_x, p_y, p_w, p_h) = LandmarkImageCrop.get_bbox_of_landmarks( 211 | image, landmarks, scale_range, shift_rate=0) 212 | box_image = image[y1:y2, x1:x2] 213 | if need_pad: 214 | box_image = np.lib.pad( 215 | box_image, ((p_y, p_h), (p_x, p_w), (0, 0)), 'constant') 216 | box_image = cv2.resize(box_image, (output_size, output_size)) 217 | landmarks = (landmarks - (x1 - p_x, y1 - p_y)) 218 | 219 | return box_image, landmarks, poses 220 | -------------------------------------------------------------------------------- /demo/CED.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JACKYLUO1991/FaceLandmarks/0e7eb636d25ba93e64cc3433a79239be51c883cc/demo/CED.png -------------------------------------------------------------------------------- /demo/ced_curve.py: -------------------------------------------------------------------------------- 1 | # Normalized Mean Error 2 | # Created by Jacky LUO 3 | # https://github.com/MarekKowalski/DeepAlignmentNetwork 4 | 5 | import sys 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from keras.models import load_model 9 | from keras.utils import custom_object_scope 10 | import pandas as pd 11 | import os 12 | import cv2 as cv 13 | from tqdm import tqdm 14 | from scipy.integrate import simps 15 | 16 | import tensorflow as tf 17 | tf.logging.set_verbosity(tf.logging.ERROR) 18 | 19 | sys.path.append("../") 20 | from loss import * 21 | from model import relu6, hard_swish 22 | 23 | 24 | class LandmarkNme(object): 25 | """Measure normalized mean error""" 26 | 27 | failure_threshold = 0.10 28 | 29 | def __init__(self, model_path, nb_points=106, output_dim=112): 30 | 31 | with custom_object_scope({'normalized_mean_error': normalized_mean_error, 32 | 'wing_loss': wing_loss, 'smoothL1': smoothL1, 33 | 'relu6': relu6, 'hard_swish': hard_swish}): 34 | self.model = load_model(model_path) 35 | 36 | self.output_dim = output_dim 37 | self.nb_points = nb_points 38 | 39 | self.__gt_landmarks = None 40 | self.__pred_landmarks = None 41 | self.__image_names = None 42 | 43 | @property 44 | def gt_landmarks(self): 45 | return self.__gt_landmarks 46 | 47 | @gt_landmarks.setter 48 | def gt_landmarks(self, landmarks_csv): 49 | '''Get Groundtruth landmarks''' 50 | df = pd.read_csv(landmarks_csv, header=None) 51 | self.__image_names = df.iloc[:, 0].values 52 | self.__gt_landmarks = df.iloc[:, 1:- 53 | 3].values.reshape((-1, self.nb_points, 2)) 54 | 55 | @property 56 | def pred_landmarks(self): 57 | return self.__pred_landmarks 58 | 59 | @pred_landmarks.setter 60 | def pred_landmarks(self, prefix): 61 | """Get pred landmarks""" 62 | marks_list = [] 63 | for image_name in tqdm(self.__image_names): 64 | image_path = os.path.join(prefix, image_name) 65 | # Resize image to specific size like 112, 64... 66 | img = cv.resize(cv.imread(image_path), 67 | (self.output_dim, self.output_dim)) 68 | if self.output_dim == 64: 69 | img_normalized = img.astype(np.float32) 70 | else: 71 | img_normalized = img.astype(np.float32) / 255. 
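# NOTE (assumption): the 64x64 'basenet' checkpoint appears to have been
# trained on raw 0-255 inputs, while the 112x112 models expect [0, 1]
# inputs; that is why the /255. scaling is skipped when output_dim == 64.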
72 |         face_img = img_normalized.reshape(
73 |             1, self.output_dim, self.output_dim, 3)
74 |         if self.output_dim == 64:
75 |             marks = self.model.predict_on_batch(face_img)
76 |         else:
77 |             marks = self.model.predict_on_batch(face_img)[0]
78 |         # marks = self.model.predict_on_batch(face_img)
79 |         # marks = np.reshape(marks, (-1, 2))
80 |         marks_list.append(marks)
81 |         # print(marks)
82 |     self.__pred_landmarks = np.array(
83 |         marks_list, dtype=np.float32).reshape((-1, self.nb_points, 2))
84 | 
85 |     def landmark_error(self, normalization='centers'):
86 |         """Get the landmark error between gt and pred"""
87 |         errors = []
88 |         n_imgs = len(self.__gt_landmarks)
89 | 
90 |         for i in tqdm(range(n_imgs)):
91 |             gt_ldmarks = self.__gt_landmarks[i]
92 |             pred_ldmarks = self.__pred_landmarks[i]
93 | 
94 |             if normalization == 'centers':
95 |                 normDist = np.linalg.norm(
96 |                     gt_ldmarks[38] - gt_ldmarks[92])
97 |             error = np.mean(np.sqrt(np.sum((gt_ldmarks -
98 |                                             pred_ldmarks) ** 2, axis=1))) / normDist
99 |             errors.append(error)
100 | 
101 |         return errors
102 | 
103 |     @classmethod
104 |     def plot_ced(cls, errors_lists, step=0.0001, fontsize=18, labels=None, colors=None,
105 |                  showCurve=True):
106 |         '''Plot the CED curve'''
107 |         ced_list = []
108 |         xAxis_list = []
109 | 
110 |         for errors in errors_lists:
111 |             nErrors = len(errors)
112 |             xAxis = list(np.arange(0., cls.failure_threshold + step, step))
113 |             ced = [float(np.count_nonzero(np.asarray(errors) <= x)) /
114 |                    nErrors for x in xAxis]
115 |             # AUC = simps(ced, x=xAxis) / cls.failure_threshold
116 |             # failureRate = 1. - ced[-1]
117 |             ced_list.append(ced)
118 |             xAxis_list.append(xAxis)
119 | 
120 |         if showCurve:
121 |             if labels is not None and colors is not None:
122 |                 plt.grid()
123 |                 plt.axis([0.0, cls.failure_threshold, 0, 1.0])
124 |                 plt.xticks(fontsize=fontsize)
125 |                 plt.yticks(fontsize=fontsize)
126 |                 for i in range(len(errors_lists)):
127 |                     plt.plot(xAxis_list[i], ced_list[i], color=colors[i],
128 |                              label=labels[i])
129 |                 plt.legend()
130 |                 plt.xlabel('Mean Normalized Error', fontsize=fontsize)
131 |                 plt.ylabel('Proportion of images', fontsize=fontsize)
132 |                 plt.show()
133 | 
134 | 
135 | if __name__ == "__main__":
136 | 
137 |     # Pipeline
138 |     errors_lists = []
139 |     # PFLD network
140 |     ln = LandmarkNme("../checkpoints/pfld.h5")
141 |     ln.gt_landmarks = "../new_test_dataset/face_mixed.csv"
142 |     ln.pred_landmarks = "../new_test_dataset"
143 |     errors = ln.landmark_error()
144 |     errors_lists.append(errors)
145 | 
146 |     # Mobilenetv3 network
147 |     ln2 = LandmarkNme("../checkpoints/mobilenetv3.h5")
148 |     ln2.gt_landmarks = "../new_test_dataset/face_mixed.csv"
149 |     ln2.pred_landmarks = "../new_test_dataset"
150 |     errors2 = ln2.landmark_error()
151 |     errors_lists.append(errors2)
152 | 
153 |     # Basenet network
154 |     ln3 = LandmarkNme("../checkpoints/model.h5", output_dim=64)
155 |     ln3.gt_landmarks = "../new_test_dataset/face_mixed.csv"
156 |     ln3.pred_landmarks = "../new_test_dataset"
157 |     errors3 = ln3.landmark_error()
158 |     errors_lists.append(errors3)
159 | 
160 |     # CED curve show
161 |     LandmarkNme.plot_ced(errors_lists, showCurve=True,
162 |                          labels=['Pfld', 'Mobilenetv3', 'Basenet'], colors=['blue', 'green', 'red'])
163 | 
--------------------------------------------------------------------------------
/demo/demo.py:
--------------------------------------------------------------------------------
1 | """
2 | For static-image landmark detection
3 | """
4 | import numpy as np
5 | import cv2
6 | import time
7 | import glob
8 | # import imutils
9 | import sys
10 | import os
11 | sys.path.append("../")
12 | # from transformer import FaceAlign
13 | from detection.detector import MarkDetector
14 | 
15 | import tensorflow as tf
16 | tf.logging.set_verbosity(tf.logging.ERROR)
17 | 
18 | CNN_INPUT_SIZE = 112
19 | # face_align = FaceAlign(out_size=CNN_INPUT_SIZE)
20 | 
21 | 
22 | def main(images_dir, savePath):
23 | 
24 |     if not os.path.isdir(savePath):
25 |         os.makedirs(savePath)
26 | 
27 |     images_path = os.listdir(images_dir)
28 | 
29 |     # mark_detector = MarkDetector(threshold=[0.7, 0.6, 0.95],
30 |     #                              mark_model='../checkpoints/pfld.h5')
31 |     mark_detector = MarkDetector(model_path='../detection/model/mnet.25',
32 |                                  gpuid=-1, thresh=0.9, scales=[224, 384],
33 |                                  mark_model='../checkpoints/pfld.h5')
34 | 
35 |     for idx, image_path in enumerate(images_path):
36 | 
37 |         img = cv2.imread(os.path.join(images_dir, image_path))
38 |         # img = imutils.resize(img, width=512)
39 |         h, w, _ = img.shape
40 |         img_copy = img.copy()
41 |         # img_copy = cv2.cvtColor(img_copy, cv2.COLOR_BGR2RGB)
42 |         s1 = time.time()
43 |         faceboxes, face_ldmarks = mark_detector.extract_cnn_facebox(
44 |             img_copy)
45 |         print(f"Detection: {time.time() - s1}s")
46 |         pad_type = -1
47 | 
48 |         if len(faceboxes) == 0:
49 |             print("No face detected...")
50 |             continue
51 | 
52 |         else:
53 |             for facebox, ldmarks in zip(faceboxes, face_ldmarks):
54 | 
55 |                 facebox = list(map(int, facebox))
56 |                 x_min, y_min, x_max, y_max = facebox[0], facebox[1], facebox[2], facebox[3]
57 | 
58 |                 if x_min < 0 or y_min < 0 or x_max > w or y_max > h:
59 |                     pad_type = 1
60 |                     absTmp = np.minimum(
61 |                         (x_min, y_min, w - x_max, h - y_max), 0)
62 |                     pad = np.max(np.abs(absTmp))
63 |                     # Pad the entire image
64 |                     img = cv2.copyMakeBorder(img, pad, pad, pad, pad,
65 |                                              cv2.BORDER_CONSTANT, value=[0, 0, 0])
66 |                     y_min, y_max, x_min, x_max = y_min + pad, y_max + pad, x_min + pad, x_max + pad
67 | 
68 |                 face_img_crop = img[y_min: y_max, x_min: x_max]
69 |                 face_img_align_uint = cv2.resize(
70 |                     face_img_crop, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
71 |                 # ldmarks = [(x / (face_img_crop.shape[0] / CNN_INPUT_SIZE), y /
72 |                 #             (face_img_crop.shape[1] / CNN_INPUT_SIZE)) for (x, y) in ldmarks]
73 |                 #####################################################################
74 |                 # 5-point alignment code
75 |                 # face_img_align_uint, tform = face_align.face_aligned(
76 |                 #     face_img_align_uint, ldmarks)
77 |                 #####################################################################
78 |                 if CNN_INPUT_SIZE == 64:
79 |                     face_img_align = face_img_align_uint.astype(np.float32)
80 |                 else:
81 |                     face_img_align = face_img_align_uint.astype(np.float32) / 255.
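# The landmark model outputs coordinates normalized to [0, 1] relative to
# the square crop; below they are rescaled by the box side (x_max - x_min)
# and translated back into full-image coordinates.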
82 |                 face_img0 = face_img_align.reshape(
83 |                     1, CNN_INPUT_SIZE, CNN_INPUT_SIZE, 3)
84 | 
85 |                 s2 = time.time()
86 |                 marks = mark_detector.detect_marks_keras(face_img0)
87 |                 print(f"Landmarks: {time.time() - s2}s")
88 |                 #############################################################################
89 |                 # Inverse similarity transformation matrix
90 |                 # marks *= CNN_INPUT_SIZE
91 |                 # b = np.ones((marks.shape[0], 1))
92 |                 # d = np.concatenate((marks, b), axis=1)
93 |                 # M = cv2.invertAffineTransform(tform.params[:2, :])
94 |                 # marks = np.dot(d, M.T)
95 |                 # marks /= CNN_INPUT_SIZE
96 | 
97 |                 marks *= (x_max - x_min)
98 |                 #############################################################################
99 |                 marks[:, 0] += x_min
100 |                 marks[:, 1] += y_min
101 | 
102 |                 # Draw the predicted landmarks
103 |                 MarkDetector.draw_marks(img, marks, thick=2)
104 | 
105 |             if pad_type == 1:
106 |                 pad_type = -1
107 |                 img = img[pad: pad+h, pad: pad+w]
108 | 
109 |             print("[INFO] Finished {} pictures".format(idx+1))
110 | 
111 |             cv2.imwrite(os.path.join(savePath, "result_%d.jpg" % (idx)), img)
112 | 
113 | 
114 | if __name__ == "__main__":
115 |     main(images_dir='./images', savePath='./result112')
116 | 
--------------------------------------------------------------------------------
/detection/detector.py:
--------------------------------------------------------------------------------
1 | """
2 | Detect faces and predict landmarks on them
3 | """
4 | import numpy as np
5 | import tensorflow as tf
6 | import cv2
7 | import os
8 | from keras.models import load_model
9 | from keras.utils import custom_object_scope
10 | from loss import normalized_mean_error, wing_loss, smoothL1
11 | 
12 | import sys
13 | sys.path.append("../")
14 | from model import relu6, hard_swish
15 | from detection import retinaface
16 | # from detection import detect_face
17 | 
18 | 
19 | class MarkDetector:
20 |     """Facial landmark detector based on a Convolutional Neural Network"""
21 | 
22 |     # def __init__(self, threshold=[0.6, 0.7, 0.7], factor=0.709, minsize=20, mark_model=None):
23 | 
24 |     def __init__(self, model_path, gpuid=-1, thresh=0.95, scales=[384, 512], mark_model=None):
25 | 
26 |         self.gpuid = -1 if gpuid < 0 else gpuid
27 |         self.thresh = thresh
28 | 
29 |         if isinstance(scales, (list, tuple)) and len(scales) == 2:
30 |             self.scales = scales
31 |         else:
32 |             raise ValueError("scales must be a (target_size, max_size) pair...")
33 | 
34 |         try:
35 |             self.detector = retinaface.RetinaFace(
36 |                 model_path, 0, self.gpuid, 'net3')
37 |         except Exception:
38 |             raise Exception("Detector loading error...")
39 | 
40 |         # if isinstance(threshold, list) and len(threshold) == 3:
41 |         #     self.threshold = threshold
42 |         #     self.factor = factor
43 |         #     self.minsize = minsize
44 | 
45 |         # with tf.Graph().as_default():
46 |         #     sess = tf.Session()
47 |         #     with sess.as_default():
48 |         #         self.pnet, self.rnet, self.onet = detect_face.create_mtcnn(
49 |         #             sess, None)
50 | 
51 |         if mark_model.split(".")[-1] == 'h5':
52 |             with custom_object_scope({'normalized_mean_error': normalized_mean_error,
53 |                                       'wing_loss': wing_loss, 'smoothL1': smoothL1,
54 |                                       'relu6': relu6, 'hard_swish': hard_swish}):
55 |                 self.sess = load_model(mark_model)
56 |         else:
57 |             raise Exception("a Keras .h5 mark model must be given...")
58 | 
59 |         # else:
60 |         #     raise ValueError("error occur in threshold params!")
61 | 
62 |     def detect_marks_keras(self, image_np):
63 |         """Detect marks from an image"""
64 |         predictions = self.sess.predict_on_batch(image_np)
65 | 
66 |         # Convert predictions to landmarks.
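# For the multi-output models, predict_on_batch returns a list
# [landmarks, poses]; predictions[0] is the (1, 212) landmark vector,
# which is flattened and reshaped to (106, 2) below.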
67 | marks = np.array(predictions[0]).flatten() 68 | marks = np.reshape(marks, (-1, 2)) 69 | 70 | return marks 71 | 72 | @staticmethod 73 | def move_box(box, offset, scale=1.1): 74 | """Move the box to direction specified by vector offset""" 75 | if scale < 1. or scale >= 2.: 76 | raise ValueError("scale should be between 1 and 2...") 77 | 78 | left_x = box[0] + offset[0] * scale 79 | top_y = box[1] + offset[1] * scale 80 | right_x = box[2] + offset[0] * scale 81 | bottom_y = box[3] + offset[1] * scale 82 | 83 | return [left_x, top_y, right_x, bottom_y] 84 | 85 | @staticmethod 86 | def get_square_box(box): 87 | """Get a square box out of the given box, by expanding it.""" 88 | 89 | left_x = int(box[0]) 90 | top_y = int(box[1]) 91 | right_x = int(box[2]) 92 | bottom_y = int(box[3]) 93 | 94 | box_width = right_x - left_x 95 | box_height = bottom_y - top_y 96 | 97 | # Check if box is already a square. If not, make it a square. 98 | diff = box_height - box_width 99 | delta = int(abs(diff / 2)) 100 | 101 | if diff == 0: # Already a square. 102 | return [left_x, top_y, right_x, bottom_y] 103 | elif diff > 0: # Height > width, a slim box. 104 | left_x -= delta 105 | right_x += delta 106 | if diff % 2 == 1: 107 | right_x += 1 108 | else: # Width > height, a short box. 109 | top_y -= delta 110 | bottom_y += delta 111 | if diff % 2 == 1: 112 | bottom_y += 1 113 | 114 | # Make sure box is always square. 115 | assert (right_x - left_x) == (bottom_y - top_y) 116 | 117 | return [left_x, top_y, right_x, bottom_y] 118 | 119 | def extract_cnn_facebox(self, image): 120 | """Extract face area from image.""" 121 | faceboxes = [] 122 | # scores = [] 123 | face_ldmarks = [] 124 | 125 | im_shape = image.shape 126 | target_size = self.scales[0] 127 | max_size = self.scales[1] 128 | im_size_min = np.min(im_shape[0:2]) 129 | im_size_max = np.max(im_shape[0:2]) 130 | im_scale = float(target_size) / float(im_size_min) 131 | 132 | # Prevent bigger axis from being more than max_size: 133 | if np.round(im_scale * im_size_max) > max_size: 134 | im_scale = float(max_size) / float(im_size_max) 135 | 136 | # bboxs, landmarks = detect_face.detect_face(image, self.minsize, self.pnet, self.rnet, self.onet, 137 | # self.threshold, self.factor) 138 | 139 | faces, landmarks = self.detector.detect( 140 | image, self.thresh, scales=[im_scale]) 141 | 142 | for box, ldmarks in zip(faces, landmarks): 143 | # Box: (x1, y1, x2, y2) 144 | diff_height_width = (box[2] - box[0]) - (box[3] - box[1]) 145 | offset = int(abs(diff_height_width / 2)) 146 | box_moved = self.move_box(box, [0, offset]) 147 | facebox = self.get_square_box(box_moved) 148 | ldmarks = ldmarks - (facebox[0], facebox[1]) 149 | faceboxes.append(facebox) 150 | face_ldmarks.append(ldmarks) 151 | 152 | return faceboxes, face_ldmarks 153 | 154 | # if bboxs.shape[0] == 0: 155 | # landmarks_reshape = landmarks 156 | # else: 157 | # landmarks_reshape = landmarks.reshape((-1, 5, 2), order='F') 158 | 159 | # for bbox, ldmarks in zip(bboxs, landmarks_reshape): 160 | # box, score = bbox[0: 4], bbox[4] 161 | # # Move down 162 | # # box coordinate: (x1, y1, x2, y2) 163 | # diff_height_width = (box[2] - box[0]) - (box[3] - box[1]) 164 | # offset = int(abs(diff_height_width / 2)) 165 | # box_moved = self.move_box(box, [0, offset]) 166 | # # Make box square and landmarks alignment 167 | # facebox = self.get_square_box(box_moved) 168 | # ldmarks = ldmarks - (facebox[0], facebox[1]) 169 | # faceboxes.append(facebox) 170 | # face_ldmarks.append(ldmarks) 171 | # scores.append(score) 172 | 173 
| # return faceboxes, face_ldmarks, scores 174 | 175 | @staticmethod 176 | def draw_marks(image, marks, color=(255, 0, 255), thick=1): 177 | """Draw mark points on image""" 178 | for idx, mark in enumerate(marks): 179 | cv2.circle(image, (int(mark[0]), int(mark[1])), 180 | thick, color, -1, cv2.LINE_AA) 181 | # Visualization cropped image 182 | # cv2.imshow("image", image) 183 | # cv2.waitKey(0) 184 | -------------------------------------------------------------------------------- /detection/model/mnet.25-0000.params: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JACKYLUO1991/FaceLandmarks/0e7eb636d25ba93e64cc3433a79239be51c883cc/detection/model/mnet.25-0000.params -------------------------------------------------------------------------------- /detection/rcnn/processing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JACKYLUO1991/FaceLandmarks/0e7eb636d25ba93e64cc3433a79239be51c883cc/detection/rcnn/processing/__init__.py -------------------------------------------------------------------------------- /detection/rcnn/processing/bbox_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def bbox_overlaps(boxes, query_boxes): 5 | return bbox_overlaps_py(boxes, query_boxes) 6 | 7 | 8 | def bbox_overlaps_py(boxes, query_boxes): 9 | """ 10 | determine overlaps between boxes and query_boxes 11 | :param boxes: n * 4 bounding boxes 12 | :param query_boxes: k * 4 bounding boxes 13 | :return: overlaps: n * k overlaps 14 | """ 15 | n_ = boxes.shape[0] 16 | k_ = query_boxes.shape[0] 17 | overlaps = np.zeros((n_, k_), dtype=np.float) 18 | for k in range(k_): 19 | query_box_area = (query_boxes[k, 2] - query_boxes[k, 0] + 1) * \ 20 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 21 | for n in range(n_): 22 | iw = min(boxes[n, 2], query_boxes[k, 2]) - \ 23 | max(boxes[n, 0], query_boxes[k, 0]) + 1 24 | if iw > 0: 25 | ih = min(boxes[n, 3], query_boxes[k, 3]) - \ 26 | max(boxes[n, 1], query_boxes[k, 1]) + 1 27 | if ih > 0: 28 | box_area = (boxes[n, 2] - boxes[n, 0] + 1) * \ 29 | (boxes[n, 3] - boxes[n, 1] + 1) 30 | all_area = float(box_area + query_box_area - iw * ih) 31 | overlaps[n, k] = iw * ih / all_area 32 | return overlaps 33 | 34 | 35 | def clip_boxes(boxes, im_shape): 36 | """ 37 | Clip boxes to image boundaries. 
38 |     :param boxes: [N, 4 * num_classes]
39 |     :param im_shape: tuple of 2
40 |     :return: [N, 4 * num_classes]
41 |     """
42 |     # x1 >= 0
43 |     boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
44 |     # y1 >= 0
45 |     boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
46 |     # x2 < im_shape[1]
47 |     boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
48 |     # y2 < im_shape[0]
49 |     boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
50 |     return boxes
51 | 
52 | 
53 | def nonlinear_transform(ex_rois, gt_rois):
54 |     """
55 |     compute bounding box regression targets from ex_rois to gt_rois
56 |     :param ex_rois: [N, 4]
57 |     :param gt_rois: [N, 4]
58 |     :return: [N, 4]
59 |     """
60 |     assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number'
61 | 
62 |     ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
63 |     ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
64 |     ex_ctr_x = ex_rois[:, 0] + 0.5 * (ex_widths - 1.0)
65 |     ex_ctr_y = ex_rois[:, 1] + 0.5 * (ex_heights - 1.0)
66 | 
67 |     gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
68 |     gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
69 |     gt_ctr_x = gt_rois[:, 0] + 0.5 * (gt_widths - 1.0)
70 |     gt_ctr_y = gt_rois[:, 1] + 0.5 * (gt_heights - 1.0)
71 | 
72 |     targets_dx = (gt_ctr_x - ex_ctr_x) / (ex_widths + 1e-14)
73 |     targets_dy = (gt_ctr_y - ex_ctr_y) / (ex_heights + 1e-14)
74 |     targets_dw = np.log(gt_widths / ex_widths)
75 |     targets_dh = np.log(gt_heights / ex_heights)
76 | 
77 |     if gt_rois.shape[1] <= 4:
78 |         targets = np.vstack(
79 |             (targets_dx, targets_dy, targets_dw, targets_dh)).transpose()
80 |         return targets
81 |     else:
82 |         targets = [targets_dx, targets_dy, targets_dw, targets_dh]
83 |         targets = np.vstack(targets).transpose()
84 |         return targets
85 | 
86 | 
87 | def landmark_transform(ex_rois, gt_rois):
88 | 
89 |     assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number'
90 | 
91 |     ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
92 |     ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
93 |     ex_ctr_x = ex_rois[:, 0] + 0.5 * (ex_widths - 1.0)
94 |     ex_ctr_y = ex_rois[:, 1] + 0.5 * (ex_heights - 1.0)
95 | 
96 |     targets = []
97 |     for i in range(gt_rois.shape[1]):
98 |         for j in range(gt_rois.shape[2]):
99 |             if j == 2:
100 |                 continue
101 |             if j == 0:  # x
102 |                 target = (gt_rois[:, i, j] - ex_ctr_x) / (ex_widths + 1e-14)
103 |             elif j == 1:  # y
104 |                 target = (gt_rois[:, i, j] - ex_ctr_y) / (ex_heights + 1e-14)
105 |             else:  # visible
106 |                 target = gt_rois[:, i, j]
107 |             targets.append(target)
108 | 
109 |     targets = np.vstack(targets).transpose()
110 |     return targets
111 | 
112 | 
113 | def nonlinear_pred(boxes, box_deltas):
114 |     """
115 |     Transform the set of class-agnostic boxes into class-specific boxes
116 |     by applying the predicted offsets (box_deltas)
117 |     :param boxes: !important [N 4]
118 |     :param box_deltas: [N, 4 * num_classes]
119 |     :return: [N 4 * num_classes]
120 |     """
121 |     if boxes.shape[0] == 0:
122 |         return np.zeros((0, box_deltas.shape[1]))
123 | 
124 |     boxes = boxes.astype(np.float, copy=False)
125 |     widths = boxes[:, 2] - boxes[:, 0] + 1.0
126 |     heights = boxes[:, 3] - boxes[:, 1] + 1.0
127 |     ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
128 |     ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)
129 | 
130 |     dx = box_deltas[:, 0::4]
131 |     dy = box_deltas[:, 1::4]
132 |     dw = box_deltas[:, 2::4]
133 |     dh = box_deltas[:, 3::4]
134 | 
135 |     pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
136 |     pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
137 |     pred_w = np.exp(dw) * widths[:, np.newaxis]
138 |     pred_h = np.exp(dh) * heights[:, np.newaxis]
139 | 
140 |     pred_boxes = np.zeros(box_deltas.shape)
141 |     # x1
142 |     pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * (pred_w - 1.0)
143 |     # y1
144 |     pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * (pred_h - 1.0)
145 |     # x2
146 |     pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * (pred_w - 1.0)
147 |     # y2
148 |     pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * (pred_h - 1.0)
149 | 
150 |     return pred_boxes
151 | 
152 | 
153 | def landmark_pred(boxes, landmark_deltas):
154 |     if boxes.shape[0] == 0:
155 |         return np.zeros((0, landmark_deltas.shape[1]))
156 |     boxes = boxes.astype(np.float, copy=False)
157 |     widths = boxes[:, 2] - boxes[:, 0] + 1.0
158 |     heights = boxes[:, 3] - boxes[:, 1] + 1.0
159 |     ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
160 |     ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)
161 |     preds = []
162 |     for i in range(landmark_deltas.shape[1]):
163 |         if i % 2 == 0:
164 |             pred = (landmark_deltas[:, i]*widths + ctr_x)
165 |         else:
166 |             pred = (landmark_deltas[:, i]*heights + ctr_y)
167 |         preds.append(pred)
168 |     preds = np.vstack(preds).transpose()
169 |     return preds
170 | 
171 | 
172 | def iou_transform(ex_rois, gt_rois):
173 |     """ return bbox targets, IoU loss uses gt_rois as gt """
174 |     assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number'
175 |     return gt_rois
176 | 
177 | 
178 | def iou_pred(boxes, box_deltas):
179 |     """
180 |     Transform the set of class-agnostic boxes into class-specific boxes
181 |     by applying the predicted offsets (box_deltas)
182 |     :param boxes: !important [N 4]
183 |     :param box_deltas: [N, 4 * num_classes]
184 |     :return: [N 4 * num_classes]
185 |     """
186 |     if boxes.shape[0] == 0:
187 |         return np.zeros((0, box_deltas.shape[1]))
188 | 
189 |     boxes = boxes.astype(np.float, copy=False)
190 |     x1 = boxes[:, 0]
191 |     y1 = boxes[:, 1]
192 |     x2 = boxes[:, 2]
193 |     y2 = boxes[:, 3]
194 | 
195 |     dx1 = box_deltas[:, 0::4]
196 |     dy1 = box_deltas[:, 1::4]
197 |     dx2 = box_deltas[:, 2::4]
198 |     dy2 = box_deltas[:, 3::4]
199 | 
200 |     pred_boxes = np.zeros(box_deltas.shape)
201 |     # x1
202 |     pred_boxes[:, 0::4] = dx1 + x1[:, np.newaxis]
203 |     # y1
204 |     pred_boxes[:, 1::4] = dy1 + y1[:, np.newaxis]
205 |     # x2
206 |     pred_boxes[:, 2::4] = dx2 + x2[:, np.newaxis]
207 |     # y2
208 |     pred_boxes[:, 3::4] = dy2 + y2[:, np.newaxis]
209 | 
210 |     return pred_boxes
211 | 
212 | 
213 | # define bbox_transform and bbox_pred
214 | bbox_transform = nonlinear_transform
215 | bbox_pred = nonlinear_pred
216 | 
--------------------------------------------------------------------------------
/detection/rcnn/processing/generate_anchor.py:
--------------------------------------------------------------------------------
1 | """
2 | Generate base anchors on index 0
3 | """
4 | from __future__ import print_function
5 | 
6 | import sys
7 | import numpy as np
8 | 
9 | 
10 | def anchors_plane(feat_h, feat_w, stride, base_anchor):
11 |     return anchors_py(feat_h, feat_w, stride, base_anchor)
12 | 
13 | 
14 | def anchors_py(height, width, stride, base_anchors):
15 |     """
16 |     Parameters
17 |     ----------
18 |     height: height of plane
19 |     width: width of plane
20 |     stride: stride of the original image
21 |     base_anchors: (A, 4) a base set of anchors
22 |     Returns
23 |     -------
24 |     all_anchors: (height, width, A, 4) ndarray of anchors spreading over the plane
25 |     """
26 |     A = base_anchors.shape[0]
27 |     all_anchors = np.zeros((height, width, A, 4), dtype=np.float32)
28 | 
29 |     for iw in range(width):
30 |         sw = iw * stride
31 |         for ih in
range(height): 32 | sh = ih * stride 33 | for k in range(A): 34 | all_anchors[ih, iw, k, 0] = base_anchors[k, 0] + sw 35 | all_anchors[ih, iw, k, 1] = base_anchors[k, 1] + sh 36 | all_anchors[ih, iw, k, 2] = base_anchors[k, 2] + sw 37 | all_anchors[ih, iw, k, 3] = base_anchors[k, 3] + sh 38 | return all_anchors 39 | 40 | 41 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 42 | scales=2 ** np.arange(3, 6), stride=16, dense_anchor=False): 43 | """ 44 | Generate anchor (reference) windows by enumerating aspect ratios X 45 | scales wrt a reference (0, 0, 15, 15) window. 46 | """ 47 | 48 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 49 | ratio_anchors = _ratio_enum(base_anchor, ratios) 50 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 51 | for i in range(ratio_anchors.shape[0])]) 52 | if dense_anchor: 53 | assert stride % 2 == 0 54 | anchors2 = anchors.copy() 55 | anchors2[:, :] += int(stride/2) 56 | anchors = np.vstack((anchors, anchors2)) 57 | return anchors 58 | 59 | 60 | def generate_anchors_fpn(dense_anchor=False, cfg=None): 61 | """ 62 | Generate anchor (reference) windows by enumerating aspect ratios X 63 | scales wrt a reference (0, 0, 15, 15) window. 64 | """ 65 | if cfg is None: 66 | from ..config import config 67 | cfg = config.RPN_ANCHOR_CFG 68 | RPN_FEAT_STRIDE = [] 69 | for k in cfg: 70 | RPN_FEAT_STRIDE.append(int(k)) 71 | RPN_FEAT_STRIDE = sorted(RPN_FEAT_STRIDE, reverse=True) 72 | anchors = [] 73 | for k in RPN_FEAT_STRIDE: 74 | v = cfg[str(k)] 75 | bs = v['BASE_SIZE'] 76 | __ratios = np.array(v['RATIOS']) 77 | __scales = np.array(v['SCALES']) 78 | stride = int(k) 79 | r = generate_anchors(bs, __ratios, __scales, stride, dense_anchor) 80 | anchors.append(r) 81 | 82 | return anchors 83 | 84 | 85 | def _whctrs(anchor): 86 | """ 87 | Return width, height, x center, and y center for an anchor (window). 88 | """ 89 | 90 | w = anchor[2] - anchor[0] + 1 91 | h = anchor[3] - anchor[1] + 1 92 | x_ctr = anchor[0] + 0.5 * (w - 1) 93 | y_ctr = anchor[1] + 0.5 * (h - 1) 94 | return w, h, x_ctr, y_ctr 95 | 96 | 97 | def _mkanchors(ws, hs, x_ctr, y_ctr): 98 | """ 99 | Given a vector of widths (ws) and heights (hs) around a center 100 | (x_ctr, y_ctr), output a set of anchors (windows). 101 | """ 102 | 103 | ws = ws[:, np.newaxis] 104 | hs = hs[:, np.newaxis] 105 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 106 | y_ctr - 0.5 * (hs - 1), 107 | x_ctr + 0.5 * (ws - 1), 108 | y_ctr + 0.5 * (hs - 1))) 109 | return anchors 110 | 111 | 112 | def _ratio_enum(anchor, ratios): 113 | """ 114 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 115 | """ 116 | 117 | w, h, x_ctr, y_ctr = _whctrs(anchor) 118 | size = w * h 119 | size_ratios = size / ratios 120 | ws = np.round(np.sqrt(size_ratios)) 121 | hs = np.round(ws * ratios) 122 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 123 | return anchors 124 | 125 | 126 | def _scale_enum(anchor, scales): 127 | """ 128 | Enumerate a set of anchors for each scale wrt an anchor. 
129 | """ 130 | 131 | w, h, x_ctr, y_ctr = _whctrs(anchor) 132 | ws = w * scales 133 | hs = h * scales 134 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 135 | return anchors 136 | -------------------------------------------------------------------------------- /detection/rcnn/processing/nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | try: 3 | from ..cython.cpu_nms import cpu_nms 4 | except ImportError: 5 | cpu_nms = None 6 | try: 7 | from ..cython.gpu_nms import gpu_nms 8 | except ImportError: 9 | gpu_nms = None 10 | 11 | 12 | def py_nms_wrapper(thresh): 13 | def _nms(dets): 14 | return nms(dets, thresh) 15 | return _nms 16 | 17 | 18 | def cpu_nms_wrapper(thresh): 19 | def _nms(dets): 20 | return cpu_nms(dets, thresh) 21 | if cpu_nms is not None: 22 | return _nms 23 | else: 24 | return py_nms_wrapper(thresh) 25 | 26 | 27 | def gpu_nms_wrapper(thresh, device_id): 28 | def _nms(dets): 29 | return gpu_nms(dets, thresh, device_id) 30 | if gpu_nms is not None: 31 | return _nms 32 | elif cpu_nms is not None: 33 | return cpu_nms_wrapper(thresh) 34 | else: 35 | return py_nms_wrapper(thresh) 36 | 37 | 38 | def nms(dets, thresh): 39 | """ 40 | greedily select boxes with high confidence and overlap with current maximum <= thresh 41 | rule out overlap >= thresh 42 | :param dets: [[x1, y1, x2, y2 score]] 43 | :param thresh: retain overlap < thresh 44 | :return: indexes to keep 45 | """ 46 | x1 = dets[:, 0] 47 | y1 = dets[:, 1] 48 | x2 = dets[:, 2] 49 | y2 = dets[:, 3] 50 | scores = dets[:, 4] 51 | 52 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 53 | order = scores.argsort()[::-1] 54 | 55 | keep = [] 56 | while order.size > 0: 57 | i = order[0] 58 | keep.append(i) 59 | xx1 = np.maximum(x1[i], x1[order[1:]]) 60 | yy1 = np.maximum(y1[i], y1[order[1:]]) 61 | xx2 = np.minimum(x2[i], x2[order[1:]]) 62 | yy2 = np.minimum(y2[i], y2[order[1:]]) 63 | 64 | w = np.maximum(0.0, xx2 - xx1 + 1) 65 | h = np.maximum(0.0, yy2 - yy1 + 1) 66 | inter = w * h 67 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 68 | 69 | inds = np.where(ovr <= thresh)[0] 70 | order = order[inds + 1] 71 | 72 | return keep 73 | -------------------------------------------------------------------------------- /detection/retinaface.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import sys 4 | import os 5 | import numpy as np 6 | import mxnet as mx 7 | from mxnet import ndarray as nd 8 | import cv2 9 | from .rcnn.processing.bbox_transform import clip_boxes 10 | from .rcnn.processing.generate_anchor import generate_anchors_fpn, anchors_plane 11 | from .rcnn.processing.nms import gpu_nms_wrapper, cpu_nms_wrapper 12 | 13 | 14 | class RetinaFace: 15 | 16 | def __init__(self, prefix, epoch, ctx_id=0, network='net3', nms=0.4, nocrop=False, decay4=0.5, vote=False): 17 | 18 | self.ctx_id = ctx_id 19 | self.network = network 20 | self.decay4 = decay4 21 | self.nms_threshold = nms 22 | self.vote = vote 23 | self.nocrop = nocrop 24 | self.fpn_keys = [] 25 | self.anchor_cfg = None 26 | pixel_means=[0.0, 0.0, 0.0] 27 | pixel_stds=[1.0, 1.0, 1.0] 28 | pixel_scale = 1.0 29 | self.preprocess = False 30 | _ratio = (1.,) 31 | fmc = 3 32 | 33 | if network=='ssh' or network=='vgg': 34 | pixel_means=[103.939, 116.779, 123.68] 35 | self.preprocess = True 36 | elif network=='net3': 37 | _ratio = (1.,) 38 | elif network=='net3a': 39 | _ratio = (1., 1.5) 40 | elif network=='net6': # like pyramidbox or s3fd 41 | fmc = 6 
42 |         elif network=='net5': # retinaface
43 |             fmc = 5
44 |         elif network=='net5a':
45 |             fmc = 5
46 |             _ratio = (1., 1.5)
47 |         elif network=='net4':
48 |             fmc = 4
49 |         elif network=='net4a':
50 |             fmc = 4
51 |             _ratio = (1., 1.5)
52 |         else:
53 |             assert False, 'unknown network setting %s' % network
54 | 
55 |         if fmc==3:
56 |             self._feat_stride_fpn = [32, 16, 8]
57 |             self.anchor_cfg = {
58 |                 '32': {'SCALES': (32,16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
59 |                 '16': {'SCALES': (8,4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
60 |                 '8': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
61 |             }
62 |         elif fmc==4:
63 |             self._feat_stride_fpn = [32, 16, 8, 4]
64 |             self.anchor_cfg = {
65 |                 '32': {'SCALES': (32,16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
66 |                 '16': {'SCALES': (8,4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
67 |                 '8': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
68 |                 '4': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
69 |             }
70 |         elif fmc==6:
71 |             self._feat_stride_fpn = [128, 64, 32, 16, 8, 4]
72 |             self.anchor_cfg = {
73 |                 '128': {'SCALES': (32,), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
74 |                 '64': {'SCALES': (16,), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
75 |                 '32': {'SCALES': (8,), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
76 |                 '16': {'SCALES': (4,), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
77 |                 '8': {'SCALES': (2,), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
78 |                 '4': {'SCALES': (1,), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
79 |             }
80 |         elif fmc==5:
81 |             self._feat_stride_fpn = [64, 32, 16, 8, 4]
82 |             self.anchor_cfg = {}
83 |             _ass = 2.0**(1.0/3)  # three anchor scales per stride, spaced by 2^(1/3)
84 |             _basescale = 1.0
85 |             for _stride in [4, 8, 16, 32, 64]:
86 |                 key = str(_stride)
87 |                 value = {'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}
88 |                 scales = []
89 |                 for _ in range(3):
90 |                     scales.append(_basescale)
91 |                     _basescale *= _ass
92 |                 value['SCALES'] = tuple(scales)
93 |                 self.anchor_cfg[key] = value
94 | 
95 |         for s in self._feat_stride_fpn:
96 |             self.fpn_keys.append('stride%s'%s)
97 | 
98 |         dense_anchor = False
99 |         self._anchors_fpn = dict(zip(self.fpn_keys, generate_anchors_fpn(dense_anchor=dense_anchor, cfg=self.anchor_cfg)))
100 |         for k in self._anchors_fpn:
101 |             v = self._anchors_fpn[k].astype(np.float32)
102 |             self._anchors_fpn[k] = v
103 | 
104 |         self._num_anchors = dict(zip(self.fpn_keys, [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
105 |         sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
106 | 
107 |         # Select the GPU or CPU context, plus the matching NMS implementation.
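        # A minimal sketch of the pure-NumPy fallback in
        # rcnn/processing/nms.py (assuming the detection package is on the
        # import path), with made-up boxes in (x1, y1, x2, y2, score) form:
        #
        #   import numpy as np
        #   from detection.rcnn.processing.nms import py_nms_wrapper
        #   dets = np.array([[10., 10., 60., 60., 0.9],
        #                    [12., 12., 62., 62., 0.8],      # IoU ~0.86 with box 0: suppressed
        #                    [100., 100., 150., 150., 0.7]])
        #   py_nms_wrapper(0.4)(dets)                        # -> [0, 2]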
108 | if self.ctx_id>=0: 109 | self.ctx = mx.gpu(self.ctx_id) 110 | self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id) 111 | else: 112 | self.ctx = mx.cpu() 113 | self.nms = cpu_nms_wrapper(self.nms_threshold) 114 | self.pixel_means = np.array(pixel_means, dtype=np.float32) 115 | self.pixel_stds = np.array(pixel_stds, dtype=np.float32) 116 | self.pixel_scale = float(pixel_scale) 117 | self.use_landmarks = False 118 | 119 | if len(sym) // len(self._feat_stride_fpn) == 3: 120 | self.use_landmarks = True 121 | 122 | image_size = (640, 640) 123 | self.model = mx.mod.Module(symbol=sym, context=self.ctx, label_names=None) 124 | self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))], for_training=False) 125 | self.model.set_params(arg_params, aux_params) 126 | 127 | def get_input(self, img): 128 | im = img.astype(np.float32) 129 | im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1])) 130 | for i in range(3): 131 | im_tensor[0, i, :, :] = (im[:, :, 2 - i] / self.pixel_scale - self.pixel_means[2 - i]) / self.pixel_stds[2-i] 132 | data = nd.array(im_tensor) 133 | 134 | return data 135 | 136 | def detect(self, img, threshold=0.5, scales=[1.0], do_flip=False): 137 | proposals_list = [] 138 | scores_list = [] 139 | landmarks_list = [] 140 | flips = [0] 141 | if do_flip: 142 | flips = [0, 1] 143 | 144 | for im_scale in scales: 145 | for flip in flips: 146 | if im_scale!=1.0: 147 | im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR) 148 | else: 149 | im = img.copy() 150 | if flip: 151 | im = im[:,::-1,:] 152 | if self.nocrop: 153 | if im.shape[0] % 32 == 0: 154 | h = im.shape[0] 155 | else: 156 | h = (im.shape[0] // 32 + 1) * 32 157 | if im.shape[1] % 32 == 0: 158 | w = im.shape[1] 159 | else: 160 | w = (im.shape[1] // 32 + 1) * 32 161 | _im = np.zeros( (h, w, 3), dtype=np.float32 ) 162 | _im[0:im.shape[0], 0:im.shape[1], :] = im 163 | im = _im 164 | else: 165 | im = im.astype(np.float32) 166 | 167 | im_info = [im.shape[0], im.shape[1]] 168 | im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1])) 169 | for i in range(3): 170 | im_tensor[0, i, :, :] = (im[:, :, 2 - i]/self.pixel_scale - self.pixel_means[2 - i])/self.pixel_stds[2-i] 171 | data = nd.array(im_tensor) 172 | db = mx.io.DataBatch(data=(data,), provide_data=[('data', data.shape)]) 173 | self.model.forward(db, is_train=False) 174 | net_out = self.model.get_outputs() 175 | 176 | for _idx,s in enumerate(self._feat_stride_fpn): 177 | _key = 'stride%s'%s 178 | stride = int(s) 179 | if self.use_landmarks: 180 | idx = _idx*3 181 | else: 182 | idx = _idx*2 183 | scores = net_out[idx].asnumpy() 184 | scores = scores[:, self._num_anchors['stride%s'%s]:, :, :] 185 | 186 | idx+=1 187 | bbox_deltas = net_out[idx].asnumpy() 188 | 189 | height, width = bbox_deltas.shape[2], bbox_deltas.shape[3] 190 | 191 | A = self._num_anchors['stride%s'%s] 192 | K = height * width 193 | anchors_fpn = self._anchors_fpn['stride%s'%s] 194 | anchors = anchors_plane(height, width, stride, anchors_fpn) 195 | anchors = anchors.reshape((K * A, 4)) 196 | 197 | scores = self._clip_pad(scores, (height, width)) 198 | scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) 199 | bbox_deltas = self._clip_pad(bbox_deltas, (height, width)) 200 | bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)) 201 | bbox_pred_len = bbox_deltas.shape[3]//A 202 | bbox_deltas = bbox_deltas.reshape((-1, bbox_pred_len)) 203 | 204 | proposals = self.bbox_pred(anchors, bbox_deltas) 205 | proposals = clip_boxes(proposals, im_info[:2]) 206 | 
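                # bbox_pred (defined below) applies the usual R-CNN box
                # parameterisation, ctr' = ctr + (dx, dy) * (w, h) and
                # (w', h') = (w, h) * exp(dw, dh), so zero deltas decode an
                # anchor back to itself; clip_boxes then trims to the image.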
207 | scores_ravel = scores.ravel() 208 | order = np.where(scores_ravel>=threshold)[0] 209 | 210 | proposals = proposals[order, :] 211 | scores = scores[order] 212 | if stride==4 and self.decay4<1.0: 213 | scores *= self.decay4 214 | 215 | if flip: 216 | oldx1 = proposals[:, 0].copy() 217 | oldx2 = proposals[:, 2].copy() 218 | proposals[:, 0] = im.shape[1] - oldx2 - 1 219 | proposals[:, 2] = im.shape[1] - oldx1 - 1 220 | 221 | proposals[:, 0:4] /= im_scale 222 | proposals_list.append(proposals) 223 | scores_list.append(scores) 224 | 225 | if not self.vote and self.use_landmarks: 226 | idx += 1 227 | landmark_deltas = net_out[idx].asnumpy() 228 | landmark_deltas = self._clip_pad(landmark_deltas, (height, width)) 229 | landmark_pred_len = landmark_deltas.shape[1]//A 230 | landmark_deltas = landmark_deltas.transpose((0, 2, 3, 1)).reshape((-1, 5, landmark_pred_len//5)) 231 | landmarks = self.landmark_pred(anchors, landmark_deltas) 232 | landmarks = landmarks[order, :] 233 | 234 | if flip: 235 | landmarks[:,:,0] = im.shape[1] - landmarks[:,:,0] - 1 236 | order = [1, 0, 2, 4, 3] 237 | flandmarks = landmarks.copy() 238 | for idx, a in enumerate(order): 239 | flandmarks[:,idx,:] = landmarks[:,a,:] 240 | landmarks = flandmarks 241 | landmarks[:,:,0:2] /= im_scale 242 | 243 | landmarks_list.append(landmarks) 244 | 245 | proposals = np.vstack(proposals_list) 246 | landmarks = None 247 | if proposals.shape[0]==0: 248 | if self.use_landmarks: 249 | landmarks = np.zeros( (0,5,2) ) 250 | return np.zeros( (0,5) ), landmarks 251 | scores = np.vstack(scores_list) 252 | scores_ravel = scores.ravel() 253 | order = scores_ravel.argsort()[::-1] 254 | proposals = proposals[order, :] 255 | scores = scores[order] 256 | if not self.vote and self.use_landmarks: 257 | landmarks = np.vstack(landmarks_list) 258 | landmarks = landmarks[order].astype(np.float32, copy=False) 259 | 260 | pre_det = np.hstack((proposals[:,0:4], scores)).astype(np.float32, copy=False) 261 | if not self.vote: 262 | keep = self.nms(pre_det) 263 | det = np.hstack( (pre_det, proposals[:,4:]) ) 264 | det = det[keep, :] 265 | if self.use_landmarks: 266 | landmarks = landmarks[keep] 267 | else: 268 | det = np.hstack( (pre_det, proposals[:,4:]) ) 269 | det = self.bbox_vote(det) 270 | 271 | return det, landmarks 272 | 273 | def detect_center(self, img, threshold=0.5, scales=[1.0], do_flip=False): 274 | det, landmarks = self.detect(img, threshold, scales, do_flip) 275 | if det.shape[0]==0: 276 | return None, None 277 | bindex = 0 278 | if det.shape[0]>1: 279 | img_size = np.asarray(img.shape)[0:2] 280 | bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1]) 281 | img_center = img_size / 2 282 | offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ]) 283 | offset_dist_squared = np.sum(np.power(offsets,2.0),0) 284 | bindex = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering 285 | bbox = det[bindex,:] 286 | landmark = landmarks[bindex, :, :] 287 | 288 | return bbox, landmark 289 | 290 | @staticmethod 291 | def check_large_pose(landmark, bbox): 292 | assert landmark.shape==(5,2) 293 | assert len(bbox)==4 294 | def get_theta(base, x, y): 295 | vx = x-base 296 | vy = y-base 297 | vx[1] *= -1 298 | vy[1] *= -1 299 | tx = np.arctan2(vx[1], vx[0]) 300 | ty = np.arctan2(vy[1], vy[0]) 301 | d = ty-tx 302 | d = np.degrees(d) 303 | if d<-180.0: 304 | d+=360. 
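            # Wrap the signed angle difference into (-180, 180] degrees.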
305 |             elif d>180.0:
306 |                 d-=360.0
307 |             return d
308 |         landmark = landmark.astype(np.float32)  # assumed 5-point order: left eye, right eye, nose, left/right mouth corners
309 | 
310 |         theta1 = get_theta(landmark[0], landmark[3], landmark[2])
311 |         theta2 = get_theta(landmark[1], landmark[2], landmark[4])
312 |         theta3 = get_theta(landmark[0], landmark[2], landmark[1])
313 |         theta4 = get_theta(landmark[1], landmark[0], landmark[2])
314 |         theta5 = get_theta(landmark[3], landmark[4], landmark[2])
315 |         theta6 = get_theta(landmark[4], landmark[2], landmark[3])
316 |         theta7 = get_theta(landmark[3], landmark[2], landmark[0])
317 |         theta8 = get_theta(landmark[4], landmark[1], landmark[2])
318 |         left_score = 0.0
319 |         right_score = 0.0
320 |         up_score = 0.0
321 |         down_score = 0.0
322 |         if theta1<=0.0:
323 |             left_score = 10.0
324 |         elif theta2<=0.0:
325 |             right_score = 10.0
326 |         else:
327 |             left_score = theta2/theta1
328 |             right_score = theta1/theta2
329 |         if theta3<=10.0 or theta4<=10.0:
330 |             up_score = 10.0
331 |         else:
332 |             up_score = max(theta1/theta3, theta2/theta4)
333 |         if theta5<=10.0 or theta6<=10.0:
334 |             down_score = 10.0
335 |         else:
336 |             down_score = max(theta7/theta5, theta8/theta6)
337 |         mleft = (landmark[0][0]+landmark[3][0])/2
338 |         mright = (landmark[1][0]+landmark[4][0])/2
339 |         box_center = ( (bbox[0]+bbox[2])/2, (bbox[1]+bbox[3])/2 )
340 |         ret = 0
341 |         if left_score>=3.0:
342 |             ret = 1
343 |         if ret==0 and left_score>=2.0:
344 |             if mright<=box_center[0]:
345 |                 ret = 1
346 |         if ret==0 and right_score>=3.0:
347 |             ret = 2
348 |         if ret==0 and right_score>=2.0:
349 |             if mleft>=box_center[0]:
350 |                 ret = 2
351 |         if ret==0 and up_score>=2.0:
352 |             ret = 3
353 |         if ret==0 and down_score>=5.0:
354 |             ret = 4
355 |         return ret, left_score, right_score, up_score, down_score
356 | 
357 |     @staticmethod
358 |     def _filter_boxes(boxes, min_size):
359 |         """ Remove all boxes with any side smaller than min_size """
360 |         ws = boxes[:, 2] - boxes[:, 0] + 1
361 |         hs = boxes[:, 3] - boxes[:, 1] + 1
362 |         keep = np.where((ws >= min_size) & (hs >= min_size))[0]
363 |         return keep
364 | 
365 |     @staticmethod
366 |     def _filter_boxes2(boxes, max_size, min_size):
367 |         """ Keep boxes whose smaller side is below max_size, or else whose larger side exceeds min_size """
368 |         ws = boxes[:, 2] - boxes[:, 0] + 1
369 |         hs = boxes[:, 3] - boxes[:, 1] + 1
370 |         if max_size>0:
371 |             keep = np.where( np.minimum(ws, hs)<max_size )[0]
372 |         elif min_size>0:
373 |             keep = np.where( np.maximum(ws, hs)>min_size )[0]
374 |         return keep  # assumes max_size > 0 or min_size > 0
375 | 
376 |     @staticmethod
377 |     def _clip_pad(tensor, pad_shape):
378 |         """
379 |         Crop the network output back to the un-padded area.
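        (detect() zero-pads inputs up to a multiple of 32 when nocrop is set;
        this drops the rows/columns that correspond to that padding.)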
380 |         :param tensor: [n, c, H, W]
381 |         :param pad_shape: [h, w]
382 |         :return: [n, c, h, w]
383 |         """
384 |         H, W = tensor.shape[2:]
385 |         h, w = pad_shape
386 | 
387 |         if h < H or w < W:
388 |             tensor = tensor[:, :, :h, :w].copy()
389 | 
390 |         return tensor
391 | 
392 |     @staticmethod
393 |     def bbox_pred(boxes, box_deltas):
394 |         """
395 |         Transform the set of class-agnostic boxes into class-specific boxes
396 |         by applying the predicted offsets (box_deltas)
397 |         :param boxes: [N, 4]
398 |         :param box_deltas: [N, 4 * num_classes]
399 |         :return: [N, 4 * num_classes]
400 |         """
401 |         if boxes.shape[0] == 0:
402 |             return np.zeros((0, box_deltas.shape[1]))
403 | 
404 |         boxes = boxes.astype(np.float32, copy=False)  # np.float is a deprecated alias
405 |         widths = boxes[:, 2] - boxes[:, 0] + 1.0
406 |         heights = boxes[:, 3] - boxes[:, 1] + 1.0
407 |         ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
408 |         ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)
409 | 
410 |         dx = box_deltas[:, 0:1]
411 |         dy = box_deltas[:, 1:2]
412 |         dw = box_deltas[:, 2:3]
413 |         dh = box_deltas[:, 3:4]
414 | 
415 |         pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
416 |         pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
417 |         pred_w = np.exp(dw) * widths[:, np.newaxis]
418 |         pred_h = np.exp(dh) * heights[:, np.newaxis]
419 | 
420 |         pred_boxes = np.zeros(box_deltas.shape)
421 |         # x1
422 |         pred_boxes[:, 0:1] = pred_ctr_x - 0.5 * (pred_w - 1.0)
423 |         # y1
424 |         pred_boxes[:, 1:2] = pred_ctr_y - 0.5 * (pred_h - 1.0)
425 |         # x2
426 |         pred_boxes[:, 2:3] = pred_ctr_x + 0.5 * (pred_w - 1.0)
427 |         # y2
428 |         pred_boxes[:, 3:4] = pred_ctr_y + 0.5 * (pred_h - 1.0)
429 | 
430 |         if box_deltas.shape[1]>4:
431 |             pred_boxes[:,4:] = box_deltas[:,4:]
432 | 
433 |         return pred_boxes
434 | 
435 |     @staticmethod
436 |     def landmark_pred(boxes, landmark_deltas):
437 |         if boxes.shape[0] == 0:
438 |             return np.zeros((0, landmark_deltas.shape[1]))
439 |         boxes = boxes.astype(np.float32, copy=False)  # np.float is a deprecated alias
440 |         widths = boxes[:, 2] - boxes[:, 0] + 1.0
441 |         heights = boxes[:, 3] - boxes[:, 1] + 1.0
442 |         ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
443 |         ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)
444 |         pred = landmark_deltas.copy()
445 |         for i in range(5):
446 |             pred[:,i,0] = landmark_deltas[:,i,0]*widths + ctr_x
447 |             pred[:,i,1] = landmark_deltas[:,i,1]*heights + ctr_y
448 |         return pred
449 | 
450 |     def bbox_vote(self, det):
451 |         if det.shape[0] == 0:
452 |             dets = np.array([[10, 10, 20, 20, 0.002]])  # dummy low-score box
453 |             det = np.empty(shape=[0, 5])
454 |         while det.shape[0] > 0:
455 |             # IOU of the current top box against all remaining boxes
456 |             area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)
457 |             xx1 = np.maximum(det[0, 0], det[:, 0])
458 |             yy1 = np.maximum(det[0, 1], det[:, 1])
459 |             xx2 = np.minimum(det[0, 2], det[:, 2])
460 |             yy2 = np.minimum(det[0, 3], det[:, 3])
461 |             w = np.maximum(0.0, xx2 - xx1 + 1)
462 |             h = np.maximum(0.0, yy2 - yy1 + 1)
463 |             inter = w * h
464 |             o = inter / (area[0] + area[:] - inter)
465 | 
466 |             # nms
467 |             merge_index = np.where(o >= self.nms_threshold)[0]
468 |             det_accu = det[merge_index, :]
469 |             det = np.delete(det, merge_index, 0)
470 |             if merge_index.shape[0] <= 1:
471 |                 if det.shape[0] == 0:
472 |                     try:
473 |                         dets = np.row_stack((dets, det_accu))
474 |                     except NameError:  # first cluster: dets not created yet
475 |                         dets = det_accu
476 |                 continue
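            # Score-weighted box voting: each detection in the cluster
            # contributes its coordinates in proportion to its confidence,
            # and the merged box keeps the cluster's maximum score (cf. the
            # box voting of Gidaris & Komodakis, ICCV 2015).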
477 |             det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))
478 |             max_score = np.max(det_accu[:, 4])
479 |             det_accu_sum = np.zeros((1, 5))
480 |             det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4],
481 |                                           axis=0) / np.sum(det_accu[:, -1:])
482 |             det_accu_sum[:, 4] = max_score
483 |             try:
484 |                 dets = np.row_stack((dets, det_accu_sum))
485 |             except NameError:  # first cluster: dets not created yet
486 |                 dets = det_accu_sum
487 |         dets = dets[0:750, :]
488 | 
489 |         return dets
490 | 
491 | 
--------------------------------------------------------------------------------
/loss.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | import tensorflow as tf
3 | import math
4 | 
5 | N_LANDMARK = 106
6 | 
7 | 
8 | def normalized_mean_error(y_true, y_pred):
9 |     '''
10 |     Normalised mean error (NME): mean point-to-point distance, divided by the inter-ocular distance
11 |     '''
12 |     y_pred = K.reshape(y_pred, (-1, N_LANDMARK, 2))
13 |     y_true = K.reshape(y_true, (-1, N_LANDMARK, 2))
14 |     # Inter-ocular (pupil-to-pupil) distance used as the normaliser
15 |     interocular_distance = K.sqrt(
16 |         K.sum((y_true[:, 38, :] - y_true[:, 92, :]) ** 2, axis=-1))
17 |     return K.mean(K.sum(K.sqrt(K.sum((y_pred - y_true) ** 2, axis=-1)), axis=-1)) / \
18 |         K.mean((interocular_distance * N_LANDMARK))
19 | 
20 | 
21 | # def wing_loss(y_true, y_pred, w=10.0, epsilon=2.0):
22 | #     """
23 | #     Reference: wing loss for robust facial landmark localisation
24 | #     with convolutional neural networks
25 | #     """
26 | #     x = y_true - y_pred
27 | #     c = w * (1.0 - math.log(1.0 + w/epsilon))
28 | #     absolute_x = K.abs(x)
29 | #     losses = tf.where(
30 | #         K.greater(w, absolute_x),
31 | #         w * K.log(1.0 + absolute_x/epsilon),
32 | #         absolute_x - c
33 | #     )
34 | #     loss = K.mean(K.sum(losses, axis=-1), axis=0)
35 | 
36 | #     return loss
37 | 
38 | def wing_loss(y_true, y_pred, w=10.0, epsilon=2.0):
39 |     """
40 |     Arguments:
41 |         y_true, y_pred: float tensors with shape [batch_size, num_landmarks * 2].
42 |         w, epsilon: float hyper-parameters of the wing loss.
43 |     Returns:
44 |         a float tensor with shape []: w * ln(1 + |x|/epsilon) for |x| < w, else |x| - c, with c chosen so the two pieces join at |x| = w.
45 |     """
46 |     y_true = tf.reshape(y_true, [-1, N_LANDMARK, 2])
47 |     y_pred = tf.reshape(y_pred, [-1, N_LANDMARK, 2])
48 | 
49 |     x = y_true - y_pred
50 |     c = w * (1.0 - math.log(1.0 + w / epsilon))
51 |     absolute_x = tf.abs(x)
52 |     losses = tf.where(
53 |         tf.greater(w, absolute_x),
54 |         w * tf.log(1.0 + absolute_x/epsilon),
55 |         absolute_x - c
56 |     )
57 |     loss = tf.reduce_mean(tf.reduce_sum(losses, axis=[1, 2]), axis=0)
58 | 
59 |     return loss
60 | 
61 | 
62 | def smoothL1(y_true, y_pred):
63 |     """
64 |     Smooth L1: quadratic near zero, linear for large errors; more robust to outliers than L2
65 |     """
66 |     THRESHOLD = K.variable(1.0)
67 |     mae = K.abs(y_true - y_pred)
68 |     flag = K.greater(mae, THRESHOLD)
69 |     loss = K.mean(K.switch(flag, (mae - 0.5), 0.5 * K.pow(mae, 2)), axis=-1)  # the 0.5 factor makes the two branches meet at mae == 1
70 | 
71 |     return loss
72 | 
73 | 
74 | 
75 | 
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
1 | # # PFLD: A Practical Facial Landmark Detector
2 | 
3 | # import sys
4 | # import time
5 | 
6 | # from keras.models import Model
7 | # from keras.layers import *
8 | # from keras import backend as K
9 | # from keras.utils.vis_utils import plot_model
10 | # from keras.utils import vis_utils
11 | 
12 | 
13 | # def _conv_block(inputs, filters, kernel, strides, dilation_rate=1, padding='same'):
14 | #     """Convolution Block
15 | #     This function defines a 2D convolution operation with BN and relu6.
16 | #     # Arguments
17 | #         inputs: Tensor, input tensor of conv layer.
18 | #         filters: Integer, the dimensionality of the output space.
19 | #         kernel: An integer or tuple/list of 2 integers, specifying the
20 | #             width and height of the 2D convolution window.
21 | #         strides: An integer or tuple/list of 2 integers,
22 | #             specifying the strides of the convolution along the width and height.
23 | # Can be a single integer to specify the same value for 24 | # all spatial dimensions. 25 | # # Returns 26 | # Output tensor. 27 | # """ 28 | # channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 29 | # x = Conv2D(filters, kernel, padding=padding, strides=strides, 30 | # dilation_rate=dilation_rate)(inputs) 31 | # x = BatchNormalization(axis=channel_axis)(x) 32 | 33 | # return Activation('relu')(x) 34 | 35 | 36 | # def _depthwise_block(inputs, kernel, strides, padding='same'): 37 | # '''Depthwise separable 2D convolution block''' 38 | 39 | # assert isinstance(kernel, (tuple, int)) 40 | # assert isinstance(strides, (tuple, int)) 41 | 42 | # channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 43 | # x = DepthwiseConv2D(kernel_size=kernel, strides=strides, 44 | # depth_multiplier=1, padding=padding)(inputs) 45 | # x = BatchNormalization(axis=channel_axis)(x) 46 | 47 | # return Activation('relu')(x) 48 | 49 | 50 | # def _bottleneck(inputs, filters, kernel, t, s, alpha, r=False): 51 | # """Bottleneck 52 | # This function defines a basic bottleneck structure. 53 | # # Arguments 54 | # inputs: Tensor, input tensor of conv layer. 55 | # filters: Integer, the dimensionality of the output space. 56 | # kernel: An integer or tuple/list of 2 integers, specifying the 57 | # width and height of the 2D convolution window. 58 | # t: Integer, expansion factor. 59 | # t is always applied to the input size. 60 | # s: An integer or tuple/list of 2 integers,specifying the strides 61 | # of the convolution along the width and height.Can be a single 62 | # integer to specify the same value for all spatial dimensions. 63 | # r: Boolean, Whether to use the residuals. 64 | # # Returns 65 | # Output tensor. 66 | # """ 67 | 68 | # channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 69 | # tchannel = K.int_shape(inputs)[channel_axis] * t 70 | # filters = _make_divisible(filters * alpha) 71 | 72 | # x = _conv_block(inputs, tchannel, (1, 1), (1, 1)) 73 | 74 | # x = DepthwiseConv2D(kernel, strides=( 75 | # s, s), depth_multiplier=1, padding='same')(x) 76 | # x = BatchNormalization(axis=channel_axis)(x) 77 | # x = ReLU(max_value=6)(x) 78 | 79 | # x = Conv2D(filters, (1, 1), strides=(1, 1), padding='same')(x) 80 | # x = BatchNormalization(axis=channel_axis)(x) 81 | 82 | # if r: 83 | # x = add([x, inputs]) 84 | # return x 85 | 86 | 87 | # def _inverted_residual_block(inputs, filters, kernel, t, strides, n, alpha=1): 88 | # """Inverted Residual Block 89 | # This function defines a sequence of 1 or more identical layers. 90 | # # Arguments 91 | # inputs: Tensor, input tensor of conv layer. 92 | # filters: Integer, the dimensionality of the output space. 93 | # kernel: An integer or tuple/list of 2 integers, specifying the 94 | # width and height of the 2D convolution window. 95 | # t: Integer, expansion factor. 96 | # t is always applied to the input size. 97 | # s: An integer or tuple/list of 2 integers,specifying the strides 98 | # of the convolution along the width and height.Can be a single 99 | # integer to specify the same value for all spatial dimensions. 100 | # n: Integer, layer repeat times. 101 | # # Returns 102 | # Output tensor. 
103 | # """ 104 | # x = _bottleneck(inputs, filters, kernel, t, strides, alpha=alpha) 105 | 106 | # for i in range(1, n): 107 | # x = _bottleneck(x, filters, kernel, t, 1, alpha=alpha, r=True) 108 | 109 | # return x 110 | 111 | # # https://github.com/titu1994/MobileNetworks/blob/master/mobilenets.py 112 | 113 | 114 | # def _make_divisible(v, divisor=8, min_value=8): 115 | # if min_value is None: 116 | # min_value = divisor 117 | 118 | # new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 119 | # # Make sure that round down does not go down by more than 10%. 120 | # if new_v < 0.9 * v: 121 | # new_v += divisor 122 | # return new_v 123 | 124 | 125 | # def PFLDNetBackbone(input_shape, output_nodes, alpha=1): 126 | # """ 127 | # This function defines a PFLDNet architectures. 128 | # # Arguments 129 | # input_shape: An integer or tuple/list of 3 integers, shape 130 | # of input tensor. 131 | # output_nodes: Integer, number of classes. 132 | # alpha: width parameter. 133 | # # Returns 134 | # PFLDNet model. 135 | # """ 136 | 137 | # inputs = Input(shape=input_shape) 138 | # # https://mp.weixin.qq.com/s/0oMqwQn2UlYYk557sbPBsQ 139 | # x = ZeroPadding2D(padding=(1, 1))(inputs) 140 | # x = _conv_block(x, 64, (3, 3), strides=1, dilation_rate=2) 141 | # x = _depthwise_block(x, (3, 3), strides=2) 142 | # s1_b = _inverted_residual_block( 143 | # x, 64, (3, 3), t=2, strides=2, n=5, alpha=alpha) 144 | # x = _inverted_residual_block( 145 | # s1_b, 128, (3, 3), t=2, strides=2, n=1, alpha=alpha) 146 | # x = _inverted_residual_block( 147 | # x, 128, (3, 3), t=4, strides=1, n=6, alpha=alpha) 148 | # s1 = _inverted_residual_block( 149 | # x, 256, (3, 3), t=2, strides=1, n=1, alpha=alpha) 150 | # s2 = _conv_block(s1, 256, (3, 3), strides=1, dilation_rate=2) 151 | # s3 = _conv_block(s2, 256, (3, 3), strides=1, dilation_rate=2) 152 | 153 | # # 106 Landmarks branch 154 | # # t1_g = Flatten()(s1) 155 | # # t2_g = Flatten()(s2) 156 | # # t3_g = Flatten()(s3) 157 | # # t1_212 = Dense(units=output_nodes, name='b1_s1')(t1_g) 158 | # # t2_212 = Dense(units=output_nodes, name='b1_s2')(t1_g) 159 | # # t3_212 = Dense(units=output_nodes, name='b1_s3')(t1_g) 160 | # # t1_out = Add(name='b1_s')([t1_212, t2_212, t3_212]) 161 | # t1_g = GlobalAveragePooling2D()(s1) 162 | # t2_g = GlobalAveragePooling2D()(s2) 163 | # t3_g = GlobalAveragePooling2D()(s3) 164 | # concat = Concatenate()([t1_g, t2_g, t3_g]) 165 | # t1_out = Dense(units=output_nodes, name='b1_s')(concat) 166 | 167 | # # Pose branch 168 | # v1 = _conv_block(s1_b, 128, (3, 3), strides=2) 169 | # v2 = _conv_block(v1, 128, (3, 3), strides=1) 170 | # v3 = _conv_block(v2, 32, (3, 3), strides=2) 171 | # v4 = _conv_block(v3, 128, (7, 7), strides=1, padding='valid') 172 | # t2_out = Dense(units=3, name='b2_s')(Flatten()(v4)) 173 | 174 | # # TODO angle... 175 | 176 | # # Merge branch 177 | # model = Model(inputs, [t1_out, t2_out]) 178 | 179 | # return model 180 | 181 | 182 | # if __name__ == '__main__': 183 | 184 | # # Testing designed network 185 | # model = PFLDNetBackbone((112, 112, 3), 212, alpha=1.0) 186 | # vis = True 187 | 188 | # if vis: 189 | # model.summary() 190 | # # plot_model(model, to_file='PFLDNet.png', show_shapes=True) 191 | 192 | # # inputs = np.random.randn(1, 112, 112, 3) 193 | 194 | # # for i in range(100): 195 | # # start = time.time() 196 | # # model.predict(inputs, batch_size=1) 197 | # # print("[info] time use {}".format(time.time() - start)) 198 | 199 | 200 | """MobileNet v3 small models for Keras. 
201 | # Reference
202 |     [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244?context=cs)
203 | """
204 | from keras import backend as K
205 | from keras.layers import *
206 | from keras.models import Model
207 | from keras.utils.generic_utils import get_custom_objects
208 | 
209 | 
210 | def hard_swish(x):
211 |     return x * K.relu(x + 3.0, max_value=6.0) / 6.0
212 | 
213 | 
214 | def relu6(x):
215 |     return K.relu(x, max_value=6.0)
216 | 
217 | # Custom activation functions registered with Keras
218 | get_custom_objects().update({'hard_swish': Activation(hard_swish)})
219 | get_custom_objects().update({'relu6': Activation(relu6)})
220 | 
221 | 
222 | class MobileNetBase:
223 |     def __init__(self, shape, n_class):
224 |         self.shape = shape
225 |         self.n_class = n_class
226 | 
227 |     # def _relu6(self, x):
228 |     #     """Relu 6
229 |     #     """
230 |     #     return K.relu(x, max_value=6.0)
231 | 
232 |     # def _hard_swish(self, x):
233 |     #     """Hard swish
234 |     #     """
235 |     #     return x * K.relu(x + 3.0, max_value=6.0) / 6.0
236 | 
237 |     def _return_activation(self, x, nl):
238 |         """Activation helper
239 |         This function applies the selected nonlinearity.
240 | 
241 |         # Arguments
242 |             x: Tensor, input tensor of conv layer.
243 |             nl: String, nonlinearity activation type.
244 | 
245 |         # Returns
246 |             Output tensor.
247 |         """
248 |         if nl == 'HS':
249 |             x = Activation(hard_swish)(x)
250 |         if nl == 'RE':
251 |             x = Activation(relu6)(x)
252 | 
253 |         return x
254 | 
255 |     def _conv_block(self, inputs, filters, kernel, strides, nl):
256 |         """Convolution Block
257 |         This function defines a 2D convolution operation with BN and activation.
258 | 
259 |         # Arguments
260 |             inputs: Tensor, input tensor of conv layer.
261 |             filters: Integer, the dimensionality of the output space.
262 |             kernel: An integer or tuple/list of 2 integers, specifying the
263 |                 width and height of the 2D convolution window.
264 |             strides: An integer or tuple/list of 2 integers,
265 |                 specifying the strides of the convolution along the width and height.
266 |                 Can be a single integer to specify the same value for
267 |                 all spatial dimensions.
268 |             nl: String, nonlinearity activation type.
269 | 
270 |         # Returns
271 |             Output tensor.
272 |         """
273 | 
274 |         channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
275 | 
276 |         x = Conv2D(filters, kernel, padding='same', strides=strides)(inputs)
277 |         x = BatchNormalization(axis=channel_axis)(x)
278 | 
279 |         return self._return_activation(x, nl)
280 | 
281 |     def _squeeze(self, inputs):
282 |         """Squeeze and Excitation.
283 |         This function defines a squeeze-and-excitation structure.
284 | 
285 |         # Arguments
286 |             inputs: Tensor, input tensor of conv layer.
287 |         """
288 |         # input_channels = int(inputs.shape[-1])
289 |         input_channels = inputs._keras_shape[-1]
290 | 
291 |         x = GlobalAveragePooling2D()(inputs)
292 |         x = Dense(int(input_channels/4), activation='relu')(x)
293 |         x = Dense(input_channels, activation='hard_sigmoid')(x)
294 |         x = Reshape((1, 1, -1))(x)
295 |         x = multiply([inputs, x])
296 | 
297 |         return x
298 | 
299 |     def _bottleneck(self, inputs, filters, kernel, e, s, squeeze, nl):
300 |         """Bottleneck
301 |         This function defines a basic bottleneck structure.
302 | 
303 |         # Arguments
304 |             inputs: Tensor, input tensor of conv layer.
305 |             filters: Integer, the dimensionality of the output space.
306 |             kernel: An integer or tuple/list of 2 integers, specifying the
307 |                 width and height of the 2D convolution window.
308 |             e: Integer, expanded channel count of the 1x1 expansion.
309 |                 It is used directly (tchannel = e), not as a multiplier.
310 |             s: An integer or tuple/list of 2 integers, specifying the strides
311 |                 of the convolution along the width and height. Can be a single
312 |                 integer to specify the same value for all spatial dimensions.
313 |             squeeze: Boolean, whether to use squeeze-and-excitation.
314 |             nl: String, nonlinearity activation type.
315 | 
316 |         # Returns
317 |             Output tensor.
318 |         """
319 | 
320 |         channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
321 |         input_shape = K.int_shape(inputs)
322 |         tchannel = e
323 |         r = s == 1 and input_shape[3] == filters
324 |         x = self._conv_block(inputs, tchannel, (1, 1), (1, 1), nl)
325 | 
326 |         x = DepthwiseConv2D(kernel, strides=(
327 |             s, s), depth_multiplier=1, padding='same')(x)
328 |         x = BatchNormalization(axis=channel_axis)(x)
329 | 
330 |         if squeeze:
331 |             # x = Lambda(lambda x: x * self._squeeze(x))(x)
332 |             x = self._squeeze(x)
333 | 
334 |         x = self._return_activation(x, nl)
335 | 
336 |         x = Conv2D(filters, (1, 1), strides=(1, 1), padding='same')(x)
337 |         x = BatchNormalization(axis=channel_axis)(x)
338 | 
339 |         if r:
340 |             x = Add()([x, inputs])
341 | 
342 |         return x
343 | 
344 |     def build(self):
345 |         raise NotImplementedError
346 | 
347 | 
348 | class MobileNetV3(MobileNetBase):
349 |     def __init__(self, shape, n_class):
350 |         """Init.
351 | 
352 |         # Arguments
353 |             shape: An integer or tuple/list of 3 integers, shape
354 |                 of input tensor.
355 |             n_class: Integer, number of output units (212 = 106 landmarks x 2 here).
356 | 
357 |         # Returns
358 |             MobileNetV3 model.
359 |         """
360 |         super(MobileNetV3, self).__init__(shape, n_class)
361 | 
362 |     def build(self):
363 |         """Build the MobileNetV3 backbone (the active stack is the Large configuration).
364 | 
365 |         # Arguments
366 |             plot: Boolean, whether to plot the model.
367 | 
368 |         # Returns
369 |             model: Model, the two-branch Keras model.
370 |         """
371 |         inputs = Input(shape=self.shape)
372 | 
373 |         x = self._conv_block(inputs, 16, (3, 3), strides=(2, 2), nl='HS')
374 | 
375 |         # x = self._bottleneck(x, 16, (3, 3), e=16, s=2, squeeze=True, nl='RE')
376 |         # x = self._bottleneck(x, 24, (3, 3), e=72, s=2, squeeze=False, nl='RE')
377 |         # x = self._bottleneck(x, 24, (3, 3), e=88, s=1, squeeze=False, nl='RE')
378 |         # x = self._bottleneck(x, 40, (5, 5), e=96, s=2, squeeze=True, nl='HS')
379 |         # x = self._bottleneck(x, 40, (5, 5), e=240, s=1, squeeze=True, nl='HS')
380 |         # x = self._bottleneck(x, 40, (5, 5), e=240, s=1, squeeze=True, nl='HS')
381 |         # x = self._bottleneck(x, 48, (5, 5), e=120, s=1, squeeze=True, nl='HS')
382 |         # x = self._bottleneck(x, 48, (5, 5), e=144, s=1, squeeze=True, nl='HS')
383 |         # x = self._bottleneck(x, 96, (5, 5), e=288, s=2, squeeze=True, nl='HS')
384 |         # x = self._bottleneck(x, 96, (5, 5), e=576, s=1, squeeze=True, nl='HS')
385 |         # x = self._bottleneck(x, 96, (5, 5), e=576, s=1, squeeze=True, nl='HS')
386 |         x = self._bottleneck(x, 16, (3, 3), e=16, s=1, squeeze=False, nl='RE')
387 |         x = self._bottleneck(x, 24, (3, 3), e=64, s=2, squeeze=False, nl='RE')
388 |         x = self._bottleneck(x, 24, (3, 3), e=72, s=1, squeeze=False, nl='RE')
389 |         x = self._bottleneck(x, 40, (5, 5), e=72, s=2, squeeze=True, nl='RE')
390 |         x = self._bottleneck(x, 40, (5, 5), e=120, s=1, squeeze=True, nl='RE')
391 |         x = self._bottleneck(x, 40, (5, 5), e=120, s=1, squeeze=True, nl='RE')
392 |         x = self._bottleneck(x, 80, (3, 3), e=240, s=2, squeeze=False, nl='HS')
393 |         x = self._bottleneck(x, 80, (3, 3), e=200, s=1, squeeze=False, nl='HS')
394 |         x = self._bottleneck(x, 80, (3, 3), e=184, s=1, squeeze=False, nl='HS')
395 |         x = self._bottleneck(x, 80, (3, 3), e=184, s=1, squeeze=False, nl='HS')
396 |         x = self._bottleneck(x, 112, (3, 3), e=480, s=1, squeeze=True, nl='HS')
397 |         x = self._bottleneck(x, 112, (3, 3), e=672, s=1, squeeze=True, nl='HS')
398 |         x = self._bottleneck(x, 160, (5, 5), e=672, s=2, squeeze=True, nl='HS')
399 |         x = self._bottleneck(x, 160, (5, 5), e=960, s=1, squeeze=True, nl='HS')
400 |         x = self._bottleneck(x, 160, (5, 5), e=960, s=1, squeeze=True, nl='HS')
401 | 
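        # Two-branch head below: global average pooling, then 1x1 convs.
        # 'b1_s' regresses n_class values (212 = 106 landmarks x 2 in train.py)
        # and 'b2_s' regresses 3 values -- apparently the auxiliary head-pose
        # (Euler angle) branch from the PFLD paper.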
402 |         # x = self._conv_block(x, 576, (1, 1), strides=(1, 1), nl='HS')
403 |         # x = GlobalAveragePooling2D()(x)
404 |         # x = Reshape((1, 1, 576))(x)
405 |         x = self._conv_block(x, 960, (1, 1), strides=(1, 1), nl='HS')
406 |         x = GlobalAveragePooling2D()(x)
407 |         x = Reshape((1, 1, 960))(x)
408 | 
409 |         x = Conv2D(1280, (1, 1), padding='same')(x)
410 |         t1_0 = self._return_activation(x, 'HS')
411 |         t1_1 = Conv2D(self.n_class, (1, 1), padding='same')(t1_0)
412 |         t1_out = Reshape((self.n_class,), name='b1_s')(t1_1)
413 | 
414 |         t2 = Conv2D(3, (1, 1), padding='same')(t1_0)
415 |         t2_out = Reshape((3,), name='b2_s')(t2)
416 |         # Merge branches into one model
417 |         model = Model(inputs, [t1_out, t2_out])
418 | 
419 |         return model
420 | 
421 | 
422 | if __name__ == "__main__":
423 | 
424 |     model = MobileNetV3((112, 112, 3), 212).build()
425 |     model.summary()
426 | 
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | # from model import PFLDNetBackbone
3 | from model import MobileNetV3
4 | from data_generator import DataGenerator
5 | from loss import *
6 | from keras.optimizers import Adam
7 | from keras import callbacks
8 | from keras import backend as K
9 | import os
10 | import numpy as np
11 | 
12 | import tensorflow as tf
13 | tf.logging.set_verbosity(tf.logging.ERROR)
14 | 
15 | ap = argparse.ArgumentParser()
16 | ap.add_argument("--batch_size", type=int, default=128,
17 |                 help="batch size of data")
18 | # ap.add_argument("--alpha", type=float, default=1.0,
19 | #                 help="width parameter of MobileNet blocks")
20 | ap.add_argument("--lr", type=float, default=1e-3,
21 |                 help="learning rate")
22 | ap.add_argument("--checkpoints", type=str, default="./checkpoints/pfld.h5",
23 |                 help="checkpoint path")
24 | ap.add_argument("--fine_tune_path", type=str, default="./checkpoints/pfld.h5",
25 |                 help="fine-tune checkpoint path")
26 | ap.add_argument('--fine_tune', action='store_true', help='fine tune or not')
27 | ap.add_argument('--epochs', type=int,
28 |                 default=100, help='number of training epochs')
29 | ap.add_argument('--workers', type=int,
30 |                 default=4, help='number of data-loading workers')
31 | args = vars(ap.parse_args())
32 | 
33 | 
34 | class PolyDecay:
35 |     '''
36 |     Polynomial learning-rate decay: lr(epoch) = initial_lr * (1 - epoch/n_epochs)**power
37 |     '''
38 | 
39 |     def __init__(self, initial_lr, power, n_epochs):
40 |         self.initial_lr = initial_lr
41 |         self.power = power
42 |         self.n_epochs = n_epochs
43 | 
44 |     def scheduler(self, epoch):
45 |         return self.initial_lr * np.power(1.0 - 1.0 * epoch / self.n_epochs, self.power)
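# A quick sanity check of the schedule above, under the defaults used in
# __main__ below (initial_lr=1e-3, power=0.9, n_epochs=100):
#
#   sched = PolyDecay(1e-3, 0.9, 100).scheduler
#   sched(0)    # -> 1.0e-3
#   sched(50)   # -> ~5.4e-4
#   sched(99)   # -> ~1.6e-5   (polynomial, not exponential, decay)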
46 | 
47 | 
48 | if __name__ == '__main__':
49 | 
50 |     # Let TensorFlow grow GPU memory on demand
51 |     config = tf.ConfigProto()
52 |     config.gpu_options.allow_growth = True
53 |     session = tf.Session(config=config)
54 | 
55 |     train_generator = DataGenerator(
56 |         batch_size=args['batch_size'], root_dir='./new_dataset', csv_file='./new_dataset/face_mixed.csv',
57 |         shuffle=True, transformer=True)
58 |     val_generator = DataGenerator(
59 |         batch_size=args['batch_size'], root_dir='./new_test_dataset', csv_file='./new_test_dataset/face_mixed.csv')
60 | 
61 |     # model = PFLDNetBackbone(input_shape=(112, 112, 3),
62 |     #                         output_nodes=212, alpha=args['alpha'])
63 |     model = MobileNetV3(shape=(112, 112, 3), n_class=212).build()
64 | 
65 |     if args['fine_tune']:
66 |         model.load_weights(args['fine_tune_path'], by_name=True)
67 | 
68 |     # https://blog.csdn.net/laolu1573/article/details/83626555
69 |     # To train landmarks only, we can simply set the 'b2_s' loss weight to 0.
70 |     model.compile(loss={'b1_s': wing_loss, 'b2_s': smoothL1}, loss_weights={'b1_s': 2, 'b2_s': 1},
71 |                   optimizer=Adam(lr=args['lr']),
72 |                   metrics={'b1_s': normalized_mean_error, 'b2_s': 'mae'})
73 | 
74 |     if not os.path.exists("checkpoints"):
75 |         os.mkdir("checkpoints")
76 | 
77 |     filepath = "./checkpoints/{epoch:02d}-{val_loss:.5f}.h5"
78 |     tensorboard = callbacks.TensorBoard(log_dir='./checkpoints/logs')
79 |     checkpoint = callbacks.ModelCheckpoint(
80 |         filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
81 |     lr_decay = callbacks.LearningRateScheduler(
82 |         PolyDecay(args['lr'], 0.9, args['epochs']).scheduler)
83 |     callbacks_list = [checkpoint, tensorboard, lr_decay]
84 | 
85 |     model.fit_generator(
86 |         train_generator,
87 |         len(train_generator),
88 |         validation_data=val_generator,
89 |         validation_steps=len(val_generator),
90 |         epochs=args['epochs'],
91 |         verbose=1,
92 |         callbacks=callbacks_list,
93 |         use_multiprocessing=True,
94 |         workers=args['workers']
95 |     )
96 |     model.save(args['checkpoints'])
97 | 
98 |     K.clear_session()
--------------------------------------------------------------------------------
/transformer.py:
--------------------------------------------------------------------------------
1 | # This script is intended only for algorithm verification
2 | 
3 | import argparse
4 | import cv2
5 | import os
6 | import numpy as np
7 | import pandas as pd
8 | import sys
9 | from tqdm import tqdm
10 | from skimage import transform
11 | from pprint import pprint
12 | 
13 | from mtcnn.mtcnn import MTCNN
14 | 
15 | import tensorflow as tf
16 | tf.logging.set_verbosity(tf.logging.ERROR)
17 | 
18 | # from common.landmark_utils import LandmarkImageCrop
19 | # from common.landmark_helper import LandmarkHelper
20 | 
21 | ap = argparse.ArgumentParser()
22 | ap.add_argument("-l", "--landmark_txt", type=str, default='./new_dataset/landmarks.txt',
23 |                 help="path to landmarks txt")
24 | ap.add_argument("-c", "--landmark_csv", type=str, default='./new_dataset/face_landmarks.csv',
25 |                 help="existing landmarks csv")
26 | ap.add_argument("-b", "--base_dir", type=str, default='./new_dataset',
27 |                 help="base dataset dir")
28 | ap.add_argument("-s", "--output_size", type=int, default=112,
29 |                 help="output image size")
30 | ap.add_argument("-n", "--new_path", type=str, default='./align_new_dataset',
31 |                 help="directory for the aligned images")
32 | args = vars(ap.parse_args())
33 | 
34 | 
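# These reference coordinates appear to be the mean positions of the left
# pupil, right pupil and nose tip (landmarks 34, 92 and 86 of the 106-point
# scheme, as computed in __main__ below) inside a 112x112 aligned crop.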
35 | REFERENCE_FACIAL_POINTS = [[38.453125, 28.139446],
36 |                            [70.8962, 27.549734],
37 |                            [54.171013, 50.283226]]
38 | 
39 | 
40 | # def scale_and_shift(image, landmarks, scale_range, output_size):
41 | #     '''
42 | #     Auto-generate a bbox, then randomly scale and shift it.
43 | #     Args:
44 | #         image: a numpy array
45 | #         landmarks: face landmarks with format [(x1, y1), ...]. range is 0-w or h in int
46 | #         scale_range: scale bbox in (min, max). e.g. (1.3, 1.5)
47 | #         output_size: output size of image
48 | #     Returns:
49 | #         an image and landmarks will be returned
50 | #     Raises:
51 | #         No
52 | #     '''
53 | #     (x1, y1, x2, y2), new_size, need_pad, (p_x, p_y, p_w, p_h) = LandmarkImageCrop.get_bbox_of_landmarks(
54 | #         image, landmarks, scale_range, shift_rate=0.3)
55 | #     box_image = image[y1:y2, x1:x2]
56 | #     if need_pad:
57 | #         box_image = np.lib.pad(
58 | #             box_image, ((p_y, p_h), (p_x, p_w), (0, 0)), 'constant')
59 | #     box_image = cv2.resize(box_image, (output_size, output_size))
60 | #     landmarks = (landmarks - (x1 - p_x, y1 - p_y))
61 | 
62 | #     return box_image, landmarks
63 | 
64 | class FaceAlign(object):
65 |     '''Align face with MTCNN'''
66 | 
67 |     def __init__(self, out_size):
68 |         self.detector = MTCNN()
69 |         self.out_size = out_size
70 | 
71 |     def face_aligned_mtcnn(self, im):
72 |         '''
73 |         Align a face using the keypoints detected by MTCNN.
74 |         im: BGR image array
75 |         '''
76 |         try:
77 |             wrapper = self.detector.detect_faces(im[:, :, ::-1])[0]
78 |         except IndexError:  # the detector returned no faces
79 |             raise ValueError("No face...")
80 | 
81 |         points = wrapper['keypoints']
82 |         values = list(points.values())
83 |         gt_array = np.array(values).reshape((-1, 2))[:2]
84 |         ref_array = np.array(REFERENCE_FACIAL_POINTS[:2], dtype=np.float32)
85 | 
86 |         tform = transform.SimilarityTransform()
87 |         tform.estimate(gt_array, ref_array)
88 |         tfm = tform.params[0: 2, :]
89 | 
90 |         return cv2.warpAffine(
91 |             im, tfm, (self.out_size, self.out_size))
92 | 
93 |     def face_aligned(self, im, ldmarks):
94 |         '''
95 |         im: BGR array
96 |         ldmarks: [(x0, y0), ...]
97 |         '''
98 |         gt_array = np.array(ldmarks)[:2]
99 |         ref_array = np.array(REFERENCE_FACIAL_POINTS[:2], dtype=np.float32)
100 | 
101 |         tform = transform.SimilarityTransform()
102 |         tform.estimate(gt_array, ref_array)
103 |         tfm = tform.params[0: 2, :]
104 | 
105 |         return cv2.warpAffine(
106 |             im, tfm, (self.out_size, self.out_size)), tform
107 | 
108 | 
109 | if __name__ == '__main__':
110 | 
111 |     # with open('./dataset/landmarks.txt') as f:
112 | 
113 |     #     samples_list = []
114 | 
115 |     #     for line in f.readlines():
116 |     #         # Parse txt file
117 |     #         img_path, landmarks = LandmarkHelper.parse(line)
118 |     #         image_path = os.path.join("./dataset", img_path)
119 | 
120 |     #         im = cv2.imread(image_path)
121 |     #         image, landmarks = scale_and_shift(
122 |     #             im, landmarks, scale_range=(1.1, 1.5), output_size=112)
123 | 
124 |     #         cv2.imshow("image", image)
125 |     #         cv2.waitKey(0)
126 | 
127 |     if not os.path.exists(args['new_path']):
128 |         os.mkdir(args['new_path'])
129 | 
130 |     root_dir = args['base_dir']
131 |     df = pd.read_csv(args['landmark_csv'], header=None)
132 | 
133 |     ldmarks = np.array(df.iloc[:, 1:])
134 |     ldmarks = ldmarks.reshape((-1, 106, 2)) * \
135 |         (args['output_size'], args['output_size'])
136 | 
137 |     ref_leftpupil = np.mean(ldmarks[:, 34], axis=0)
138 |     ref_rightpupil = np.mean(ldmarks[:, 92], axis=0)
139 |     ref_nose = np.mean(ldmarks[:, 86], axis=0)
140 |     ref_array = np.stack(
141 |         [ref_leftpupil, ref_rightpupil, ref_nose], axis=0).astype(np.float32)
142 | 
143 |     boxes = np.empty(
144 |         (df.shape[0], args['output_size'], args['output_size'], 3), dtype=np.uint8)
145 |     landmarks = np.empty((df.shape[0], 212))
146 | 
147 |     for idx in tqdm(range(df.shape[0])):
148 | 
149 |         im = cv2.imread(os.path.join(root_dir, df.iloc[idx, 0]))
150 |         im = cv2.resize(im, (args['output_size'], args['output_size']))
151 |         gt_ldmarks = ldmarks[idx]
152 | 
153 |         gt = np.array(df.iloc[idx, 1:], dtype=np.float32).reshape(
154 |             (-1, 2)) * (args['output_size'], args['output_size'])
155 |         gt_leftpupil = gt[34]
156 |         gt_rightpupil = gt[92]
157 |         gt_nose = gt[86]
158 |         gt_array = np.stack(
159 |             [gt_leftpupil, gt_rightpupil, gt_nose], axis=0).astype(np.float32)
160 | 
161 |         # M = cv2.getAffineTransform(gt_array, ref_array)
162 |         # Similarity transformation
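        # tform.estimate fits a least-squares similarity transform (rotation,
        # uniform scale and translation) mapping this face's pupils/nose onto
        # the reference points; image and landmarks are then warped with it.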
163 |         tform = transform.SimilarityTransform()
164 |         tform.estimate(gt_array, ref_array)
165 |         tfm = tform.params[0: 2, :]
166 |         dst = cv2.warpAffine(
167 |             im, tfm, (args['output_size'], args['output_size']))
168 | 
169 |         b = np.ones((gt_ldmarks.shape[0], 1))
170 |         d = np.concatenate((gt_ldmarks, b), axis=1)
171 |         gt_ldmarks = np.dot(d, np.transpose(tfm))
172 | 
173 |         boxes[idx] = dst
174 |         landmarks[idx] = (gt_ldmarks / (args['output_size'])).flatten()
175 | 
176 |         # for ldmark in gt_ldmarks:
177 |         #     cv2.circle(
178 |         #         dst, (int(ldmark[0]), int(ldmark[1])), 2, (255, 0, 0), -1)
179 |         # cv2.imshow("image", dst)
180 |         # cv2.waitKey(0)
181 | 
182 |     # Save the aligned images and the new landmarks
183 |     ldmark_dict = dict()
184 | 
185 |     for box, ldmark, num in tqdm(zip(boxes, landmarks, np.arange(df.shape[0]))):
186 |         cv2.imwrite("{}.png".format(
187 |             os.path.join(args['new_path'], str(num).zfill(5))), box)
188 |         ldmark_dict["{}.png".format(str(num).zfill(5))] = ldmark
189 | 
190 |     df = pd.DataFrame(ldmark_dict).T
191 |     df.to_csv("{}/face_landmarks.csv".format(args['new_path']),
192 |               encoding="utf-8", header=None)
193 | 
194 |     pprint("Conversion complete!")
195 | 
--------------------------------------------------------------------------------