├── LICENSE ├── README.md ├── common ├── __init__.py ├── landmark_helper.py └── landmark_utils.py ├── create_dataset.py ├── data_generator.py ├── demo ├── CED.png ├── ced_curve.py └── demo.py ├── detection ├── detector.py ├── model │ ├── mnet.25-0000.params │ └── mnet.25-symbol.json ├── rcnn │ └── processing │ │ ├── __init__.py │ │ ├── bbox_transform.py │ │ ├── generate_anchor.py │ │ └── nms.py └── retinaface.py ├── loss.py ├── model.py ├── train.py └── transformer.py /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | MNE (Mean Normalized Error) score:
2 | 0.0426 pfld
3 | 0.0348 mobilenetv3
4 | 
--------------------------------------------------------------------------------
/common/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ["LandmarkHelper", "LandmarkImageCrop"]
2 | 
--------------------------------------------------------------------------------
/common/landmark_helper.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | '''
3 | Jacky LUO, 2019-01-14
4 | '''
5 | import numpy as np
6 | import cv2
7 | import sys
8 | 
9 | 
10 | class LandmarkHelper(object):
11 |     '''
12 |     Helper for different landmark types
13 |     '''
14 | 
15 |     def __init__(self):
16 |         pass
17 | 
18 |     @classmethod
19 |     def parse(cls, line):
20 |         '''
21 |         Parse one txt line to get the file path, landmarks and pose
22 |         Args:
23 |             cls: this class
24 |             line: one line of the input txt
25 |         Returns:
26 |             see __landmark106_txt_parse
27 |         Raises:
28 |             unsupported landmark type
29 |         '''
30 |         return cls.__landmark106_txt_parse(line)
31 | 
32 |     @staticmethod
33 |     def __landmark106_txt_parse(line):
34 |         '''
35 |         [0] image path
36 |         [1:5] bounding box
37 |         [5:-3] 106 landmarks (212 values)
38 |         [-3:] 3 pose angles
39 |         '''
40 |         a = line.split()
41 |         landmarks = list(map(int, a[1:-3]))[4:]
42 |         pose = list(map(float, a[1:]))[-3:]
43 | 
44 |         return a[0], np.array(landmarks).reshape((-1, 2)), np.array(pose, dtype=np.float32)
45 | 
46 |     @staticmethod
47 |     def flip(a):
48 |         '''
49 |         Flip landmarks horizontally. The points have to be renumbered after the flip
50 |         Args:
51 |             a: original landmarks
52 |         Returns:
53 |             landmarks: new landmarks
54 |         Raises:
55 |             unsupported landmark type
56 |         '''
57 |         landmarks = np.concatenate((
58 |             a[0:1], a[17:33], a[1:17],
59 |             a[87:89], a[93:94], a[91:92], a[90:91], a[92:93], a[89:90],
60 |             a[94:95], a[96:97], a[95:96], a[99:102][::-1], a[97:99],
61 |             a[104:106][::-1], a[102:104],
62 |             a[61:62], a[53:54], a[57:60], a[54:57], a[60:61], a[52:53],
63 |             a[62:63], a[67:71], a[63:67], a[71:72],
64 |             a[72:75], a[81:86], a[80:81], a[75:80], a[86:87],
65 |             a[33:35], a[39:40], a[37:38], a[36:37], a[38:39], a[35:36],
66 |             a[40:41], a[42:43], a[41:42], a[46:47], a[47:48], a[45:46], a[44:45], a[43:44],
67 |             a[50:51], a[51:52], a[49:50], a[48:49]
68 |         ), axis=0)
69 | 
70 |         return landmarks.reshape((-1, 2))
71 | 
--------------------------------------------------------------------------------
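A note on the annotation format consumed by LandmarkHelper.parse above: judging from the slicing in __landmark106_txt_parse, each txt line carries an image path, four bounding-box integers, 212 landmark integers and three pose floats, all whitespace-separated. A minimal sketch (the path and values below are made up):

    from common.landmark_helper import LandmarkHelper

    # hypothetical line: path, 4 bbox ints, 106 * (x, y), 3 pose angles
    line = "imgs/0001.jpg 10 20 110 120 " + "30 40 " * 106 + "0.1 -2.3 5.0"
    path, landmarks, pose = LandmarkHelper.parse(line)
    assert landmarks.shape == (106, 2) and pose.shape == (3,)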
/common/landmark_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import os
4 | from tqdm import tqdm
5 | np.random.seed(2018)
6 | 
7 | 
8 | class LandmarkImageCrop(object):
9 |     '''
10 |     Facial 106 landmarks augmentation.
11 |     '''
12 | 
13 |     def __init__(self):
14 |         pass
15 | 
16 |     def __visualize(self, image, landmarks, output_size):
17 |         '''
18 |         Visualize images and corresponding landmarks
19 |         '''
20 |         try:
21 |             image.shape
22 |         except AttributeError:
23 |             raise ValueError("read image error...")
24 | 
25 |         for (x, y) in landmarks:
26 |             cv2.circle(image, (int(x * output_size), int(y * output_size)),
27 |                        1, (0, 0, 255), -1)
28 | 
29 |         cv2.imshow("image", image)
30 |         cv2.waitKey(0)
31 | 
32 |     def mini_crop_by_landmarks(self, sample_list, scale, output_size=112, is_vis=False):
33 |         '''
34 |         Crop the full image to a mini face patch. Only valid images are kept
35 |         Args:
36 |             sample_list: (image path, landmarks, pose) tuples
37 |             scale: upscale rate, a float or a (min, max) range
38 |             is_vis: whether to visualize the cropped result
39 |             output_size: output image size
40 |         Returns:
41 |             new sample list
42 |         '''
43 | 
44 |         boxes = np.empty((len(sample_list), output_size,
45 |                           output_size, 3), dtype=np.uint8)
46 |         ldmarks = np.empty((len(sample_list), 212))
47 |         poses = np.empty((len(sample_list), 3))
48 | 
49 |         for idx, sample in tqdm(enumerate(sample_list)):
50 |             image = cv2.imread(sample[0])
51 |             landmarks = sample[1]
52 |             pose = sample[2]
53 |             try:
54 |                 (x1, y1, x2, y2), new_size, need_pad, (p_x, p_y, p_w, p_h) = LandmarkImageCrop.get_bbox_of_landmarks(
55 |                     image, landmarks, scale, 0.5)
56 |             except Exception:
57 |                 raise ValueError("failed to crop sample: {}".format(sample[0]))
58 |             # Extract roi image
59 |             box_image = image[y1:y2, x1:x2]
60 |             if need_pad:
61 |                 box_image = np.lib.pad(
62 |                     box_image, ((p_y, p_h), (p_x, p_w), (0, 0)), 'constant')
63 |             box_image = cv2.resize(box_image, (output_size, output_size))
64 |             landmarks = (landmarks - (x1 - p_x, y1 - p_y)) / \
65 |                 (new_size, new_size)
66 |             if is_vis:
67 |                 self.__visualize(box_image, landmarks, output_size)
68 |             # Convert to (212,)
69 |             landmarks = landmarks.flatten()
70 | 
71 |             boxes[idx] = box_image
72 |             ldmarks[idx] = landmarks
73 |             poses[idx] = pose
74 | 
75 |         return boxes, ldmarks, poses
76 | 
77 |     @staticmethod
78 |     def get_bbox_of_landmarks(image, landmarks, scale, shift_rate=0.3):
79 |         '''
80 |         Expand the tight landmark box into a bigger bbox
81 |         Args:
82 |             image: a numpy array
83 |             landmarks: face landmarks with format [(x1, y1), ...]. range is 0-w or h in int
84 |             scale: scale bbox in (min, max). eg: (1.3, 1.5)
85 |             shift_rate: random shift range; >= 0.5 keeps the box centered
86 |         Returns:
87 |             return new bbox and other info
88 |         Raises:
89 |             No
90 |         '''
91 |         ori_h, ori_w = image.shape[:2]
92 | 
93 |         x = int(min(landmarks[:, 0]))
94 |         y = int(min(landmarks[:, 1]))
95 |         w = int(max(landmarks[:, 0]) - x)
96 |         h = int(max(landmarks[:, 1]) - y)
97 |         if not isinstance(scale, float):
98 |             # scale was given as a (min, max) range:
99 |             # sample a random rate from it
100 |             scale = np.random.randint(
101 |                 int(scale[0] * 100.0), int(scale[1] * 100.0)) / 100.0
102 |         new_size = int(max(w, h) * scale)
103 |         if shift_rate >= 0.5:
104 |             x1 = x - (new_size - w) // 2
105 |             y1 = y - (new_size - h) // 2
106 |         else:
107 |             x1 = x - np.random.randint(int((new_size-w) * shift_rate),
108 |                                        int((new_size-w) * (1.0-shift_rate)))
109 |             y1 = y - np.random.randint(int((new_size-h) * shift_rate),
110 |                                        int((new_size-h) * (1.0-shift_rate)))
111 |         x2 = x1 + new_size
112 |         y2 = y1 + new_size
113 |         need_pad = False
114 |         p_x, p_y, p_w, p_h = 0, 0, 0, 0
115 |         if x1 < 0:
116 |             p_x = -x1
117 |             x1 = 0
118 |             need_pad = True
119 |         if y1 < 0:
120 |             p_y = -y1
121 |             y1 = 0
122 |             need_pad = True
123 |         if x2 > ori_w:
124 |             p_w = x2 - ori_w
125 |             x2 = ori_w
126 |             need_pad = True
127 |         if y2 > ori_h:
128 |             p_h = y2 - ori_h
129 |             y2 = ori_h
130 |             need_pad = True
131 | 
132 |         return (x1, y1, x2, y2), new_size, need_pad, (p_x, p_y, p_w, p_h)
133 | 
--------------------------------------------------------------------------------
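To see how the crop and padding in get_bbox_of_landmarks fit together, here is a small hedged sketch on synthetic data; shift_rate=0.5 keeps the square centered, and the returned paddings are non-zero only when the square leaves the image:

    import numpy as np
    from common.landmark_utils import LandmarkImageCrop

    img = np.zeros((200, 200, 3), dtype=np.uint8)       # synthetic image
    pts = np.array([[80, 90], [120, 90], [100, 130]])   # toy landmarks
    (x1, y1, x2, y2), size, need_pad, (px, py, pw, ph) = \
        LandmarkImageCrop.get_bbox_of_landmarks(img, pts, 1.4, shift_rate=0.5)
    # Clamped box plus padding always restores the full square side
    assert (x2 - x1) + px + pw == size and (y2 - y1) + py + ph == size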
/create_dataset.py:
--------------------------------------------------------------------------------
1 | """
2 | Create the training dataset
3 | """
4 | import cv2
5 | import os
6 | import sys
7 | import glob
8 | import numpy as np
9 | from tqdm import tqdm
10 | import pandas as pd
11 | 
12 | from common.landmark_helper import LandmarkHelper
13 | from common.landmark_utils import LandmarkImageCrop
14 | import time
15 | from pprint import pprint
16 | 
17 | import argparse
18 | ap = argparse.ArgumentParser()
19 | ap.add_argument("-l", "--landmark_txt", type=str, default='./test_dataset/landmarks.txt',
20 |                 help="path to the landmarks txt")
21 | ap.add_argument("-b", "--base_dir", type=str, default='./test_dataset',
22 |                 help="base dataset dir")
23 | ap.add_argument("-s", "--output_size", type=int, default=64,
24 |                 help="output image size")
25 | ap.add_argument("-n", "--new_path", type=str, default='./demo_test_dataset',
26 |                 help="directory to save the new images")
27 | args = vars(ap.parse_args())
28 | 
29 | 
30 | def main():
31 | 
32 |     if not os.path.exists(args['new_path']):
33 |         os.mkdir(args['new_path'])
34 | 
35 |     with open(args['landmark_txt']) as f:
36 | 
37 |         samples_list = []
38 | 
39 |         for line in f.readlines():
40 |             # Parse the txt file
41 |             img_path, landmarks, poses = LandmarkHelper.parse(line)
42 |             image_path = os.path.join(args['base_dir'], img_path)
43 |             samples_list.append([image_path, landmarks, poses])
44 | 
45 |         boxes, ldmarks, poses = LandmarkImageCrop().mini_crop_by_landmarks(
46 |             samples_list, scale=(1.2, 1.5), output_size=args['output_size'], is_vis=False)
47 | 
48 |         # Save images, new landmarks and poses
49 |         mix_dict = dict()
50 | 
51 |         for box, ldmark, pose, num in tqdm(zip(boxes, ldmarks, poses, np.arange(len(samples_list)))):
52 |             cv2.imwrite("{}.png".format(
53 |                 os.path.join(args['new_path'], str(num).zfill(5))), box)
54 |             mix_dict["{}.png".format(str(num).zfill(5))] = np.concatenate(
55 |                 (ldmark, pose), axis=0)
56 |             # print(np.concatenate((ldmark, pose), axis=0))
57 | 
58 |         df = pd.DataFrame(mix_dict).T
59 |         df.to_csv("{}/face_mixed.csv".format(args['new_path']),
60 |                   encoding="utf-8", header=None)
61 | 
62 |         pprint("Conversion complete!")
63 | 
64 | 
65 | if __name__ == "__main__":
66 |     main()
67 | 
--------------------------------------------------------------------------------
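The face_mixed.csv written above is exactly what data_generator.py below reads back: one row per crop, the file name in column 0, the 212 normalized landmark coordinates next, and the three pose angles last (see the iloc slicing in __data_generation). A minimal hedged usage sketch, assuming the default paths from this script:

    from data_generator import DataGenerator

    # batch_size and output_size must match the crops created above
    gen = DataGenerator(32, './demo_test_dataset',
                        './demo_test_dataset/face_mixed.csv', output_size=64)
    X, (y_landmarks, y_poses) = gen[0]  # X: (32, 64, 64, 3), y_landmarks: (32, 212)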
/data_generator.py:
--------------------------------------------------------------------------------
1 | # https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly
2 | 
3 | from keras.utils import Sequence
4 | import numpy as np
5 | import pandas as pd
6 | import sys
7 | import os
8 | import cv2
9 | from pprint import pprint
10 | 
11 | from common.landmark_utils import LandmarkImageCrop
12 | from common.landmark_helper import LandmarkHelper
13 | 
14 | 
15 | class DataGenerator(Sequence):
16 |     '''
17 |     Generates data for Keras
18 |     '''
19 | 
20 |     def __init__(self, batch_size, root_dir, csv_file, output_size=112,
21 |                  shuffle=False, max_angle=45, transformer=None):
22 | 
23 |         self.landmarks_frame = pd.read_csv(csv_file, header=None)
24 |         self.batch_size = batch_size
25 |         self.root_dir = root_dir
26 |         self.shuffle = shuffle
27 |         self.max_angle = max_angle
28 | 
29 |         assert isinstance(output_size, int)
30 |         self.output_size = output_size
31 |         self.transformer = transformer
32 |         self.on_epoch_end()
33 | 
34 |     def __getitem__(self, index):
35 |         '''Generate one batch of data'''
36 | 
37 |         indexes = self.indexes[index *
38 |                                self.batch_size: (index+1) * self.batch_size]
39 |         list_frames = self.landmarks_frame.iloc[indexes, :]
40 |         X, y_ld, y_p = self.__data_generation(list_frames)
41 | 
42 |         if self.transformer:
43 | 
44 |             X_imgs = np.empty(
45 |                 (self.batch_size, self.output_size, self.output_size, 3), dtype=np.uint8)
46 |             y_ldmarks = np.empty((self.batch_size, 212), dtype=np.float32)
47 |             y_poses = np.empty((self.batch_size, 3), dtype=np.float32)
48 | 
49 |             for idx, img, ldmark, pose in zip(np.arange(len(indexes)), X, y_ld, y_p):
50 |                 ldmark = ldmark.reshape((-1, 2)) * \
51 |                     (self.output_size, self.output_size)
52 | 
53 |                 # Data augmentation; the parameters can be customized
54 |                 img, ldmark, pose = self.__flip(img, ldmark, pose)
55 |                 img, ldmark, pose = self.__rotate(
56 |                     img, ldmark, pose, max_angle=self.max_angle)
57 | 
58 |                 # No need to modify the pose here...
59 |                 img, ldmark, pose = self.__scale_and_shift(
60 |                     img, ldmark, pose, (1.1, 1.5), output_size=self.output_size)
61 |                 img, ldmark, pose = self.__occlusion(img, ldmark, pose)
62 | 
63 |                 X_imgs[idx] = img
64 |                 y_ldmarks[idx] = (ldmark / (self.output_size,
65 |                                             self.output_size)).flatten()
66 |                 y_poses[idx] = pose
67 |             # Image normalization
68 |             return X_imgs.astype(np.float32) / 255., [y_ldmarks, y_poses]
69 | 
70 |         return X.astype(np.float32) / 255., [y_ld, y_p]
71 | 
72 |     def __len__(self):
73 |         '''Denotes the number of batches per epoch'''
74 |         return int(np.floor(len(self.landmarks_frame) / self.batch_size))
75 | 
76 |     def on_epoch_end(self):
77 |         '''Updates indexes after each epoch'''
78 |         self.indexes = np.arange(len(self.landmarks_frame))
79 |         if self.shuffle:
80 |             np.random.shuffle(self.indexes)
81 | 
82 |     @property
83 |     def data_predict(self):
84 |         '''Predict a batch-size chunk of data'''
85 |         return self.__data_generation(self.landmarks_frame.iloc[:self.batch_size, :])  # first batch
86 | 
87 |     def __data_generation(self, list_frames):
88 |         '''Produce batches of data'''
89 | 
90 |         X_imgs = np.empty((self.batch_size, self.output_size,
91 |                            self.output_size, 3), dtype=np.uint8)
92 |         y_ldmarks = np.empty((self.batch_size, 212), dtype=np.float32)
93 |         y_poses = np.empty((self.batch_size, 3), dtype=np.float32)
94 | 
95 |         for i in range(len(list_frames)):
96 |             image_path = os.path.join(
97 |                 self.root_dir, list_frames.iloc[i, 0])
98 |             X_imgs[i] = cv2.imread(image_path)
99 |             y_ldmarks[i] = list_frames.iloc[i, 1:-3]
100 |             y_poses[i] = list_frames.iloc[i, -3:]
101 | 
102 |         return X_imgs, y_ldmarks, y_poses
103 | 
104 |     def __flip(self, image, landmarks, poses, run_prob=0.5):
105 |         '''
106 |         Flip the image. Horizontal only
107 | 
108 |         Args:
109 |             image: a numpy array
110 |             landmarks: face landmarks with format [(x1, y1), (x2, y2), ...]
111 |             run_prob: probability of skipping this operation. 0.0-1.0
112 |         Returns:
113 |             an image and landmarks will be returned
114 |         Raises:
115 |             Unsupported count of landmarks
116 |         '''
117 |         if np.random.rand() < run_prob:
118 |             return image, landmarks, poses
119 |         image = np.fliplr(image)
120 |         landmarks[:, 0] = image.shape[1] - landmarks[:, 0]
121 |         landmarks = LandmarkHelper.flip(landmarks)
122 | 
123 |         # pitch, roll, yaw...
124 |         poses[1] = -poses[1]
125 |         poses[2] = -poses[2]
126 | 
127 |         return image, landmarks, poses
128 | 
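# Note on __rotate below: the landmarks are transformed with the same
# affine matrix as the image, via homogeneous coordinates:
#   M = cv2.getRotationMatrix2D(center, angle, 1)  # 2x3 matrix
#   [x', y'] = [x, y, 1] @ M.T
# so the points stay aligned with the warped pixels.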
129 |     def __rotate(self, image, landmarks, poses, max_angle, run_prob=0.5):
130 |         '''
131 |         Rotate the image.
132 | 
133 |         Args:
134 |             image: a numpy array
135 |             landmarks: face landmarks with format [(x1, y1), ...]. range is 0-w or h in int
136 |             max_angle: rotate by a random angle in [-max_angle, max_angle]. range is 0-180
137 |         Returns:
138 |             an image and landmarks will be returned
139 |         Raises:
140 |             No
141 |         '''
142 |         if np.random.rand() < run_prob:
143 |             return image, landmarks, poses
144 | 
145 |         c_x = (min(landmarks[:, 0]) + max(landmarks[:, 0])) / 2
146 |         c_y = (min(landmarks[:, 1]) + max(landmarks[:, 1])) / 2
147 |         h, w = image.shape[:2]
148 |         angle = np.random.randint(-max_angle, max_angle)
149 |         M = cv2.getRotationMatrix2D((c_x, c_y), angle, 1)
150 |         image = cv2.warpAffine(image, M, (w, h))
151 |         b = np.ones((landmarks.shape[0], 1))
152 |         d = np.concatenate((landmarks, b), axis=1)
153 |         landmarks = np.dot(d, np.transpose(M))
154 | 
155 |         # Adjust the roll angle
156 |         poses[1] += angle
157 | 
158 |         return image, landmarks, poses
159 | 
160 |     def __occlusion(self, image, landmarks, poses, sl=0.05, sh=0.2, r1=0.3, mean=[0, 0, 0], run_prob=0.2):
161 |         '''
162 |         Occlude a random part of the image (random erasing)
163 | 
164 |         sl: min erasing area
165 |         sh: max erasing area
166 |         r1: min aspect ratio
167 |         mean: erasing value
168 |         https://github.com/zhunzhong07/Random-Erasing/blob/master/transforms.py
169 |         '''
170 | 
171 |         if np.random.rand() < run_prob:
172 |             return image, landmarks, poses
173 | 
174 |         for attempt in range(50):
175 |             area = image.shape[0] * image.shape[1]
176 |             target_area = np.random.uniform(sl, sh) * area
177 |             aspect_ratio = np.random.uniform(r1, 1/r1)
178 | 
179 |             h = int(round(np.sqrt(target_area * aspect_ratio)))
180 |             w = int(round(np.sqrt(target_area / aspect_ratio)))
181 | 
182 |             if w < image.shape[1] and h < image.shape[0]:
183 |                 x1 = np.random.randint(0, image.shape[0] - h)
184 |                 y1 = np.random.randint(0, image.shape[1] - w)
185 |                 image[x1: x1+h, y1: y1+w, 0] = mean[0]
186 |                 image[x1: x1+h, y1: y1+w, 1] = mean[1]
187 |                 image[x1: x1+h, y1: y1+w, 2] = mean[2]
188 | 
189 |                 return image, landmarks, poses
190 | 
191 |         return image, landmarks, poses
192 | 
193 |     def __scale_and_shift(self, image, landmarks, poses, scale_range, output_size, run_prob=0.5):
194 |         '''
195 |         Auto-generate a bbox, then randomly scale and shift it.
196 | 
197 |         Args:
198 |             image: a numpy array
199 |             landmarks: face landmarks with format [(x1, y1), ...]. range is 0-w or h in int
200 |             scale_range: scale bbox in (min, max).
eg: (1.3, 1.5) 201 | output_size: output size of image 202 | Returns: 203 | an image and landmarks will be returned 204 | Raises: 205 | No 206 | ''' 207 | if np.random.rand() < run_prob: 208 | return image, landmarks, poses 209 | 210 | (x1, y1, x2, y2), new_size, need_pad, (p_x, p_y, p_w, p_h) = LandmarkImageCrop.get_bbox_of_landmarks( 211 | image, landmarks, scale_range, shift_rate=0) 212 | box_image = image[y1:y2, x1:x2] 213 | if need_pad: 214 | box_image = np.lib.pad( 215 | box_image, ((p_y, p_h), (p_x, p_w), (0, 0)), 'constant') 216 | box_image = cv2.resize(box_image, (output_size, output_size)) 217 | landmarks = (landmarks - (x1 - p_x, y1 - p_y)) 218 | 219 | return box_image, landmarks, poses 220 | -------------------------------------------------------------------------------- /demo/CED.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JACKYLUO1991/FaceLandmarks/0e7eb636d25ba93e64cc3433a79239be51c883cc/demo/CED.png -------------------------------------------------------------------------------- /demo/ced_curve.py: -------------------------------------------------------------------------------- 1 | # Normalized Mean Error 2 | # Created by Jacky LUO 3 | # https://github.com/MarekKowalski/DeepAlignmentNetwork 4 | 5 | import sys 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from keras.models import load_model 9 | from keras.utils import custom_object_scope 10 | import pandas as pd 11 | import os 12 | import cv2 as cv 13 | from tqdm import tqdm 14 | from scipy.integrate import simps 15 | 16 | import tensorflow as tf 17 | tf.logging.set_verbosity(tf.logging.ERROR) 18 | 19 | sys.path.append("../") 20 | from loss import * 21 | from model import relu6, hard_swish 22 | 23 | 24 | class LandmarkNme(object): 25 | """Measure normalized mean error""" 26 | 27 | failure_threshold = 0.10 28 | 29 | def __init__(self, model_path, nb_points=106, output_dim=112): 30 | 31 | with custom_object_scope({'normalized_mean_error': normalized_mean_error, 32 | 'wing_loss': wing_loss, 'smoothL1': smoothL1, 33 | 'relu6': relu6, 'hard_swish': hard_swish}): 34 | self.model = load_model(model_path) 35 | 36 | self.output_dim = output_dim 37 | self.nb_points = nb_points 38 | 39 | self.__gt_landmarks = None 40 | self.__pred_landmarks = None 41 | self.__image_names = None 42 | 43 | @property 44 | def gt_landmarks(self): 45 | return self.__gt_landmarks 46 | 47 | @gt_landmarks.setter 48 | def gt_landmarks(self, landmarks_csv): 49 | '''Get Groundtruth landmarks''' 50 | df = pd.read_csv(landmarks_csv, header=None) 51 | self.__image_names = df.iloc[:, 0].values 52 | self.__gt_landmarks = df.iloc[:, 1:- 53 | 3].values.reshape((-1, self.nb_points, 2)) 54 | 55 | @property 56 | def pred_landmarks(self): 57 | return self.__pred_landmarks 58 | 59 | @pred_landmarks.setter 60 | def pred_landmarks(self, prefix): 61 | """Get pred landmarks""" 62 | marks_list = [] 63 | for image_name in tqdm(self.__image_names): 64 | image_path = os.path.join(prefix, image_name) 65 | # Resize image to specific size like 112, 64... 66 | img = cv.resize(cv.imread(image_path), 67 | (self.output_dim, self.output_dim)) 68 | if self.output_dim == 64: 69 | img_normalized = img.astype(np.float32) 70 | else: 71 | img_normalized = img.astype(np.float32) / 255. 
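# NOTE (assumption): the 64x64 'basenet' checkpoint appears to have been
# trained on raw 0-255 inputs, while the 112x112 models expect [0, 1]
# inputs; that is why the /255. scaling is skipped when output_dim == 64.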
72 |         face_img = img_normalized.reshape(
73 |             1, self.output_dim, self.output_dim, 3)
74 |         if self.output_dim == 64:
75 |             marks = self.model.predict_on_batch(face_img)
76 |         else:
77 |             marks = self.model.predict_on_batch(face_img)[0]
78 |         # marks = self.model.predict_on_batch(face_img)
79 |         # marks = np.reshape(marks, (-1, 2))
80 |         marks_list.append(marks)
81 |         # print(marks)
82 |     self.__pred_landmarks = np.array(
83 |         marks_list, dtype=np.float32).reshape((-1, self.nb_points, 2))
84 | 
85 |     def landmark_error(self, normalization='centers'):
86 |         """Get the landmark error between gt and pred"""
87 |         errors = []
88 |         n_imgs = len(self.__gt_landmarks)
89 | 
90 |         for i in tqdm(range(n_imgs)):
91 |             gt_ldmarks = self.__gt_landmarks[i]
92 |             pred_ldmarks = self.__pred_landmarks[i]
93 | 
94 |             if normalization == 'centers':
95 |                 normDist = np.linalg.norm(
96 |                     gt_ldmarks[38] - gt_ldmarks[92])
97 |             error = np.mean(np.sqrt(np.sum((gt_ldmarks -
98 |                                             pred_ldmarks) ** 2, axis=1))) / normDist
99 |             errors.append(error)
100 | 
101 |         return errors
102 | 
103 |     @classmethod
104 |     def plot_ced(cls, errors_lists, step=0.0001, fontsize=18, labels=None, colors=None,
105 |                  showCurve=True):
106 |         '''Plot the CED curve'''
107 |         ced_list = []
108 |         xAxis_list = []
109 | 
110 |         for errors in errors_lists:
111 |             nErrors = len(errors)
112 |             xAxis = list(np.arange(0., cls.failure_threshold + step, step))
113 |             ced = [float(np.count_nonzero(np.asarray(errors) <= x)) /
114 |                    nErrors for x in xAxis]
115 |             # AUC = simps(ced, x=xAxis) / cls.failure_threshold
116 |             # failureRate = 1. - ced[-1]
117 |             ced_list.append(ced)
118 |             xAxis_list.append(xAxis)
119 | 
120 |         if showCurve:
121 |             if labels is not None and colors is not None:
122 |                 plt.grid()
123 |                 plt.axis([0.0, cls.failure_threshold, 0, 1.0])
124 |                 plt.xticks(fontsize=fontsize)
125 |                 plt.yticks(fontsize=fontsize)
126 |                 for i in range(len(errors_lists)):
127 |                     plt.plot(xAxis_list[i], ced_list[i], color=colors[i],
128 |                              label=labels[i])
129 |                 plt.legend()
130 |                 plt.xlabel('Mean Normalized Error', fontsize=fontsize)
131 |                 plt.ylabel('Proportion of images', fontsize=fontsize)
132 |                 plt.show()
133 | 
134 | 
135 | if __name__ == "__main__":
136 | 
137 |     # Pipeline
138 |     errors_lists = []
139 |     # PFLD network
140 |     ln = LandmarkNme("../checkpoints/pfld.h5")
141 |     ln.gt_landmarks = "../new_test_dataset/face_mixed.csv"
142 |     ln.pred_landmarks = "../new_test_dataset"
143 |     errors = ln.landmark_error()
144 |     errors_lists.append(errors)
145 | 
146 |     # Mobilenetv3 network
147 |     ln2 = LandmarkNme("../checkpoints/mobilenetv3.h5")
148 |     ln2.gt_landmarks = "../new_test_dataset/face_mixed.csv"
149 |     ln2.pred_landmarks = "../new_test_dataset"
150 |     errors2 = ln2.landmark_error()
151 |     errors_lists.append(errors2)
152 | 
153 |     # Basenet network
154 |     ln3 = LandmarkNme("../checkpoints/model.h5", output_dim=64)
155 |     ln3.gt_landmarks = "../new_test_dataset/face_mixed.csv"
156 |     ln3.pred_landmarks = "../new_test_dataset"
157 |     errors3 = ln3.landmark_error()
158 |     errors_lists.append(errors3)
159 | 
160 |     # CED curve show
161 |     LandmarkNme.plot_ced(errors_lists, showCurve=True,
162 |                          labels=['Pfld', 'Mobilenetv3', 'Basenet'], colors=['blue', 'green', 'red'])
163 | 
--------------------------------------------------------------------------------
/demo/demo.py:
--------------------------------------------------------------------------------
1 | """
2 | For static-image landmark detection
3 | """
4 | import numpy as np
5 | import cv2
6 | import time
7 | import glob
8 | # import imutils
9 | import sys
10 | import os
11 | sys.path.append("../")
12 | # from transformer import FaceAlign
13 | from detection.detector import MarkDetector
14 | 
15 | import tensorflow as tf
16 | tf.logging.set_verbosity(tf.logging.ERROR)
17 | 
18 | CNN_INPUT_SIZE = 112
19 | # face_align = FaceAlign(out_size=CNN_INPUT_SIZE)
20 | 
21 | 
22 | def main(images_dir, savePath):
23 | 
24 |     if not os.path.isdir(savePath):
25 |         os.makedirs(savePath)
26 | 
27 |     images_path = os.listdir(images_dir)
28 | 
29 |     # mark_detector = MarkDetector(threshold=[0.7, 0.6, 0.95],
30 |     #                              mark_model='../checkpoints/pfld.h5')
31 |     mark_detector = MarkDetector(model_path='../detection/model/mnet.25',
32 |                                  gpuid=-1, thresh=0.9, scales=[224, 384],
33 |                                  mark_model='../checkpoints/pfld.h5')
34 | 
35 |     for idx, image_path in enumerate(images_path):
36 | 
37 |         img = cv2.imread(os.path.join(images_dir, image_path))
38 |         # img = imutils.resize(img, width=512)
39 |         h, w, _ = img.shape
40 |         img_copy = img.copy()
41 |         # img_copy = cv2.cvtColor(img_copy, cv2.COLOR_BGR2RGB)
42 |         s1 = time.time()
43 |         faceboxes, face_ldmarks = mark_detector.extract_cnn_facebox(
44 |             img_copy)
45 |         print(f"Detection: {time.time() - s1}s")
46 |         pad_type = -1
47 | 
48 |         if len(faceboxes) == 0:
49 |             print("No face detected...")
50 |             continue
51 | 
52 |         else:
53 |             for facebox, ldmarks in zip(faceboxes, face_ldmarks):
54 | 
55 |                 facebox = list(map(int, facebox))
56 |                 x_min, y_min, x_max, y_max = facebox[0], facebox[1], facebox[2], facebox[3]
57 | 
58 |                 if x_min < 0 or y_min < 0 or x_max > w or y_max > h:
59 |                     pad_type = 1
60 |                     absTmp = np.minimum(
61 |                         (x_min, y_min, w - x_max, h - y_max), 0)
62 |                     pad = np.max(np.abs(absTmp))
63 |                     # Pad the entire image
64 |                     img = cv2.copyMakeBorder(img, pad, pad, pad, pad,
65 |                                              cv2.BORDER_CONSTANT, value=[0, 0, 0])
66 |                     y_min, y_max, x_min, x_max = y_min + pad, y_max + pad, x_min + pad, x_max + pad
67 | 
68 |                 face_img_crop = img[y_min: y_max, x_min: x_max]
69 |                 face_img_align_uint = cv2.resize(
70 |                     face_img_crop, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
71 |                 # ldmarks = [(x / (face_img_crop.shape[0] / CNN_INPUT_SIZE), y /
72 |                 #             (face_img_crop.shape[1] / CNN_INPUT_SIZE)) for (x, y) in ldmarks]
73 |                 #####################################################################
74 |                 # 5-point alignment code
75 |                 # face_img_align_uint, tform = face_align.face_aligned(
76 |                 #     face_img_align_uint, ldmarks)
77 |                 #####################################################################
78 |                 if CNN_INPUT_SIZE == 64:
79 |                     face_img_align = face_img_align_uint.astype(np.float32)
80 |                 else:
81 |                     face_img_align = face_img_align_uint.astype(np.float32) / 255.
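# The landmark model outputs coordinates normalized to [0, 1] relative to
# the square crop; below they are rescaled by the box side (x_max - x_min)
# and translated back into full-image coordinates.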
82 |                 face_img0 = face_img_align.reshape(
83 |                     1, CNN_INPUT_SIZE, CNN_INPUT_SIZE, 3)
84 | 
85 |                 s2 = time.time()
86 |                 marks = mark_detector.detect_marks_keras(face_img0)
87 |                 print(f"Landmarks: {time.time() - s2}s")
88 |                 #############################################################################
89 |                 # Inverse similarity transformation matrix
90 |                 # marks *= CNN_INPUT_SIZE
91 |                 # b = np.ones((marks.shape[0], 1))
92 |                 # d = np.concatenate((marks, b), axis=1)
93 |                 # M = cv2.invertAffineTransform(tform.params[:2, :])
94 |                 # marks = np.dot(d, M.T)
95 |                 # marks /= CNN_INPUT_SIZE
96 | 
97 |                 marks *= (x_max - x_min)
98 |                 #############################################################################
99 |                 marks[:, 0] += x_min
100 |                 marks[:, 1] += y_min
101 | 
102 |                 # Draw the predicted landmarks
103 |                 MarkDetector.draw_marks(img, marks, thick=2)
104 | 
105 |             if pad_type == 1:
106 |                 pad_type = -1
107 |                 img = img[pad: pad+h, pad: pad+w]
108 | 
109 |             print("[INFO] Finished {} pictures".format(idx+1))
110 | 
111 |             cv2.imwrite(os.path.join(savePath, "result_%d.jpg" % (idx)), img)
112 | 
113 | 
114 | if __name__ == "__main__":
115 |     main(images_dir='./images', savePath='./result112')
116 | 
--------------------------------------------------------------------------------
/detection/detector.py:
--------------------------------------------------------------------------------
1 | """
2 | Detect faces and predict landmarks on them
3 | """
4 | import numpy as np
5 | import tensorflow as tf
6 | import cv2
7 | import os
8 | from keras.models import load_model
9 | from keras.utils import custom_object_scope
10 | from loss import normalized_mean_error, wing_loss, smoothL1
11 | 
12 | import sys
13 | sys.path.append("../")
14 | from model import relu6, hard_swish
15 | from detection import retinaface
16 | # from detection import detect_face
17 | 
18 | 
19 | class MarkDetector:
20 |     """Facial landmark detector based on a Convolutional Neural Network"""
21 | 
22 |     # def __init__(self, threshold=[0.6, 0.7, 0.7], factor=0.709, minsize=20, mark_model=None):
23 | 
24 |     def __init__(self, model_path, gpuid=-1, thresh=0.95, scales=[384, 512], mark_model=None):
25 | 
26 |         self.gpuid = -1 if gpuid < 0 else gpuid
27 |         self.thresh = thresh
28 | 
29 |         if isinstance(scales, (list, tuple)) and len(scales) == 2:
30 |             self.scales = scales
31 |         else:
32 |             raise ValueError("scales must be a (target_size, max_size) pair...")
33 | 
34 |         try:
35 |             self.detector = retinaface.RetinaFace(
36 |                 model_path, 0, self.gpuid, 'net3')
37 |         except Exception:
38 |             raise Exception("Detector loading error...")
39 | 
40 |         # if isinstance(threshold, list) and len(threshold) == 3:
41 |         #     self.threshold = threshold
42 |         #     self.factor = factor
43 |         #     self.minsize = minsize
44 | 
45 |         # with tf.Graph().as_default():
46 |         #     sess = tf.Session()
47 |         #     with sess.as_default():
48 |         #         self.pnet, self.rnet, self.onet = detect_face.create_mtcnn(
49 |         #             sess, None)
50 | 
51 |         if mark_model.split(".")[-1] == 'h5':
52 |             with custom_object_scope({'normalized_mean_error': normalized_mean_error,
53 |                                       'wing_loss': wing_loss, 'smoothL1': smoothL1,
54 |                                       'relu6': relu6, 'hard_swish': hard_swish}):
55 |                 self.sess = load_model(mark_model)
56 |         else:
57 |             raise Exception("a Keras .h5 mark model must be given...")
58 | 
59 |         # else:
60 |         #     raise ValueError("error occur in threshold params!")
61 | 
62 |     def detect_marks_keras(self, image_np):
63 |         """Detect marks from an image"""
64 |         predictions = self.sess.predict_on_batch(image_np)
65 | 
66 |         # Convert predictions to landmarks.
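# For the multi-output models, predict_on_batch returns a list
# [landmarks, poses]; predictions[0] is the (1, 212) landmark vector,
# which is flattened and reshaped to (106, 2) below.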
67 | marks = np.array(predictions[0]).flatten() 68 | marks = np.reshape(marks, (-1, 2)) 69 | 70 | return marks 71 | 72 | @staticmethod 73 | def move_box(box, offset, scale=1.1): 74 | """Move the box to direction specified by vector offset""" 75 | if scale < 1. or scale >= 2.: 76 | raise ValueError("scale should be between 1 and 2...") 77 | 78 | left_x = box[0] + offset[0] * scale 79 | top_y = box[1] + offset[1] * scale 80 | right_x = box[2] + offset[0] * scale 81 | bottom_y = box[3] + offset[1] * scale 82 | 83 | return [left_x, top_y, right_x, bottom_y] 84 | 85 | @staticmethod 86 | def get_square_box(box): 87 | """Get a square box out of the given box, by expanding it.""" 88 | 89 | left_x = int(box[0]) 90 | top_y = int(box[1]) 91 | right_x = int(box[2]) 92 | bottom_y = int(box[3]) 93 | 94 | box_width = right_x - left_x 95 | box_height = bottom_y - top_y 96 | 97 | # Check if box is already a square. If not, make it a square. 98 | diff = box_height - box_width 99 | delta = int(abs(diff / 2)) 100 | 101 | if diff == 0: # Already a square. 102 | return [left_x, top_y, right_x, bottom_y] 103 | elif diff > 0: # Height > width, a slim box. 104 | left_x -= delta 105 | right_x += delta 106 | if diff % 2 == 1: 107 | right_x += 1 108 | else: # Width > height, a short box. 109 | top_y -= delta 110 | bottom_y += delta 111 | if diff % 2 == 1: 112 | bottom_y += 1 113 | 114 | # Make sure box is always square. 115 | assert (right_x - left_x) == (bottom_y - top_y) 116 | 117 | return [left_x, top_y, right_x, bottom_y] 118 | 119 | def extract_cnn_facebox(self, image): 120 | """Extract face area from image.""" 121 | faceboxes = [] 122 | # scores = [] 123 | face_ldmarks = [] 124 | 125 | im_shape = image.shape 126 | target_size = self.scales[0] 127 | max_size = self.scales[1] 128 | im_size_min = np.min(im_shape[0:2]) 129 | im_size_max = np.max(im_shape[0:2]) 130 | im_scale = float(target_size) / float(im_size_min) 131 | 132 | # Prevent bigger axis from being more than max_size: 133 | if np.round(im_scale * im_size_max) > max_size: 134 | im_scale = float(max_size) / float(im_size_max) 135 | 136 | # bboxs, landmarks = detect_face.detect_face(image, self.minsize, self.pnet, self.rnet, self.onet, 137 | # self.threshold, self.factor) 138 | 139 | faces, landmarks = self.detector.detect( 140 | image, self.thresh, scales=[im_scale]) 141 | 142 | for box, ldmarks in zip(faces, landmarks): 143 | # Box: (x1, y1, x2, y2) 144 | diff_height_width = (box[2] - box[0]) - (box[3] - box[1]) 145 | offset = int(abs(diff_height_width / 2)) 146 | box_moved = self.move_box(box, [0, offset]) 147 | facebox = self.get_square_box(box_moved) 148 | ldmarks = ldmarks - (facebox[0], facebox[1]) 149 | faceboxes.append(facebox) 150 | face_ldmarks.append(ldmarks) 151 | 152 | return faceboxes, face_ldmarks 153 | 154 | # if bboxs.shape[0] == 0: 155 | # landmarks_reshape = landmarks 156 | # else: 157 | # landmarks_reshape = landmarks.reshape((-1, 5, 2), order='F') 158 | 159 | # for bbox, ldmarks in zip(bboxs, landmarks_reshape): 160 | # box, score = bbox[0: 4], bbox[4] 161 | # # Move down 162 | # # box coordinate: (x1, y1, x2, y2) 163 | # diff_height_width = (box[2] - box[0]) - (box[3] - box[1]) 164 | # offset = int(abs(diff_height_width / 2)) 165 | # box_moved = self.move_box(box, [0, offset]) 166 | # # Make box square and landmarks alignment 167 | # facebox = self.get_square_box(box_moved) 168 | # ldmarks = ldmarks - (facebox[0], facebox[1]) 169 | # faceboxes.append(facebox) 170 | # face_ldmarks.append(ldmarks) 171 | # scores.append(score) 172 | 173 
| # return faceboxes, face_ldmarks, scores 174 | 175 | @staticmethod 176 | def draw_marks(image, marks, color=(255, 0, 255), thick=1): 177 | """Draw mark points on image""" 178 | for idx, mark in enumerate(marks): 179 | cv2.circle(image, (int(mark[0]), int(mark[1])), 180 | thick, color, -1, cv2.LINE_AA) 181 | # Visualization cropped image 182 | # cv2.imshow("image", image) 183 | # cv2.waitKey(0) 184 | -------------------------------------------------------------------------------- /detection/model/mnet.25-0000.params: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JACKYLUO1991/FaceLandmarks/0e7eb636d25ba93e64cc3433a79239be51c883cc/detection/model/mnet.25-0000.params -------------------------------------------------------------------------------- /detection/rcnn/processing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JACKYLUO1991/FaceLandmarks/0e7eb636d25ba93e64cc3433a79239be51c883cc/detection/rcnn/processing/__init__.py -------------------------------------------------------------------------------- /detection/rcnn/processing/bbox_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def bbox_overlaps(boxes, query_boxes): 5 | return bbox_overlaps_py(boxes, query_boxes) 6 | 7 | 8 | def bbox_overlaps_py(boxes, query_boxes): 9 | """ 10 | determine overlaps between boxes and query_boxes 11 | :param boxes: n * 4 bounding boxes 12 | :param query_boxes: k * 4 bounding boxes 13 | :return: overlaps: n * k overlaps 14 | """ 15 | n_ = boxes.shape[0] 16 | k_ = query_boxes.shape[0] 17 | overlaps = np.zeros((n_, k_), dtype=np.float) 18 | for k in range(k_): 19 | query_box_area = (query_boxes[k, 2] - query_boxes[k, 0] + 1) * \ 20 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 21 | for n in range(n_): 22 | iw = min(boxes[n, 2], query_boxes[k, 2]) - \ 23 | max(boxes[n, 0], query_boxes[k, 0]) + 1 24 | if iw > 0: 25 | ih = min(boxes[n, 3], query_boxes[k, 3]) - \ 26 | max(boxes[n, 1], query_boxes[k, 1]) + 1 27 | if ih > 0: 28 | box_area = (boxes[n, 2] - boxes[n, 0] + 1) * \ 29 | (boxes[n, 3] - boxes[n, 1] + 1) 30 | all_area = float(box_area + query_box_area - iw * ih) 31 | overlaps[n, k] = iw * ih / all_area 32 | return overlaps 33 | 34 | 35 | def clip_boxes(boxes, im_shape): 36 | """ 37 | Clip boxes to image boundaries. 
38 |     :param boxes: [N, 4 * num_classes]
39 |     :param im_shape: tuple of 2
40 |     :return: [N, 4 * num_classes]
41 |     """
42 |     # x1 >= 0
43 |     boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
44 |     # y1 >= 0
45 |     boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
46 |     # x2 < im_shape[1]
47 |     boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
48 |     # y2 < im_shape[0]
49 |     boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
50 |     return boxes
51 | 
52 | 
53 | def nonlinear_transform(ex_rois, gt_rois):
54 |     """
55 |     compute bounding box regression targets from ex_rois to gt_rois
56 |     :param ex_rois: [N, 4]
57 |     :param gt_rois: [N, 4]
58 |     :return: [N, 4]
59 |     """
60 |     assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number'
61 | 
62 |     ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
63 |     ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
64 |     ex_ctr_x = ex_rois[:, 0] + 0.5 * (ex_widths - 1.0)
65 |     ex_ctr_y = ex_rois[:, 1] + 0.5 * (ex_heights - 1.0)
66 | 
67 |     gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
68 |     gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
69 |     gt_ctr_x = gt_rois[:, 0] + 0.5 * (gt_widths - 1.0)
70 |     gt_ctr_y = gt_rois[:, 1] + 0.5 * (gt_heights - 1.0)
71 | 
72 |     targets_dx = (gt_ctr_x - ex_ctr_x) / (ex_widths + 1e-14)
73 |     targets_dy = (gt_ctr_y - ex_ctr_y) / (ex_heights + 1e-14)
74 |     targets_dw = np.log(gt_widths / ex_widths)
75 |     targets_dh = np.log(gt_heights / ex_heights)
76 | 
77 |     if gt_rois.shape[1] <= 4:
78 |         targets = np.vstack(
79 |             (targets_dx, targets_dy, targets_dw, targets_dh)).transpose()
80 |         return targets
81 |     else:
82 |         targets = [targets_dx, targets_dy, targets_dw, targets_dh]
83 |         targets = np.vstack(targets).transpose()
84 |         return targets
85 | 
86 | 
87 | def landmark_transform(ex_rois, gt_rois):
88 | 
89 |     assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number'
90 | 
91 |     ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
92 |     ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
93 |     ex_ctr_x = ex_rois[:, 0] + 0.5 * (ex_widths - 1.0)
94 |     ex_ctr_y = ex_rois[:, 1] + 0.5 * (ex_heights - 1.0)
95 | 
96 |     targets = []
97 |     for i in range(gt_rois.shape[1]):
98 |         for j in range(gt_rois.shape[2]):
99 |             if j == 2:
100 |                 continue
101 |             if j == 0:  # x
102 |                 target = (gt_rois[:, i, j] - ex_ctr_x) / (ex_widths + 1e-14)
103 |             elif j == 1:  # y
104 |                 target = (gt_rois[:, i, j] - ex_ctr_y) / (ex_heights + 1e-14)
105 |             else:  # visible
106 |                 target = gt_rois[:, i, j]
107 |             targets.append(target)
108 | 
109 |     targets = np.vstack(targets).transpose()
110 |     return targets
111 | 
112 | 
113 | def nonlinear_pred(boxes, box_deltas):
114 |     """
115 |     Transform the set of class-agnostic boxes into class-specific boxes
116 |     by applying the predicted offsets (box_deltas)
117 |     :param boxes: !important [N 4]
118 |     :param box_deltas: [N, 4 * num_classes]
119 |     :return: [N 4 * num_classes]
120 |     """
121 |     if boxes.shape[0] == 0:
122 |         return np.zeros((0, box_deltas.shape[1]))
123 | 
124 |     boxes = boxes.astype(np.float, copy=False)
125 |     widths = boxes[:, 2] - boxes[:, 0] + 1.0
126 |     heights = boxes[:, 3] - boxes[:, 1] + 1.0
127 |     ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
128 |     ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)
129 | 
130 |     dx = box_deltas[:, 0::4]
131 |     dy = box_deltas[:, 1::4]
132 |     dw = box_deltas[:, 2::4]
133 |     dh = box_deltas[:, 3::4]
134 | 
135 |     pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
136 |     pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
137 |     pred_w = np.exp(dw) * widths[:, np.newaxis]
138 |     pred_h = np.exp(dh) * heights[:, np.newaxis]
139 | 
140 |     pred_boxes = np.zeros(box_deltas.shape)
141 |     # x1
142 |     pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * (pred_w - 1.0)
143 |     # y1
144 |     pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * (pred_h - 1.0)
145 |     # x2
146 |     pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * (pred_w - 1.0)
147 |     # y2
148 |     pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * (pred_h - 1.0)
149 | 
150 |     return pred_boxes
151 | 
152 | 
153 | def landmark_pred(boxes, landmark_deltas):
154 |     if boxes.shape[0] == 0:
155 |         return np.zeros((0, landmark_deltas.shape[1]))
156 |     boxes = boxes.astype(np.float, copy=False)
157 |     widths = boxes[:, 2] - boxes[:, 0] + 1.0
158 |     heights = boxes[:, 3] - boxes[:, 1] + 1.0
159 |     ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
160 |     ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)
161 |     preds = []
162 |     for i in range(landmark_deltas.shape[1]):
163 |         if i % 2 == 0:
164 |             pred = (landmark_deltas[:, i]*widths + ctr_x)
165 |         else:
166 |             pred = (landmark_deltas[:, i]*heights + ctr_y)
167 |         preds.append(pred)
168 |     preds = np.vstack(preds).transpose()
169 |     return preds
170 | 
171 | 
172 | def iou_transform(ex_rois, gt_rois):
173 |     """ return bbox targets, IoU loss uses gt_rois as gt """
174 |     assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number'
175 |     return gt_rois
176 | 
177 | 
178 | def iou_pred(boxes, box_deltas):
179 |     """
180 |     Transform the set of class-agnostic boxes into class-specific boxes
181 |     by applying the predicted offsets (box_deltas)
182 |     :param boxes: !important [N 4]
183 |     :param box_deltas: [N, 4 * num_classes]
184 |     :return: [N 4 * num_classes]
185 |     """
186 |     if boxes.shape[0] == 0:
187 |         return np.zeros((0, box_deltas.shape[1]))
188 | 
189 |     boxes = boxes.astype(np.float, copy=False)
190 |     x1 = boxes[:, 0]
191 |     y1 = boxes[:, 1]
192 |     x2 = boxes[:, 2]
193 |     y2 = boxes[:, 3]
194 | 
195 |     dx1 = box_deltas[:, 0::4]
196 |     dy1 = box_deltas[:, 1::4]
197 |     dx2 = box_deltas[:, 2::4]
198 |     dy2 = box_deltas[:, 3::4]
199 | 
200 |     pred_boxes = np.zeros(box_deltas.shape)
201 |     # x1
202 |     pred_boxes[:, 0::4] = dx1 + x1[:, np.newaxis]
203 |     # y1
204 |     pred_boxes[:, 1::4] = dy1 + y1[:, np.newaxis]
205 |     # x2
206 |     pred_boxes[:, 2::4] = dx2 + x2[:, np.newaxis]
207 |     # y2
208 |     pred_boxes[:, 3::4] = dy2 + y2[:, np.newaxis]
209 | 
210 |     return pred_boxes
211 | 
212 | 
213 | # define bbox_transform and bbox_pred
214 | bbox_transform = nonlinear_transform
215 | bbox_pred = nonlinear_pred
216 | 
--------------------------------------------------------------------------------
/detection/rcnn/processing/generate_anchor.py:
--------------------------------------------------------------------------------
1 | """
2 | Generate base anchors on index 0
3 | """
4 | from __future__ import print_function
5 | 
6 | import sys
7 | import numpy as np
8 | 
9 | 
10 | def anchors_plane(feat_h, feat_w, stride, base_anchor):
11 |     return anchors_py(feat_h, feat_w, stride, base_anchor)
12 | 
13 | 
14 | def anchors_py(height, width, stride, base_anchors):
15 |     """
16 |     Parameters
17 |     ----------
18 |     height: height of plane
19 |     width: width of plane
20 |     stride: stride of the original image
21 |     base_anchors: (A, 4) a base set of anchors
22 |     Returns
23 |     -------
24 |     all_anchors: (height, width, A, 4) ndarray of anchors spreading over the plane
25 |     """
26 |     A = base_anchors.shape[0]
27 |     all_anchors = np.zeros((height, width, A, 4), dtype=np.float32)
28 | 
29 |     for iw in range(width):
30 |         sw = iw * stride
31 |         for ih in
range(height): 32 | sh = ih * stride 33 | for k in range(A): 34 | all_anchors[ih, iw, k, 0] = base_anchors[k, 0] + sw 35 | all_anchors[ih, iw, k, 1] = base_anchors[k, 1] + sh 36 | all_anchors[ih, iw, k, 2] = base_anchors[k, 2] + sw 37 | all_anchors[ih, iw, k, 3] = base_anchors[k, 3] + sh 38 | return all_anchors 39 | 40 | 41 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 42 | scales=2 ** np.arange(3, 6), stride=16, dense_anchor=False): 43 | """ 44 | Generate anchor (reference) windows by enumerating aspect ratios X 45 | scales wrt a reference (0, 0, 15, 15) window. 46 | """ 47 | 48 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 49 | ratio_anchors = _ratio_enum(base_anchor, ratios) 50 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 51 | for i in range(ratio_anchors.shape[0])]) 52 | if dense_anchor: 53 | assert stride % 2 == 0 54 | anchors2 = anchors.copy() 55 | anchors2[:, :] += int(stride/2) 56 | anchors = np.vstack((anchors, anchors2)) 57 | return anchors 58 | 59 | 60 | def generate_anchors_fpn(dense_anchor=False, cfg=None): 61 | """ 62 | Generate anchor (reference) windows by enumerating aspect ratios X 63 | scales wrt a reference (0, 0, 15, 15) window. 64 | """ 65 | if cfg is None: 66 | from ..config import config 67 | cfg = config.RPN_ANCHOR_CFG 68 | RPN_FEAT_STRIDE = [] 69 | for k in cfg: 70 | RPN_FEAT_STRIDE.append(int(k)) 71 | RPN_FEAT_STRIDE = sorted(RPN_FEAT_STRIDE, reverse=True) 72 | anchors = [] 73 | for k in RPN_FEAT_STRIDE: 74 | v = cfg[str(k)] 75 | bs = v['BASE_SIZE'] 76 | __ratios = np.array(v['RATIOS']) 77 | __scales = np.array(v['SCALES']) 78 | stride = int(k) 79 | r = generate_anchors(bs, __ratios, __scales, stride, dense_anchor) 80 | anchors.append(r) 81 | 82 | return anchors 83 | 84 | 85 | def _whctrs(anchor): 86 | """ 87 | Return width, height, x center, and y center for an anchor (window). 88 | """ 89 | 90 | w = anchor[2] - anchor[0] + 1 91 | h = anchor[3] - anchor[1] + 1 92 | x_ctr = anchor[0] + 0.5 * (w - 1) 93 | y_ctr = anchor[1] + 0.5 * (h - 1) 94 | return w, h, x_ctr, y_ctr 95 | 96 | 97 | def _mkanchors(ws, hs, x_ctr, y_ctr): 98 | """ 99 | Given a vector of widths (ws) and heights (hs) around a center 100 | (x_ctr, y_ctr), output a set of anchors (windows). 101 | """ 102 | 103 | ws = ws[:, np.newaxis] 104 | hs = hs[:, np.newaxis] 105 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 106 | y_ctr - 0.5 * (hs - 1), 107 | x_ctr + 0.5 * (ws - 1), 108 | y_ctr + 0.5 * (hs - 1))) 109 | return anchors 110 | 111 | 112 | def _ratio_enum(anchor, ratios): 113 | """ 114 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 115 | """ 116 | 117 | w, h, x_ctr, y_ctr = _whctrs(anchor) 118 | size = w * h 119 | size_ratios = size / ratios 120 | ws = np.round(np.sqrt(size_ratios)) 121 | hs = np.round(ws * ratios) 122 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 123 | return anchors 124 | 125 | 126 | def _scale_enum(anchor, scales): 127 | """ 128 | Enumerate a set of anchors for each scale wrt an anchor. 
129 | """ 130 | 131 | w, h, x_ctr, y_ctr = _whctrs(anchor) 132 | ws = w * scales 133 | hs = h * scales 134 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 135 | return anchors 136 | -------------------------------------------------------------------------------- /detection/rcnn/processing/nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | try: 3 | from ..cython.cpu_nms import cpu_nms 4 | except ImportError: 5 | cpu_nms = None 6 | try: 7 | from ..cython.gpu_nms import gpu_nms 8 | except ImportError: 9 | gpu_nms = None 10 | 11 | 12 | def py_nms_wrapper(thresh): 13 | def _nms(dets): 14 | return nms(dets, thresh) 15 | return _nms 16 | 17 | 18 | def cpu_nms_wrapper(thresh): 19 | def _nms(dets): 20 | return cpu_nms(dets, thresh) 21 | if cpu_nms is not None: 22 | return _nms 23 | else: 24 | return py_nms_wrapper(thresh) 25 | 26 | 27 | def gpu_nms_wrapper(thresh, device_id): 28 | def _nms(dets): 29 | return gpu_nms(dets, thresh, device_id) 30 | if gpu_nms is not None: 31 | return _nms 32 | elif cpu_nms is not None: 33 | return cpu_nms_wrapper(thresh) 34 | else: 35 | return py_nms_wrapper(thresh) 36 | 37 | 38 | def nms(dets, thresh): 39 | """ 40 | greedily select boxes with high confidence and overlap with current maximum <= thresh 41 | rule out overlap >= thresh 42 | :param dets: [[x1, y1, x2, y2 score]] 43 | :param thresh: retain overlap < thresh 44 | :return: indexes to keep 45 | """ 46 | x1 = dets[:, 0] 47 | y1 = dets[:, 1] 48 | x2 = dets[:, 2] 49 | y2 = dets[:, 3] 50 | scores = dets[:, 4] 51 | 52 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 53 | order = scores.argsort()[::-1] 54 | 55 | keep = [] 56 | while order.size > 0: 57 | i = order[0] 58 | keep.append(i) 59 | xx1 = np.maximum(x1[i], x1[order[1:]]) 60 | yy1 = np.maximum(y1[i], y1[order[1:]]) 61 | xx2 = np.minimum(x2[i], x2[order[1:]]) 62 | yy2 = np.minimum(y2[i], y2[order[1:]]) 63 | 64 | w = np.maximum(0.0, xx2 - xx1 + 1) 65 | h = np.maximum(0.0, yy2 - yy1 + 1) 66 | inter = w * h 67 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 68 | 69 | inds = np.where(ovr <= thresh)[0] 70 | order = order[inds + 1] 71 | 72 | return keep 73 | -------------------------------------------------------------------------------- /detection/retinaface.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import sys 4 | import os 5 | import numpy as np 6 | import mxnet as mx 7 | from mxnet import ndarray as nd 8 | import cv2 9 | from .rcnn.processing.bbox_transform import clip_boxes 10 | from .rcnn.processing.generate_anchor import generate_anchors_fpn, anchors_plane 11 | from .rcnn.processing.nms import gpu_nms_wrapper, cpu_nms_wrapper 12 | 13 | 14 | class RetinaFace: 15 | 16 | def __init__(self, prefix, epoch, ctx_id=0, network='net3', nms=0.4, nocrop=False, decay4=0.5, vote=False): 17 | 18 | self.ctx_id = ctx_id 19 | self.network = network 20 | self.decay4 = decay4 21 | self.nms_threshold = nms 22 | self.vote = vote 23 | self.nocrop = nocrop 24 | self.fpn_keys = [] 25 | self.anchor_cfg = None 26 | pixel_means=[0.0, 0.0, 0.0] 27 | pixel_stds=[1.0, 1.0, 1.0] 28 | pixel_scale = 1.0 29 | self.preprocess = False 30 | _ratio = (1.,) 31 | fmc = 3 32 | 33 | if network=='ssh' or network=='vgg': 34 | pixel_means=[103.939, 116.779, 123.68] 35 | self.preprocess = True 36 | elif network=='net3': 37 | _ratio = (1.,) 38 | elif network=='net3a': 39 | _ratio = (1., 1.5) 40 | elif network=='net6': # like pyramidbox or s3fd 41 | fmc = 6 
42 |         elif network=='net5': # retinaface
43 |             fmc = 5
44 |         elif network=='net5a':
45 |             fmc = 5
46 |             _ratio = (1., 1.5)
47 |         elif network=='net4':
48 |             fmc = 4
49 |         elif network=='net4a':
50 |             fmc = 4
51 |             _ratio = (1., 1.5)
52 |         else:
53 |             assert False, 'unknown network setting %s' % network
54 | 
55 |         if fmc==3:
56 |             self._feat_stride_fpn = [32, 16, 8]
57 |             self.anchor_cfg = {
58 |                 '32': {'SCALES': (32,16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
59 |                 '16': {'SCALES': (8,4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
60 |                 '8': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
61 |             }
62 |         elif fmc==4:
63 |             self._feat_stride_fpn = [32, 16, 8, 4]
64 |             self.anchor_cfg = {
65 |                 '32': {'SCALES': (32,16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
66 |                 '16': {'SCALES': (8,4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
67 |                 '8': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
68 |                 '4': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
69 |             }
70 |         elif fmc==6:
71 |             self._feat_stride_fpn = [128, 64, 32, 16, 8, 4]
72 |             self.anchor_cfg = {
73 |                 '128': {'SCALES': (32,), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
74 |                 '64': {'SCALES': (16,), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
75 |                 '32': {'SCALES': (8,), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
76 |                 '16': {'SCALES': (4,), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
77 |                 '8': {'SCALES': (2,), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
78 |                 '4': {'SCALES': (1,), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
79 |             }
80 |         elif fmc==5:
81 |             self._feat_stride_fpn = [64, 32, 16, 8, 4]
82 |             self.anchor_cfg = {}
83 |             _ass = 2.0**(1.0/3)  # three anchor scales per stride, spaced by 2^(1/3)
84 |             _basescale = 1.0
85 |             for _stride in [4, 8, 16, 32, 64]:
86 |                 key = str(_stride)
87 |                 value = {'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}
88 |                 scales = []
89 |                 for _ in range(3):
90 |                     scales.append(_basescale)
91 |                     _basescale *= _ass
92 |                 value['SCALES'] = tuple(scales)
93 |                 self.anchor_cfg[key] = value
94 | 
95 |         for s in self._feat_stride_fpn:
96 |             self.fpn_keys.append('stride%s'%s)
97 | 
98 |         dense_anchor = False
99 |         self._anchors_fpn = dict(zip(self.fpn_keys, generate_anchors_fpn(dense_anchor=dense_anchor, cfg=self.anchor_cfg)))
100 |         for k in self._anchors_fpn:
101 |             v = self._anchors_fpn[k].astype(np.float32)
102 |             self._anchors_fpn[k] = v
103 | 
104 |         self._num_anchors = dict(zip(self.fpn_keys, [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
105 |         sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
106 | 
107 |         # Select the GPU or CPU context, plus the matching NMS implementation.
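        # A minimal sketch of the pure-NumPy fallback in
        # rcnn/processing/nms.py (assuming the detection package is on the
        # import path), with made-up boxes in (x1, y1, x2, y2, score) form:
        #
        #   import numpy as np
        #   from detection.rcnn.processing.nms import py_nms_wrapper
        #   dets = np.array([[10., 10., 60., 60., 0.9],
        #                    [12., 12., 62., 62., 0.8],      # IoU ~0.86 with box 0: suppressed
        #                    [100., 100., 150., 150., 0.7]])
        #   py_nms_wrapper(0.4)(dets)                        # -> [0, 2]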
108 | if self.ctx_id>=0: 109 | self.ctx = mx.gpu(self.ctx_id) 110 | self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id) 111 | else: 112 | self.ctx = mx.cpu() 113 | self.nms = cpu_nms_wrapper(self.nms_threshold) 114 | self.pixel_means = np.array(pixel_means, dtype=np.float32) 115 | self.pixel_stds = np.array(pixel_stds, dtype=np.float32) 116 | self.pixel_scale = float(pixel_scale) 117 | self.use_landmarks = False 118 | 119 | if len(sym) // len(self._feat_stride_fpn) == 3: 120 | self.use_landmarks = True 121 | 122 | image_size = (640, 640) 123 | self.model = mx.mod.Module(symbol=sym, context=self.ctx, label_names=None) 124 | self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))], for_training=False) 125 | self.model.set_params(arg_params, aux_params) 126 | 127 | def get_input(self, img): 128 | im = img.astype(np.float32) 129 | im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1])) 130 | for i in range(3): 131 | im_tensor[0, i, :, :] = (im[:, :, 2 - i] / self.pixel_scale - self.pixel_means[2 - i]) / self.pixel_stds[2-i] 132 | data = nd.array(im_tensor) 133 | 134 | return data 135 | 136 | def detect(self, img, threshold=0.5, scales=[1.0], do_flip=False): 137 | proposals_list = [] 138 | scores_list = [] 139 | landmarks_list = [] 140 | flips = [0] 141 | if do_flip: 142 | flips = [0, 1] 143 | 144 | for im_scale in scales: 145 | for flip in flips: 146 | if im_scale!=1.0: 147 | im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR) 148 | else: 149 | im = img.copy() 150 | if flip: 151 | im = im[:,::-1,:] 152 | if self.nocrop: 153 | if im.shape[0] % 32 == 0: 154 | h = im.shape[0] 155 | else: 156 | h = (im.shape[0] // 32 + 1) * 32 157 | if im.shape[1] % 32 == 0: 158 | w = im.shape[1] 159 | else: 160 | w = (im.shape[1] // 32 + 1) * 32 161 | _im = np.zeros( (h, w, 3), dtype=np.float32 ) 162 | _im[0:im.shape[0], 0:im.shape[1], :] = im 163 | im = _im 164 | else: 165 | im = im.astype(np.float32) 166 | 167 | im_info = [im.shape[0], im.shape[1]] 168 | im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1])) 169 | for i in range(3): 170 | im_tensor[0, i, :, :] = (im[:, :, 2 - i]/self.pixel_scale - self.pixel_means[2 - i])/self.pixel_stds[2-i] 171 | data = nd.array(im_tensor) 172 | db = mx.io.DataBatch(data=(data,), provide_data=[('data', data.shape)]) 173 | self.model.forward(db, is_train=False) 174 | net_out = self.model.get_outputs() 175 | 176 | for _idx,s in enumerate(self._feat_stride_fpn): 177 | _key = 'stride%s'%s 178 | stride = int(s) 179 | if self.use_landmarks: 180 | idx = _idx*3 181 | else: 182 | idx = _idx*2 183 | scores = net_out[idx].asnumpy() 184 | scores = scores[:, self._num_anchors['stride%s'%s]:, :, :] 185 | 186 | idx+=1 187 | bbox_deltas = net_out[idx].asnumpy() 188 | 189 | height, width = bbox_deltas.shape[2], bbox_deltas.shape[3] 190 | 191 | A = self._num_anchors['stride%s'%s] 192 | K = height * width 193 | anchors_fpn = self._anchors_fpn['stride%s'%s] 194 | anchors = anchors_plane(height, width, stride, anchors_fpn) 195 | anchors = anchors.reshape((K * A, 4)) 196 | 197 | scores = self._clip_pad(scores, (height, width)) 198 | scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) 199 | bbox_deltas = self._clip_pad(bbox_deltas, (height, width)) 200 | bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)) 201 | bbox_pred_len = bbox_deltas.shape[3]//A 202 | bbox_deltas = bbox_deltas.reshape((-1, bbox_pred_len)) 203 | 204 | proposals = self.bbox_pred(anchors, bbox_deltas) 205 | proposals = clip_boxes(proposals, im_info[:2]) 206 | 
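                # bbox_pred (defined below) applies the usual R-CNN box
                # parameterisation, ctr' = ctr + (dx, dy) * (w, h) and
                # (w', h') = (w, h) * exp(dw, dh), so zero deltas decode an
                # anchor back to itself; clip_boxes then trims to the image.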
207 | scores_ravel = scores.ravel() 208 | order = np.where(scores_ravel>=threshold)[0] 209 | 210 | proposals = proposals[order, :] 211 | scores = scores[order] 212 | if stride==4 and self.decay4<1.0: 213 | scores *= self.decay4 214 | 215 | if flip: 216 | oldx1 = proposals[:, 0].copy() 217 | oldx2 = proposals[:, 2].copy() 218 | proposals[:, 0] = im.shape[1] - oldx2 - 1 219 | proposals[:, 2] = im.shape[1] - oldx1 - 1 220 | 221 | proposals[:, 0:4] /= im_scale 222 | proposals_list.append(proposals) 223 | scores_list.append(scores) 224 | 225 | if not self.vote and self.use_landmarks: 226 | idx += 1 227 | landmark_deltas = net_out[idx].asnumpy() 228 | landmark_deltas = self._clip_pad(landmark_deltas, (height, width)) 229 | landmark_pred_len = landmark_deltas.shape[1]//A 230 | landmark_deltas = landmark_deltas.transpose((0, 2, 3, 1)).reshape((-1, 5, landmark_pred_len//5)) 231 | landmarks = self.landmark_pred(anchors, landmark_deltas) 232 | landmarks = landmarks[order, :] 233 | 234 | if flip: 235 | landmarks[:,:,0] = im.shape[1] - landmarks[:,:,0] - 1 236 | order = [1, 0, 2, 4, 3] 237 | flandmarks = landmarks.copy() 238 | for idx, a in enumerate(order): 239 | flandmarks[:,idx,:] = landmarks[:,a,:] 240 | landmarks = flandmarks 241 | landmarks[:,:,0:2] /= im_scale 242 | 243 | landmarks_list.append(landmarks) 244 | 245 | proposals = np.vstack(proposals_list) 246 | landmarks = None 247 | if proposals.shape[0]==0: 248 | if self.use_landmarks: 249 | landmarks = np.zeros( (0,5,2) ) 250 | return np.zeros( (0,5) ), landmarks 251 | scores = np.vstack(scores_list) 252 | scores_ravel = scores.ravel() 253 | order = scores_ravel.argsort()[::-1] 254 | proposals = proposals[order, :] 255 | scores = scores[order] 256 | if not self.vote and self.use_landmarks: 257 | landmarks = np.vstack(landmarks_list) 258 | landmarks = landmarks[order].astype(np.float32, copy=False) 259 | 260 | pre_det = np.hstack((proposals[:,0:4], scores)).astype(np.float32, copy=False) 261 | if not self.vote: 262 | keep = self.nms(pre_det) 263 | det = np.hstack( (pre_det, proposals[:,4:]) ) 264 | det = det[keep, :] 265 | if self.use_landmarks: 266 | landmarks = landmarks[keep] 267 | else: 268 | det = np.hstack( (pre_det, proposals[:,4:]) ) 269 | det = self.bbox_vote(det) 270 | 271 | return det, landmarks 272 | 273 | def detect_center(self, img, threshold=0.5, scales=[1.0], do_flip=False): 274 | det, landmarks = self.detect(img, threshold, scales, do_flip) 275 | if det.shape[0]==0: 276 | return None, None 277 | bindex = 0 278 | if det.shape[0]>1: 279 | img_size = np.asarray(img.shape)[0:2] 280 | bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1]) 281 | img_center = img_size / 2 282 | offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ]) 283 | offset_dist_squared = np.sum(np.power(offsets,2.0),0) 284 | bindex = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering 285 | bbox = det[bindex,:] 286 | landmark = landmarks[bindex, :, :] 287 | 288 | return bbox, landmark 289 | 290 | @staticmethod 291 | def check_large_pose(landmark, bbox): 292 | assert landmark.shape==(5,2) 293 | assert len(bbox)==4 294 | def get_theta(base, x, y): 295 | vx = x-base 296 | vy = y-base 297 | vx[1] *= -1 298 | vy[1] *= -1 299 | tx = np.arctan2(vx[1], vx[0]) 300 | ty = np.arctan2(vy[1], vy[0]) 301 | d = ty-tx 302 | d = np.degrees(d) 303 | if d<-180.0: 304 | d+=360. 
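            # Wrap the signed angle difference into (-180, 180] degrees.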
305 |             elif d>180.0:
306 |                 d-=360.0
307 |             return d
308 |         landmark = landmark.astype(np.float32)  # assumed 5-point order: left eye, right eye, nose, left/right mouth corners
309 | 
310 |         theta1 = get_theta(landmark[0], landmark[3], landmark[2])
311 |         theta2 = get_theta(landmark[1], landmark[2], landmark[4])
312 |         theta3 = get_theta(landmark[0], landmark[2], landmark[1])
313 |         theta4 = get_theta(landmark[1], landmark[0], landmark[2])
314 |         theta5 = get_theta(landmark[3], landmark[4], landmark[2])
315 |         theta6 = get_theta(landmark[4], landmark[2], landmark[3])
316 |         theta7 = get_theta(landmark[3], landmark[2], landmark[0])
317 |         theta8 = get_theta(landmark[4], landmark[1], landmark[2])
318 |         left_score = 0.0
319 |         right_score = 0.0
320 |         up_score = 0.0
321 |         down_score = 0.0
322 |         if theta1<=0.0:
323 |             left_score = 10.0
324 |         elif theta2<=0.0:
325 |             right_score = 10.0
326 |         else:
327 |             left_score = theta2/theta1
328 |             right_score = theta1/theta2
329 |         if theta3<=10.0 or theta4<=10.0:
330 |             up_score = 10.0
331 |         else:
332 |             up_score = max(theta1/theta3, theta2/theta4)
333 |         if theta5<=10.0 or theta6<=10.0:
334 |             down_score = 10.0
335 |         else:
336 |             down_score = max(theta7/theta5, theta8/theta6)
337 |         mleft = (landmark[0][0]+landmark[3][0])/2
338 |         mright = (landmark[1][0]+landmark[4][0])/2
339 |         box_center = ( (bbox[0]+bbox[2])/2, (bbox[1]+bbox[3])/2 )
340 |         ret = 0
341 |         if left_score>=3.0:
342 |             ret = 1
343 |         if ret==0 and left_score>=2.0:
344 |             if mright<=box_center[0]:
345 |                 ret = 1
346 |         if ret==0 and right_score>=3.0:
347 |             ret = 2
348 |         if ret==0 and right_score>=2.0:
349 |             if mleft>=box_center[0]:
350 |                 ret = 2
351 |         if ret==0 and up_score>=2.0:
352 |             ret = 3
353 |         if ret==0 and down_score>=5.0:
354 |             ret = 4
355 |         return ret, left_score, right_score, up_score, down_score
356 | 
357 |     @staticmethod
358 |     def _filter_boxes(boxes, min_size):
359 |         """ Remove all boxes with any side smaller than min_size """
360 |         ws = boxes[:, 2] - boxes[:, 0] + 1
361 |         hs = boxes[:, 3] - boxes[:, 1] + 1
362 |         keep = np.where((ws >= min_size) & (hs >= min_size))[0]
363 |         return keep
364 | 
365 |     @staticmethod
366 |     def _filter_boxes2(boxes, max_size, min_size):
367 |         """ Keep boxes whose smaller side is below max_size, or else whose larger side exceeds min_size """
368 |         ws = boxes[:, 2] - boxes[:, 0] + 1
369 |         hs = boxes[:, 3] - boxes[:, 1] + 1
370 |         if max_size>0:
371 |             keep = np.where( np.minimum(ws, hs)<max_size )[0]
372 |         elif min_size>0:
373 |             keep = np.where( np.maximum(ws, hs)>min_size )[0]
374 |         return keep  # assumes max_size > 0 or min_size > 0
375 | 
376 |     @staticmethod
377 |     def _clip_pad(tensor, pad_shape):
378 |         """
379 |         Crop the network output back to the un-padded area.
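        (detect() zero-pads inputs up to a multiple of 32 when nocrop is set;
        this drops the rows/columns that correspond to that padding.)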
380 |         :param tensor: [n, c, H, W]
381 |         :param pad_shape: [h, w]
382 |         :return: [n, c, h, w]
383 |         """
384 |         H, W = tensor.shape[2:]
385 |         h, w = pad_shape
386 | 
387 |         if h < H or w < W:
388 |             tensor = tensor[:, :, :h, :w].copy()
389 | 
390 |         return tensor
391 | 
392 |     @staticmethod
393 |     def bbox_pred(boxes, box_deltas):
394 |         """
395 |         Transform the set of class-agnostic boxes into class-specific boxes
396 |         by applying the predicted offsets (box_deltas)
397 |         :param boxes: [N, 4]
398 |         :param box_deltas: [N, 4 * num_classes]
399 |         :return: [N, 4 * num_classes]
400 |         """
401 |         if boxes.shape[0] == 0:
402 |             return np.zeros((0, box_deltas.shape[1]))
403 | 
404 |         boxes = boxes.astype(np.float32, copy=False)  # np.float is a deprecated alias
405 |         widths = boxes[:, 2] - boxes[:, 0] + 1.0
406 |         heights = boxes[:, 3] - boxes[:, 1] + 1.0
407 |         ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
408 |         ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)
409 | 
410 |         dx = box_deltas[:, 0:1]
411 |         dy = box_deltas[:, 1:2]
412 |         dw = box_deltas[:, 2:3]
413 |         dh = box_deltas[:, 3:4]
414 | 
415 |         pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
416 |         pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
417 |         pred_w = np.exp(dw) * widths[:, np.newaxis]
418 |         pred_h = np.exp(dh) * heights[:, np.newaxis]
419 | 
420 |         pred_boxes = np.zeros(box_deltas.shape)
421 |         # x1
422 |         pred_boxes[:, 0:1] = pred_ctr_x - 0.5 * (pred_w - 1.0)
423 |         # y1
424 |         pred_boxes[:, 1:2] = pred_ctr_y - 0.5 * (pred_h - 1.0)
425 |         # x2
426 |         pred_boxes[:, 2:3] = pred_ctr_x + 0.5 * (pred_w - 1.0)
427 |         # y2
428 |         pred_boxes[:, 3:4] = pred_ctr_y + 0.5 * (pred_h - 1.0)
429 | 
430 |         if box_deltas.shape[1]>4:
431 |             pred_boxes[:,4:] = box_deltas[:,4:]
432 | 
433 |         return pred_boxes
434 | 
435 |     @staticmethod
436 |     def landmark_pred(boxes, landmark_deltas):
437 |         if boxes.shape[0] == 0:
438 |             return np.zeros((0, landmark_deltas.shape[1]))
439 |         boxes = boxes.astype(np.float32, copy=False)  # np.float is a deprecated alias
440 |         widths = boxes[:, 2] - boxes[:, 0] + 1.0
441 |         heights = boxes[:, 3] - boxes[:, 1] + 1.0
442 |         ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
443 |         ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)
444 |         pred = landmark_deltas.copy()
445 |         for i in range(5):
446 |             pred[:,i,0] = landmark_deltas[:,i,0]*widths + ctr_x
447 |             pred[:,i,1] = landmark_deltas[:,i,1]*heights + ctr_y
448 |         return pred
449 | 
450 |     def bbox_vote(self, det):
451 |         if det.shape[0] == 0:
452 |             dets = np.array([[10, 10, 20, 20, 0.002]])  # dummy low-score box
453 |             det = np.empty(shape=[0, 5])
454 |         while det.shape[0] > 0:
455 |             # IOU of the current top box against all remaining boxes
456 |             area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)
457 |             xx1 = np.maximum(det[0, 0], det[:, 0])
458 |             yy1 = np.maximum(det[0, 1], det[:, 1])
459 |             xx2 = np.minimum(det[0, 2], det[:, 2])
460 |             yy2 = np.minimum(det[0, 3], det[:, 3])
461 |             w = np.maximum(0.0, xx2 - xx1 + 1)
462 |             h = np.maximum(0.0, yy2 - yy1 + 1)
463 |             inter = w * h
464 |             o = inter / (area[0] + area[:] - inter)
465 | 
466 |             # nms
467 |             merge_index = np.where(o >= self.nms_threshold)[0]
468 |             det_accu = det[merge_index, :]
469 |             det = np.delete(det, merge_index, 0)
470 |             if merge_index.shape[0] <= 1:
471 |                 if det.shape[0] == 0:
472 |                     try:
473 |                         dets = np.row_stack((dets, det_accu))
474 |                     except NameError:  # first cluster: dets not created yet
475 |                         dets = det_accu
476 |                 continue
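            # Score-weighted box voting: each detection in the cluster
            # contributes its coordinates in proportion to its confidence,
            # and the merged box keeps the cluster's maximum score (cf. the
            # box voting of Gidaris & Komodakis, ICCV 2015).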
477 |             det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))
478 |             max_score = np.max(det_accu[:, 4])
479 |             det_accu_sum = np.zeros((1, 5))
480 |             det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4],
481 |                                           axis=0) / np.sum(det_accu[:, -1:])
482 |             det_accu_sum[:, 4] = max_score
483 |             try:
484 |                 dets = np.row_stack((dets, det_accu_sum))
485 |             except NameError:  # first cluster: dets not created yet
486 |                 dets = det_accu_sum
487 |         dets = dets[0:750, :]
488 | 
489 |         return dets
490 | 
491 | 
--------------------------------------------------------------------------------
/loss.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | import tensorflow as tf
3 | import math
4 | 
5 | N_LANDMARK = 106
6 | 
7 | 
8 | def normalized_mean_error(y_true, y_pred):
9 |     '''
10 |     Normalised mean error (NME): mean point-to-point distance, divided by the inter-ocular distance
11 |     '''
12 |     y_pred = K.reshape(y_pred, (-1, N_LANDMARK, 2))
13 |     y_true = K.reshape(y_true, (-1, N_LANDMARK, 2))
14 |     # Inter-ocular (pupil-to-pupil) distance used as the normaliser
15 |     interocular_distance = K.sqrt(
16 |         K.sum((y_true[:, 38, :] - y_true[:, 92, :]) ** 2, axis=-1))
17 |     return K.mean(K.sum(K.sqrt(K.sum((y_pred - y_true) ** 2, axis=-1)), axis=-1)) / \
18 |         K.mean((interocular_distance * N_LANDMARK))
19 | 
20 | 
21 | # def wing_loss(y_true, y_pred, w=10.0, epsilon=2.0):
22 | #     """
23 | #     Reference: wing loss for robust facial landmark localisation
24 | #     with convolutional neural networks
25 | #     """
26 | #     x = y_true - y_pred
27 | #     c = w * (1.0 - math.log(1.0 + w/epsilon))
28 | #     absolute_x = K.abs(x)
29 | #     losses = tf.where(
30 | #         K.greater(w, absolute_x),
31 | #         w * K.log(1.0 + absolute_x/epsilon),
32 | #         absolute_x - c
33 | #     )
34 | #     loss = K.mean(K.sum(losses, axis=-1), axis=0)
35 | 
36 | #     return loss
37 | 
38 | def wing_loss(y_true, y_pred, w=10.0, epsilon=2.0):
39 |     """
40 |     Arguments:
41 |         y_true, y_pred: float tensors with shape [batch_size, num_landmarks * 2].
42 |         w, epsilon: float hyper-parameters of the wing loss.
43 |     Returns:
44 |         a float tensor with shape []: w * ln(1 + |x|/epsilon) for |x| < w, else |x| - c, with c chosen so the two pieces join at |x| = w.
45 |     """
46 |     y_true = tf.reshape(y_true, [-1, N_LANDMARK, 2])
47 |     y_pred = tf.reshape(y_pred, [-1, N_LANDMARK, 2])
48 | 
49 |     x = y_true - y_pred
50 |     c = w * (1.0 - math.log(1.0 + w / epsilon))
51 |     absolute_x = tf.abs(x)
52 |     losses = tf.where(
53 |         tf.greater(w, absolute_x),
54 |         w * tf.log(1.0 + absolute_x/epsilon),
55 |         absolute_x - c
56 |     )
57 |     loss = tf.reduce_mean(tf.reduce_sum(losses, axis=[1, 2]), axis=0)
58 | 
59 |     return loss
60 | 
61 | 
62 | def smoothL1(y_true, y_pred):
63 |     """
64 |     Smooth L1: quadratic near zero, linear for large errors; more robust to outliers than L2
65 |     """
66 |     THRESHOLD = K.variable(1.0)
67 |     mae = K.abs(y_true - y_pred)
68 |     flag = K.greater(mae, THRESHOLD)
69 |     loss = K.mean(K.switch(flag, (mae - 0.5), 0.5 * K.pow(mae, 2)), axis=-1)  # the 0.5 factor makes the two branches meet at mae == 1
70 | 
71 |     return loss
72 | 
73 | 
74 | 
75 | 
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
1 | # # PFLD: A Practical Facial Landmark Detector
2 | 
3 | # import sys
4 | # import time
5 | 
6 | # from keras.models import Model
7 | # from keras.layers import *
8 | # from keras import backend as K
9 | # from keras.utils.vis_utils import plot_model
10 | # from keras.utils import vis_utils
11 | 
12 | 
13 | # def _conv_block(inputs, filters, kernel, strides, dilation_rate=1, padding='same'):
14 | #     """Convolution Block
15 | #     This function defines a 2D convolution operation with BN and relu6.
16 | #     # Arguments
17 | #         inputs: Tensor, input tensor of conv layer.
18 | #         filters: Integer, the dimensionality of the output space.
19 | #         kernel: An integer or tuple/list of 2 integers, specifying the
20 | #             width and height of the 2D convolution window.
21 | #         strides: An integer or tuple/list of 2 integers,
22 | #             specifying the strides of the convolution along the width and height.
23 | # Can be a single integer to specify the same value for 24 | # all spatial dimensions. 25 | # # Returns 26 | # Output tensor. 27 | # """ 28 | # channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 29 | # x = Conv2D(filters, kernel, padding=padding, strides=strides, 30 | # dilation_rate=dilation_rate)(inputs) 31 | # x = BatchNormalization(axis=channel_axis)(x) 32 | 33 | # return Activation('relu')(x) 34 | 35 | 36 | # def _depthwise_block(inputs, kernel, strides, padding='same'): 37 | # '''Depthwise separable 2D convolution block''' 38 | 39 | # assert isinstance(kernel, (tuple, int)) 40 | # assert isinstance(strides, (tuple, int)) 41 | 42 | # channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 43 | # x = DepthwiseConv2D(kernel_size=kernel, strides=strides, 44 | # depth_multiplier=1, padding=padding)(inputs) 45 | # x = BatchNormalization(axis=channel_axis)(x) 46 | 47 | # return Activation('relu')(x) 48 | 49 | 50 | # def _bottleneck(inputs, filters, kernel, t, s, alpha, r=False): 51 | # """Bottleneck 52 | # This function defines a basic bottleneck structure. 53 | # # Arguments 54 | # inputs: Tensor, input tensor of conv layer. 55 | # filters: Integer, the dimensionality of the output space. 56 | # kernel: An integer or tuple/list of 2 integers, specifying the 57 | # width and height of the 2D convolution window. 58 | # t: Integer, expansion factor. 59 | # t is always applied to the input size. 60 | # s: An integer or tuple/list of 2 integers,specifying the strides 61 | # of the convolution along the width and height.Can be a single 62 | # integer to specify the same value for all spatial dimensions. 63 | # r: Boolean, Whether to use the residuals. 64 | # # Returns 65 | # Output tensor. 66 | # """ 67 | 68 | # channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 69 | # tchannel = K.int_shape(inputs)[channel_axis] * t 70 | # filters = _make_divisible(filters * alpha) 71 | 72 | # x = _conv_block(inputs, tchannel, (1, 1), (1, 1)) 73 | 74 | # x = DepthwiseConv2D(kernel, strides=( 75 | # s, s), depth_multiplier=1, padding='same')(x) 76 | # x = BatchNormalization(axis=channel_axis)(x) 77 | # x = ReLU(max_value=6)(x) 78 | 79 | # x = Conv2D(filters, (1, 1), strides=(1, 1), padding='same')(x) 80 | # x = BatchNormalization(axis=channel_axis)(x) 81 | 82 | # if r: 83 | # x = add([x, inputs]) 84 | # return x 85 | 86 | 87 | # def _inverted_residual_block(inputs, filters, kernel, t, strides, n, alpha=1): 88 | # """Inverted Residual Block 89 | # This function defines a sequence of 1 or more identical layers. 90 | # # Arguments 91 | # inputs: Tensor, input tensor of conv layer. 92 | # filters: Integer, the dimensionality of the output space. 93 | # kernel: An integer or tuple/list of 2 integers, specifying the 94 | # width and height of the 2D convolution window. 95 | # t: Integer, expansion factor. 96 | # t is always applied to the input size. 97 | # s: An integer or tuple/list of 2 integers,specifying the strides 98 | # of the convolution along the width and height.Can be a single 99 | # integer to specify the same value for all spatial dimensions. 100 | # n: Integer, layer repeat times. 101 | # # Returns 102 | # Output tensor. 
103 | # """ 104 | # x = _bottleneck(inputs, filters, kernel, t, strides, alpha=alpha) 105 | 106 | # for i in range(1, n): 107 | # x = _bottleneck(x, filters, kernel, t, 1, alpha=alpha, r=True) 108 | 109 | # return x 110 | 111 | # # https://github.com/titu1994/MobileNetworks/blob/master/mobilenets.py 112 | 113 | 114 | # def _make_divisible(v, divisor=8, min_value=8): 115 | # if min_value is None: 116 | # min_value = divisor 117 | 118 | # new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 119 | # # Make sure that round down does not go down by more than 10%. 120 | # if new_v < 0.9 * v: 121 | # new_v += divisor 122 | # return new_v 123 | 124 | 125 | # def PFLDNetBackbone(input_shape, output_nodes, alpha=1): 126 | # """ 127 | # This function defines a PFLDNet architectures. 128 | # # Arguments 129 | # input_shape: An integer or tuple/list of 3 integers, shape 130 | # of input tensor. 131 | # output_nodes: Integer, number of classes. 132 | # alpha: width parameter. 133 | # # Returns 134 | # PFLDNet model. 135 | # """ 136 | 137 | # inputs = Input(shape=input_shape) 138 | # # https://mp.weixin.qq.com/s/0oMqwQn2UlYYk557sbPBsQ 139 | # x = ZeroPadding2D(padding=(1, 1))(inputs) 140 | # x = _conv_block(x, 64, (3, 3), strides=1, dilation_rate=2) 141 | # x = _depthwise_block(x, (3, 3), strides=2) 142 | # s1_b = _inverted_residual_block( 143 | # x, 64, (3, 3), t=2, strides=2, n=5, alpha=alpha) 144 | # x = _inverted_residual_block( 145 | # s1_b, 128, (3, 3), t=2, strides=2, n=1, alpha=alpha) 146 | # x = _inverted_residual_block( 147 | # x, 128, (3, 3), t=4, strides=1, n=6, alpha=alpha) 148 | # s1 = _inverted_residual_block( 149 | # x, 256, (3, 3), t=2, strides=1, n=1, alpha=alpha) 150 | # s2 = _conv_block(s1, 256, (3, 3), strides=1, dilation_rate=2) 151 | # s3 = _conv_block(s2, 256, (3, 3), strides=1, dilation_rate=2) 152 | 153 | # # 106 Landmarks branch 154 | # # t1_g = Flatten()(s1) 155 | # # t2_g = Flatten()(s2) 156 | # # t3_g = Flatten()(s3) 157 | # # t1_212 = Dense(units=output_nodes, name='b1_s1')(t1_g) 158 | # # t2_212 = Dense(units=output_nodes, name='b1_s2')(t1_g) 159 | # # t3_212 = Dense(units=output_nodes, name='b1_s3')(t1_g) 160 | # # t1_out = Add(name='b1_s')([t1_212, t2_212, t3_212]) 161 | # t1_g = GlobalAveragePooling2D()(s1) 162 | # t2_g = GlobalAveragePooling2D()(s2) 163 | # t3_g = GlobalAveragePooling2D()(s3) 164 | # concat = Concatenate()([t1_g, t2_g, t3_g]) 165 | # t1_out = Dense(units=output_nodes, name='b1_s')(concat) 166 | 167 | # # Pose branch 168 | # v1 = _conv_block(s1_b, 128, (3, 3), strides=2) 169 | # v2 = _conv_block(v1, 128, (3, 3), strides=1) 170 | # v3 = _conv_block(v2, 32, (3, 3), strides=2) 171 | # v4 = _conv_block(v3, 128, (7, 7), strides=1, padding='valid') 172 | # t2_out = Dense(units=3, name='b2_s')(Flatten()(v4)) 173 | 174 | # # TODO angle... 175 | 176 | # # Merge branch 177 | # model = Model(inputs, [t1_out, t2_out]) 178 | 179 | # return model 180 | 181 | 182 | # if __name__ == '__main__': 183 | 184 | # # Testing designed network 185 | # model = PFLDNetBackbone((112, 112, 3), 212, alpha=1.0) 186 | # vis = True 187 | 188 | # if vis: 189 | # model.summary() 190 | # # plot_model(model, to_file='PFLDNet.png', show_shapes=True) 191 | 192 | # # inputs = np.random.randn(1, 112, 112, 3) 193 | 194 | # # for i in range(100): 195 | # # start = time.time() 196 | # # model.predict(inputs, batch_size=1) 197 | # # print("[info] time use {}".format(time.time() - start)) 198 | 199 | 200 | """MobileNet v3 small models for Keras. 
201 | # Reference
202 |     [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244?context=cs)
203 | """
204 | from keras import backend as K
205 | from keras.layers import *
206 | from keras.models import Model
207 | from keras.utils.generic_utils import get_custom_objects
208 | 
209 | 
210 | def hard_swish(x):
211 |     return x * K.relu(x + 3.0, max_value=6.0) / 6.0
212 | 
213 | 
214 | def relu6(x):
215 |     return K.relu(x, max_value=6.0)
216 | 
217 | # Custom activation functions registered with Keras
218 | get_custom_objects().update({'hard_swish': Activation(hard_swish)})
219 | get_custom_objects().update({'relu6': Activation(relu6)})
220 | 
221 | 
222 | class MobileNetBase:
223 |     def __init__(self, shape, n_class):
224 |         self.shape = shape
225 |         self.n_class = n_class
226 | 
227 |     # def _relu6(self, x):
228 |     #     """Relu 6
229 |     #     """
230 |     #     return K.relu(x, max_value=6.0)
231 | 
232 |     # def _hard_swish(self, x):
233 |     #     """Hard swish
234 |     #     """
235 |     #     return x * K.relu(x + 3.0, max_value=6.0) / 6.0
236 | 
237 |     def _return_activation(self, x, nl):
238 |         """Activation helper
239 |         This function applies the selected nonlinearity.
240 | 
241 |         # Arguments
242 |             x: Tensor, input tensor of conv layer.
243 |             nl: String, nonlinearity activation type.
244 | 
245 |         # Returns
246 |             Output tensor.
247 |         """
248 |         if nl == 'HS':
249 |             x = Activation(hard_swish)(x)
250 |         if nl == 'RE':
251 |             x = Activation(relu6)(x)
252 | 
253 |         return x
254 | 
255 |     def _conv_block(self, inputs, filters, kernel, strides, nl):
256 |         """Convolution Block
257 |         This function defines a 2D convolution operation with BN and activation.
258 | 
259 |         # Arguments
260 |             inputs: Tensor, input tensor of conv layer.
261 |             filters: Integer, the dimensionality of the output space.
262 |             kernel: An integer or tuple/list of 2 integers, specifying the
263 |                 width and height of the 2D convolution window.
264 |             strides: An integer or tuple/list of 2 integers,
265 |                 specifying the strides of the convolution along the width and height.
266 |                 Can be a single integer to specify the same value for
267 |                 all spatial dimensions.
268 |             nl: String, nonlinearity activation type.
269 | 
270 |         # Returns
271 |             Output tensor.
272 |         """
273 | 
274 |         channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
275 | 
276 |         x = Conv2D(filters, kernel, padding='same', strides=strides)(inputs)
277 |         x = BatchNormalization(axis=channel_axis)(x)
278 | 
279 |         return self._return_activation(x, nl)
280 | 
281 |     def _squeeze(self, inputs):
282 |         """Squeeze and Excitation.
283 |         This function defines a squeeze-and-excitation structure.
284 | 
285 |         # Arguments
286 |             inputs: Tensor, input tensor of conv layer.
287 |         """
288 |         # input_channels = int(inputs.shape[-1])
289 |         input_channels = inputs._keras_shape[-1]
290 | 
291 |         x = GlobalAveragePooling2D()(inputs)
292 |         x = Dense(int(input_channels/4), activation='relu')(x)
293 |         x = Dense(input_channels, activation='hard_sigmoid')(x)
294 |         x = Reshape((1, 1, -1))(x)
295 |         x = multiply([inputs, x])
296 | 
297 |         return x
298 | 
299 |     def _bottleneck(self, inputs, filters, kernel, e, s, squeeze, nl):
300 |         """Bottleneck
301 |         This function defines a basic bottleneck structure.
302 | 
303 |         # Arguments
304 |             inputs: Tensor, input tensor of conv layer.
305 |             filters: Integer, the dimensionality of the output space.
306 |             kernel: An integer or tuple/list of 2 integers, specifying the
307 |                 width and height of the 2D convolution window.
308 |             e: Integer, expanded channel count of the 1x1 expansion.
309 |                 It is used directly (tchannel = e), not as a multiplier.
310 |             s: An integer or tuple/list of 2 integers, specifying the strides
311 |                 of the convolution along the width and height. Can be a single
312 |                 integer to specify the same value for all spatial dimensions.
313 |             squeeze: Boolean, whether to use squeeze-and-excitation.
314 |             nl: String, nonlinearity activation type.
315 | 
316 |         # Returns
317 |             Output tensor.
318 |         """
319 | 
320 |         channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
321 |         input_shape = K.int_shape(inputs)
322 |         tchannel = e
323 |         r = s == 1 and input_shape[3] == filters
324 |         x = self._conv_block(inputs, tchannel, (1, 1), (1, 1), nl)
325 | 
326 |         x = DepthwiseConv2D(kernel, strides=(
327 |             s, s), depth_multiplier=1, padding='same')(x)
328 |         x = BatchNormalization(axis=channel_axis)(x)
329 | 
330 |         if squeeze:
331 |             # x = Lambda(lambda x: x * self._squeeze(x))(x)
332 |             x = self._squeeze(x)
333 | 
334 |         x = self._return_activation(x, nl)
335 | 
336 |         x = Conv2D(filters, (1, 1), strides=(1, 1), padding='same')(x)
337 |         x = BatchNormalization(axis=channel_axis)(x)
338 | 
339 |         if r:
340 |             x = Add()([x, inputs])
341 | 
342 |         return x
343 | 
344 |     def build(self):
345 |         raise NotImplementedError
346 | 
347 | 
348 | class MobileNetV3(MobileNetBase):
349 |     def __init__(self, shape, n_class):
350 |         """Init.
351 | 
352 |         # Arguments
353 |             shape: An integer or tuple/list of 3 integers, shape
354 |                 of input tensor.
355 |             n_class: Integer, number of output units (212 = 106 landmarks x 2 here).
356 | 
357 |         # Returns
358 |             MobileNetV3 model.
359 |         """
360 |         super(MobileNetV3, self).__init__(shape, n_class)
361 | 
362 |     def build(self):
363 |         """Build the MobileNetV3 backbone (the active stack is the Large configuration).
364 | 
365 |         # Arguments
366 |             plot: Boolean, whether to plot the model.
367 | 
368 |         # Returns
369 |             model: Model, the two-branch Keras model.
370 |         """
371 |         inputs = Input(shape=self.shape)
372 | 
373 |         x = self._conv_block(inputs, 16, (3, 3), strides=(2, 2), nl='HS')
374 | 
375 |         # x = self._bottleneck(x, 16, (3, 3), e=16, s=2, squeeze=True, nl='RE')
376 |         # x = self._bottleneck(x, 24, (3, 3), e=72, s=2, squeeze=False, nl='RE')
377 |         # x = self._bottleneck(x, 24, (3, 3), e=88, s=1, squeeze=False, nl='RE')
378 |         # x = self._bottleneck(x, 40, (5, 5), e=96, s=2, squeeze=True, nl='HS')
379 |         # x = self._bottleneck(x, 40, (5, 5), e=240, s=1, squeeze=True, nl='HS')
380 |         # x = self._bottleneck(x, 40, (5, 5), e=240, s=1, squeeze=True, nl='HS')
381 |         # x = self._bottleneck(x, 48, (5, 5), e=120, s=1, squeeze=True, nl='HS')
382 |         # x = self._bottleneck(x, 48, (5, 5), e=144, s=1, squeeze=True, nl='HS')
383 |         # x = self._bottleneck(x, 96, (5, 5), e=288, s=2, squeeze=True, nl='HS')
384 |         # x = self._bottleneck(x, 96, (5, 5), e=576, s=1, squeeze=True, nl='HS')
385 |         # x = self._bottleneck(x, 96, (5, 5), e=576, s=1, squeeze=True, nl='HS')
386 |         x = self._bottleneck(x, 16, (3, 3), e=16, s=1, squeeze=False, nl='RE')
387 |         x = self._bottleneck(x, 24, (3, 3), e=64, s=2, squeeze=False, nl='RE')
388 |         x = self._bottleneck(x, 24, (3, 3), e=72, s=1, squeeze=False, nl='RE')
389 |         x = self._bottleneck(x, 40, (5, 5), e=72, s=2, squeeze=True, nl='RE')
390 |         x = self._bottleneck(x, 40, (5, 5), e=120, s=1, squeeze=True, nl='RE')
391 |         x = self._bottleneck(x, 40, (5, 5), e=120, s=1, squeeze=True, nl='RE')
392 |         x = self._bottleneck(x, 80, (3, 3), e=240, s=2, squeeze=False, nl='HS')
393 |         x = self._bottleneck(x, 80, (3, 3), e=200, s=1, squeeze=False, nl='HS')
394 |         x = self._bottleneck(x, 80, (3, 3), e=184, s=1, squeeze=False, nl='HS')
395 |         x = self._bottleneck(x, 80, (3, 3), e=184, s=1, squeeze=False, nl='HS')
396 |         x = self._bottleneck(x, 112, (3, 3), e=480, s=1, squeeze=True, nl='HS')
397 |         x = self._bottleneck(x, 112, (3, 3), e=672, s=1, squeeze=True, nl='HS')
398 |         x = self._bottleneck(x, 160, (5, 5), e=672, s=2, squeeze=True, nl='HS')
399 |         x = self._bottleneck(x, 160, (5, 5), e=960, s=1, squeeze=True, nl='HS')
400 |         x = self._bottleneck(x, 160, (5, 5), e=960, s=1, squeeze=True, nl='HS')
401 | 
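        # Two-branch head below: global average pooling, then 1x1 convs.
        # 'b1_s' regresses n_class values (212 = 106 landmarks x 2 in train.py)
        # and 'b2_s' regresses 3 values -- apparently the auxiliary head-pose
        # (Euler angle) branch from the PFLD paper.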
402 |         # x = self._conv_block(x, 576, (1, 1), strides=(1, 1), nl='HS')
403 |         # x = GlobalAveragePooling2D()(x)
404 |         # x = Reshape((1, 1, 576))(x)
405 |         x = self._conv_block(x, 960, (1, 1), strides=(1, 1), nl='HS')
406 |         x = GlobalAveragePooling2D()(x)
407 |         x = Reshape((1, 1, 960))(x)
408 | 
409 |         x = Conv2D(1280, (1, 1), padding='same')(x)
410 |         t1_0 = self._return_activation(x, 'HS')
411 |         t1_1 = Conv2D(self.n_class, (1, 1), padding='same')(t1_0)
412 |         t1_out = Reshape((self.n_class,), name='b1_s')(t1_1)
413 | 
414 |         t2 = Conv2D(3, (1, 1), padding='same')(t1_0)
415 |         t2_out = Reshape((3,), name='b2_s')(t2)
416 |         # Merge branches into one model
417 |         model = Model(inputs, [t1_out, t2_out])
418 | 
419 |         return model
420 | 
421 | 
422 | if __name__ == "__main__":
423 | 
424 |     model = MobileNetV3((112, 112, 3), 212).build()
425 |     model.summary()
426 | 
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | # from model import PFLDNetBackbone
3 | from model import MobileNetV3
4 | from data_generator import DataGenerator
5 | from loss import *
6 | from keras.optimizers import Adam
7 | from keras import callbacks
8 | from keras import backend as K
9 | import os
10 | import numpy as np
11 | 
12 | import tensorflow as tf
13 | tf.logging.set_verbosity(tf.logging.ERROR)
14 | 
15 | ap = argparse.ArgumentParser()
16 | ap.add_argument("--batch_size", type=int, default=128,
17 |                 help="batch size of data")
18 | # ap.add_argument("--alpha", type=float, default=1.0,
19 | #                 help="width parameter of MobileNet blocks")
20 | ap.add_argument("--lr", type=float, default=1e-3,
21 |                 help="learning rate")
22 | ap.add_argument("--checkpoints", type=str, default="./checkpoints/pfld.h5",
23 |                 help="checkpoint path")
24 | ap.add_argument("--fine_tune_path", type=str, default="./checkpoints/pfld.h5",
25 |                 help="fine-tune checkpoint path")
26 | ap.add_argument('--fine_tune', action='store_true', help='fine tune or not')
27 | ap.add_argument('--epochs', type=int,
28 |                 default=100, help='number of training epochs')
29 | ap.add_argument('--workers', type=int,
30 |                 default=4, help='number of data-loading workers')
31 | args = vars(ap.parse_args())
32 | 
33 | 
34 | class PolyDecay:
35 |     '''
36 |     Polynomial learning-rate decay: lr(epoch) = initial_lr * (1 - epoch/n_epochs)**power
37 |     '''
38 | 
39 |     def __init__(self, initial_lr, power, n_epochs):
40 |         self.initial_lr = initial_lr
41 |         self.power = power
42 |         self.n_epochs = n_epochs
43 | 
44 |     def scheduler(self, epoch):
45 |         return self.initial_lr * np.power(1.0 - 1.0 * epoch / self.n_epochs, self.power)
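# A quick sanity check of the schedule above, under the defaults used in
# __main__ below (initial_lr=1e-3, power=0.9, n_epochs=100):
#
#   sched = PolyDecay(1e-3, 0.9, 100).scheduler
#   sched(0)    # -> 1.0e-3
#   sched(50)   # -> ~5.4e-4
#   sched(99)   # -> ~1.6e-5   (polynomial, not exponential, decay)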
46 | 
47 | 
48 | if __name__ == '__main__':
49 | 
50 |     # Let TensorFlow grow GPU memory on demand
51 |     config = tf.ConfigProto()
52 |     config.gpu_options.allow_growth = True
53 |     session = tf.Session(config=config)
54 | 
55 |     train_generator = DataGenerator(
56 |         batch_size=args['batch_size'], root_dir='./new_dataset', csv_file='./new_dataset/face_mixed.csv',
57 |         shuffle=True, transformer=True)
58 |     val_generator = DataGenerator(
59 |         batch_size=args['batch_size'], root_dir='./new_test_dataset', csv_file='./new_test_dataset/face_mixed.csv')
60 | 
61 |     # model = PFLDNetBackbone(input_shape=(112, 112, 3),
62 |     #                         output_nodes=212, alpha=args['alpha'])
63 |     model = MobileNetV3(shape=(112, 112, 3), n_class=212).build()
64 | 
65 |     if args['fine_tune']:
66 |         model.load_weights(args['fine_tune_path'], by_name=True)
67 | 
68 |     # https://blog.csdn.net/laolu1573/article/details/83626555
69 |     # To train landmarks only, we can simply set the 'b2_s' loss weight to 0.
70 |     model.compile(loss={'b1_s': wing_loss, 'b2_s': smoothL1}, loss_weights={'b1_s': 2, 'b2_s': 1},
71 |                   optimizer=Adam(lr=args['lr']),
72 |                   metrics={'b1_s': normalized_mean_error, 'b2_s': 'mae'})
73 | 
74 |     if not os.path.exists("checkpoints"):
75 |         os.mkdir("checkpoints")
76 | 
77 |     filepath = "./checkpoints/{epoch:02d}-{val_loss:.5f}.h5"
78 |     tensorboard = callbacks.TensorBoard(log_dir='./checkpoints/logs')
79 |     checkpoint = callbacks.ModelCheckpoint(
80 |         filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
81 |     lr_decay = callbacks.LearningRateScheduler(
82 |         PolyDecay(args['lr'], 0.9, args['epochs']).scheduler)
83 |     callbacks_list = [checkpoint, tensorboard, lr_decay]
84 | 
85 |     model.fit_generator(
86 |         train_generator,
87 |         len(train_generator),
88 |         validation_data=val_generator,
89 |         validation_steps=len(val_generator),
90 |         epochs=args['epochs'],
91 |         verbose=1,
92 |         callbacks=callbacks_list,
93 |         use_multiprocessing=True,
94 |         workers=args['workers']
95 |     )
96 |     model.save(args['checkpoints'])
97 | 
98 |     K.clear_session()
--------------------------------------------------------------------------------
/transformer.py:
--------------------------------------------------------------------------------
1 | # This script is intended only for algorithm verification
2 | 
3 | import argparse
4 | import cv2
5 | import os
6 | import numpy as np
7 | import pandas as pd
8 | import sys
9 | from tqdm import tqdm
10 | from skimage import transform
11 | from pprint import pprint
12 | 
13 | from mtcnn.mtcnn import MTCNN
14 | 
15 | import tensorflow as tf
16 | tf.logging.set_verbosity(tf.logging.ERROR)
17 | 
18 | # from common.landmark_utils import LandmarkImageCrop
19 | # from common.landmark_helper import LandmarkHelper
20 | 
21 | ap = argparse.ArgumentParser()
22 | ap.add_argument("-l", "--landmark_txt", type=str, default='./new_dataset/landmarks.txt',
23 |                 help="path to landmarks txt")
24 | ap.add_argument("-c", "--landmark_csv", type=str, default='./new_dataset/face_landmarks.csv',
25 |                 help="existing landmarks csv")
26 | ap.add_argument("-b", "--base_dir", type=str, default='./new_dataset',
27 |                 help="base dataset dir")
28 | ap.add_argument("-s", "--output_size", type=int, default=112,
29 |                 help="output image size")
30 | ap.add_argument("-n", "--new_path", type=str, default='./align_new_dataset',
31 |                 help="directory for the aligned images")
32 | args = vars(ap.parse_args())
33 | 
34 | 
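# These reference coordinates appear to be the mean positions of the left
# pupil, right pupil and nose tip (landmarks 34, 92 and 86 of the 106-point
# scheme, as computed in __main__ below) inside a 112x112 aligned crop.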
35 | REFERENCE_FACIAL_POINTS = [[38.453125, 28.139446],
36 |                            [70.8962, 27.549734],
37 |                            [54.171013, 50.283226]]
38 | 
39 | 
40 | # def scale_and_shift(image, landmarks, scale_range, output_size):
41 | #     '''
42 | #     Auto-generate a bbox, then randomly scale and shift it.
43 | #     Args:
44 | #         image: a numpy array
45 | #         landmarks: face landmarks with format [(x1, y1), ...]. range is 0-w or h in int
46 | #         scale_range: scale bbox in (min, max). e.g. (1.3, 1.5)
47 | #         output_size: output size of image
48 | #     Returns:
49 | #         an image and landmarks will be returned
50 | #     Raises:
51 | #         No
52 | #     '''
53 | #     (x1, y1, x2, y2), new_size, need_pad, (p_x, p_y, p_w, p_h) = LandmarkImageCrop.get_bbox_of_landmarks(
54 | #         image, landmarks, scale_range, shift_rate=0.3)
55 | #     box_image = image[y1:y2, x1:x2]
56 | #     if need_pad:
57 | #         box_image = np.lib.pad(
58 | #             box_image, ((p_y, p_h), (p_x, p_w), (0, 0)), 'constant')
59 | #     box_image = cv2.resize(box_image, (output_size, output_size))
60 | #     landmarks = (landmarks - (x1 - p_x, y1 - p_y))
61 | 
62 | #     return box_image, landmarks
63 | 
64 | class FaceAlign(object):
65 |     '''Align face with MTCNN'''
66 | 
67 |     def __init__(self, out_size):
68 |         self.detector = MTCNN()
69 |         self.out_size = out_size
70 | 
71 |     def face_aligned_mtcnn(self, im):
72 |         '''
73 |         Align a face using the keypoints detected by MTCNN.
74 |         im: BGR image array
75 |         '''
76 |         try:
77 |             wrapper = self.detector.detect_faces(im[:, :, ::-1])[0]
78 |         except IndexError:  # the detector returned no faces
79 |             raise ValueError("No face...")
80 | 
81 |         points = wrapper['keypoints']
82 |         values = list(points.values())
83 |         gt_array = np.array(values).reshape((-1, 2))[:2]
84 |         ref_array = np.array(REFERENCE_FACIAL_POINTS[:2], dtype=np.float32)
85 | 
86 |         tform = transform.SimilarityTransform()
87 |         tform.estimate(gt_array, ref_array)
88 |         tfm = tform.params[0: 2, :]
89 | 
90 |         return cv2.warpAffine(
91 |             im, tfm, (self.out_size, self.out_size))
92 | 
93 |     def face_aligned(self, im, ldmarks):
94 |         '''
95 |         im: BGR array
96 |         ldmarks: [(x0, y0), ...]
97 |         '''
98 |         gt_array = np.array(ldmarks)[:2]
99 |         ref_array = np.array(REFERENCE_FACIAL_POINTS[:2], dtype=np.float32)
100 | 
101 |         tform = transform.SimilarityTransform()
102 |         tform.estimate(gt_array, ref_array)
103 |         tfm = tform.params[0: 2, :]
104 | 
105 |         return cv2.warpAffine(
106 |             im, tfm, (self.out_size, self.out_size)), tform
107 | 
108 | 
109 | if __name__ == '__main__':
110 | 
111 |     # with open('./dataset/landmarks.txt') as f:
112 | 
113 |     #     samples_list = []
114 | 
115 |     #     for line in f.readlines():
116 |     #         # Parse txt file
117 |     #         img_path, landmarks = LandmarkHelper.parse(line)
118 |     #         image_path = os.path.join("./dataset", img_path)
119 | 
120 |     #         im = cv2.imread(image_path)
121 |     #         image, landmarks = scale_and_shift(
122 |     #             im, landmarks, scale_range=(1.1, 1.5), output_size=112)
123 | 
124 |     #         cv2.imshow("image", image)
125 |     #         cv2.waitKey(0)
126 | 
127 |     if not os.path.exists(args['new_path']):
128 |         os.mkdir(args['new_path'])
129 | 
130 |     root_dir = args['base_dir']
131 |     df = pd.read_csv(args['landmark_csv'], header=None)
132 | 
133 |     ldmarks = np.array(df.iloc[:, 1:])
134 |     ldmarks = ldmarks.reshape((-1, 106, 2)) * \
135 |         (args['output_size'], args['output_size'])
136 | 
137 |     ref_leftpupil = np.mean(ldmarks[:, 34], axis=0)
138 |     ref_rightpupil = np.mean(ldmarks[:, 92], axis=0)
139 |     ref_nose = np.mean(ldmarks[:, 86], axis=0)
140 |     ref_array = np.stack(
141 |         [ref_leftpupil, ref_rightpupil, ref_nose], axis=0).astype(np.float32)
142 | 
143 |     boxes = np.empty(
144 |         (df.shape[0], args['output_size'], args['output_size'], 3), dtype=np.uint8)
145 |     landmarks = np.empty((df.shape[0], 212))
146 | 
147 |     for idx in tqdm(range(df.shape[0])):
148 | 
149 |         im = cv2.imread(os.path.join(root_dir, df.iloc[idx, 0]))
150 |         im = cv2.resize(im, (args['output_size'], args['output_size']))
151 |         gt_ldmarks = ldmarks[idx]
152 | 
153 |         gt = np.array(df.iloc[idx, 1:], dtype=np.float32).reshape(
154 |             (-1, 2)) * (args['output_size'], args['output_size'])
155 |         gt_leftpupil = gt[34]
156 |         gt_rightpupil = gt[92]
157 |         gt_nose = gt[86]
158 |         gt_array = np.stack(
159 |             [gt_leftpupil, gt_rightpupil, gt_nose], axis=0).astype(np.float32)
160 | 
161 |         # M = cv2.getAffineTransform(gt_array, ref_array)
162 |         # Similarity transformation
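        # tform.estimate fits a least-squares similarity transform (rotation,
        # uniform scale and translation) mapping this face's pupils/nose onto
        # the reference points; image and landmarks are then warped with it.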
163 |         tform = transform.SimilarityTransform()
164 |         tform.estimate(gt_array, ref_array)
165 |         tfm = tform.params[0: 2, :]
166 |         dst = cv2.warpAffine(
167 |             im, tfm, (args['output_size'], args['output_size']))
168 | 
169 |         b = np.ones((gt_ldmarks.shape[0], 1))
170 |         d = np.concatenate((gt_ldmarks, b), axis=1)
171 |         gt_ldmarks = np.dot(d, np.transpose(tfm))
172 | 
173 |         boxes[idx] = dst
174 |         landmarks[idx] = (gt_ldmarks / (args['output_size'])).flatten()
175 | 
176 |         # for ldmark in gt_ldmarks:
177 |         #     cv2.circle(
178 |         #         dst, (int(ldmark[0]), int(ldmark[1])), 2, (255, 0, 0), -1)
179 |         # cv2.imshow("image", dst)
180 |         # cv2.waitKey(0)
181 | 
182 |     # Save the aligned images and the new landmarks
183 |     ldmark_dict = dict()
184 | 
185 |     for box, ldmark, num in tqdm(zip(boxes, landmarks, np.arange(df.shape[0]))):
186 |         cv2.imwrite("{}.png".format(
187 |             os.path.join(args['new_path'], str(num).zfill(5))), box)
188 |         ldmark_dict["{}.png".format(str(num).zfill(5))] = ldmark
189 | 
190 |     df = pd.DataFrame(ldmark_dict).T
191 |     df.to_csv("{}/face_landmarks.csv".format(args['new_path']),
192 |               encoding="utf-8", header=None)
193 | 
194 |     pprint("Conversion complete!")
195 | 
--------------------------------------------------------------------------------