├── .gitignore ├── LICENSE ├── README.md ├── datasets ├── __init__.py └── landmark.py ├── demo.py ├── detector.py ├── eval.py ├── face_onnx ├── __init__.py ├── detector.py └── version-RFB-320.onnx ├── images ├── ab.png ├── dlrb.gif ├── hg.png ├── hxm.png ├── samples │ └── lt.jfif └── ym.png ├── make_json.py ├── misc ├── __init__.py ├── prepare_300wlp.py └── view.py ├── mnn_detector.py ├── models ├── __init__.py ├── mobilenet.py ├── pose.py └── slim.py ├── onnx_detector.py ├── pretrained_weights ├── slim_160_latest.mnn ├── slim_160_latest.onnx └── slim_160_latest.pth ├── recon_dataset.py ├── tracker.py ├── train.py ├── utils ├── __init__.py ├── augmentation.py ├── consoler.py ├── headpose.py ├── onnx_util.py ├── turbo │ ├── TurboJPEG.py │ ├── __init__.py │ ├── libturbojpeg.so │ ├── turbo.py │ └── turbojpeg.dll ├── visual_augmentation.py └── wing_loss.py └── weights └── empty /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | bin 8 | lib 9 | include 10 | pip-* 11 | 12 | # C extensions 13 | 14 | 15 | # Distribution / packaging 16 | .Python 17 | env/ 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *,cover 53 | .hypothesis/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # SageMath parsed files 86 | *.sage.py 87 | 88 | # dotenv 89 | .env 90 | 91 | # virtualenv 92 | .venv 93 | venv/ 94 | ENV/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | ### JetBrains template 102 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 103 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 104 | 105 | # User-specific stuff: 106 | .idea/**/workspace.xml 107 | .idea/**/tasks.xml 108 | .idea/dictionaries 109 | 110 | # Sensitive or high-churn files: 111 | .idea/**/dataSources/ 112 | .idea/**/dataSources.ids 113 | .idea/**/dataSources.xml 114 | .idea/**/dataSources.local.xml 115 | .idea/**/sqlDataSources.xml 116 | .idea/**/dynamic.xml 117 | .idea/**/uiDesigner.xml 118 | 119 | # Gradle: 120 | .idea/**/gradle.xml 121 | .idea/**/libraries 122 | 123 | # Mongo Explorer plugin: 124 | .idea/**/mongoSettings.xml 125 | 126 | ## File-based project format: 127 | *.iws 128 | 129 | ## Plugin-specific files: 130 | 131 | # IntelliJ 132 | /out/ 133 | 134 | # mpeltonen/sbt-idea plugin 135 | 
.idea_modules/ 136 | 137 | # JIRA plugin 138 | atlassian-ide-plugin.xml 139 | 140 | # Crashlytics plugin (for Android Studio and IntelliJ) 141 | com_crashlytics_export_strings.xml 142 | crashlytics.properties 143 | crashlytics-build.properties 144 | fabric.properties 145 | ### macOS template 146 | *.DS_Store 147 | .AppleDouble 148 | .LSOverride 149 | 150 | # Icon must end with two \r 151 | Icon 152 | 153 | 154 | # Thumbnails 155 | ._* 156 | 157 | # Files that might appear in the root of a volume 158 | .DocumentRevisions-V100 159 | .fseventsd 160 | .Spotlight-V100 161 | .TemporaryItems 162 | .Trashes 163 | .VolumeIcon.icns 164 | .com.apple.timemachine.donotpresent 165 | 166 | # Directories potentially created on remote AFP share 167 | .AppleDB 168 | .AppleDesktop 169 | Network Trash Folder 170 | Temporary Items 171 | .apdisk 172 | ### VisualStudioCode template 173 | .vscode/* 174 | !.vscode/settings.json 175 | !.vscode/tasks.json 176 | !.vscode/launch.json 177 | !.vscode/extensions.json 178 | .idea/ 179 | Archive.zip 180 | migrate.log* 181 | *_failed_records.json 182 | *.lock 183 | taolu/demo/demo_images/obj_detect_data.npz 184 | taolu/demo/demo_images/*.pydata 185 | weights/*.pth 186 | train.json 187 | val.json 188 | *.mp4 189 | utils/*.onnx 190 | *.mkv -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 26M Flops Facial Landmark Detection 2 | This is a PyTorch Implementation of https://github.com/610265158/face_landmark which is built with Tensorflow. Thanks very much for his pioneer contribution and impressing results. Also respect towards PFLD https://arxiv.org/pdf/1902.10859.pdf. 3 | 4 | Here is a Demo with 23M file size :D. 
The goddess Dilraba Dilmurat. 5 | 6 | ![](images/dlrb.gif) 7 | 8 | Other famous Chinese actors: 9 | 10 | ![](images/ab.png) 11 | ![](images/hg.png) 12 | ![](images/hxm.png) 13 | ![](images/ym.png) 14 | 15 | This repo is a PyTorch implementation of 610265158's face_landmark library; the username is his QQ number, so go meet him in person. 16 | 17 | ## Introduction 18 | The training and inference strategies are essentially copied from 610265158. WingLoss, multi-layer feature concatenation, and head-pose/face-attribute classification assisted training are all used. The differences from 610265158 are: 19 | 1. I trained the model with the Slim network. The computational cost at a 160x160 input is **26.5M FLOPs**. 20 | 2. I trained the model on the 300W-LP dataset. 21 | 3. I added LK optical-flow smoothing at the inference stage. 22 | 4. Native PyTorch, ONNX, and MNN Python inference code is included. 23 | 24 | The training and inference pipelines basically follow the repository above, with all of its tricks applied. The main differences are the lighter Slim network and training on 300W-LP. The Slim architecture is particularly well suited to mobile deployment through MNN, and the optical-flow tracking mainly improves visual stability. The project also ships a simple face detector for the demo; it is just a thin wrapper around the model from https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB, and you can replace it with your own. 25 | 26 | #### Environment 27 | 1. The code is tested on Windows 10. Linux should also be OK. The inference code should work on macOS, but training there... brother, don't do that to yourself. 28 | 2. PyTorch 1.2.0. 29 | 3. OpenCV 4.1.2.30. 30 | 4. Python 3 31 | 32 | Other versions should mostly be fine; if you hit problems, you will have to work around them (and the Great Firewall) yourself. 33 | 34 | ## Train 35 | Modify the data_dir variable in make_json.py and run it. It will generate train.json and val.json. 36 | The data_dir should be organized as: 37 | 38 | ``` 39 | ├── 300VW 40 | │ ├── 001_annot 41 | │ ├── 002_annot 42 | │ .... 43 | ├── 300W 44 | │ ├── 01_Indoor 45 | │ └── 02_Outdoor 46 | ├── AFW 47 | │ └── afw 48 | ├── HELEN 49 | │ ├── testset 50 | │ └── trainset 51 | ├── IBUG 52 | │ └── ibug 53 | ├── LFPW 54 | │ ├── testset 55 | │ └── trainset 56 | ``` 57 | 58 | Yes, the layout above is also copied from 610265158. 59 | 60 | Then run `python train.py`. 61 | 62 | In my experience, training is extremely slow, especially because the balanced training set grows so large. You can run `recon_dataset.py` before `python train.py` to accelerate training by reducing the size of the images. 63 | 64 | `recon_dataset.py` simply crops the images in advance so that reading the data during training is faster. 65 | 66 | Training took about 4 hours per epoch on my RTX 2060. Sad... 67 | 68 | ## Demo 69 | Run `demo.py`. Change the code as you need.
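A condensed, single-image version of what `demo.py` does is sketched below (the webcam loop, pose-text drawing, and video writer are omitted; the image path is a placeholder, and any of the three landmark `Detector` implementations can be swapped in):

```python
import cv2
import numpy as np

from face_onnx.detector import Detector as FaceDetector
# Pick one backend: detector (PyTorch, uses CUDA), mnn_detector (MNN), or onnx_detector (onnxruntime).
from onnx_detector import Detector

face_detector = FaceDetector()   # bundled Ultra-Light face detector (ONNX)
lmk_detector = Detector()        # 68-point landmark model with a 160x160 input

frame = cv2.imread("path/to/face.jpg")            # placeholder path; any BGR image works
bboxes, _ = face_detector.detect(frame)           # face boxes in pixel coordinates
if len(bboxes) > 0:
    bbox = bboxes[0].astype(np.int32)
    lmks, pry = lmk_detector.detect(frame, bbox)  # 68 (x, y) landmarks and pitch/yaw/roll
    for x, y in lmks.astype(np.int32):
        cv2.circle(frame, (int(x), int(y)), 2, (0, 255, 0), -1)
    cv2.imwrite("result.jpg", frame)
```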
70 | 71 | ## Others 72 | 谁如果训练出了效果更好的模型请分享一下,还有其它问题的话我们Issue见。 73 | -------------------------------------------------------------------------------- /datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ainrichman/Peppa-Facial-Landmark-PyTorch/238063317fd31c4c21c5c43692e6a5d769970370/datasets/__init__.py -------------------------------------------------------------------------------- /datasets/landmark.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import Dataset 2 | from utils.visual_augmentation import ColorDistort, pixel_jitter 3 | import numpy as np 4 | import copy 5 | import json 6 | import random 7 | import cv2 8 | from utils.augmentation import Rotate_aug, Affine_aug, Mirror, Padding_aug, Img_dropout 9 | from utils.headpose import get_head_pose 10 | import time 11 | from utils.turbo.TurboJPEG import TurboJPEG 12 | 13 | jpeg = TurboJPEG() 14 | 15 | symmetry = [(0, 16), (1, 15), (2, 14), (3, 13), (4, 12), (5, 11), (6, 10), (7, 9), (8, 8), 16 | (17, 26), (18, 25), (19, 24), (20, 23), (21, 22), 17 | (31, 35), (32, 34), 18 | (36, 45), (37, 44), (38, 43), (39, 42), (40, 47), (41, 46), 19 | (48, 54), (49, 53), (50, 52), (55, 59), (56, 58), (60, 64), (61, 63), (65, 67)] 20 | base_extend_range = [0.2, 0.3] 21 | 22 | 23 | class data_info(object): 24 | def __init__(self, ann_json): 25 | self.ann_json = ann_json 26 | self.metas = [] 27 | self.load_anns() 28 | 29 | def load_anns(self): 30 | with open(self.ann_json, 'r') as f: 31 | train_json_list = json.load(f) 32 | self.metas = train_json_list 33 | 34 | def get_all_sample(self): 35 | random.shuffle(self.metas) 36 | return self.metas 37 | 38 | 39 | class Landmark(Dataset): 40 | def __init__(self, ann_file, input_size=(160, 160), training_flag=True): 41 | super(Landmark, self).__init__() 42 | self.counter = 0 43 | self.time_counter = 0 44 | self.training_flag = training_flag 45 | self.raw_data_set_size = None 46 | self.color_augmentor = ColorDistort() 47 | self.lst = self.parse_file(ann_file) 48 | self.input_size = input_size 49 | 50 | def __getitem__(self, item): 51 | """Data augmentation function.""" 52 | dp = self.lst[item] 53 | fname = dp['image_path'] 54 | keypoints = dp['keypoints'] 55 | bbox = dp['bbox'] 56 | if keypoints is not None: 57 | if ".jpg" in fname: 58 | image = jpeg.imread(fname) 59 | # image = cv2.imread(fname) 60 | else: 61 | image = cv2.imread(fname) 62 | label = np.array(keypoints, dtype=np.float).reshape((-1, 2)) 63 | bbox = np.array(bbox) 64 | crop_image, label = self.augmentationCropImage(image, bbox, label, self.training_flag) 65 | 66 | if self.training_flag: 67 | if random.uniform(0, 1) > 0.5: 68 | crop_image, label = Mirror(crop_image, label=label, symmetry=symmetry) 69 | if random.uniform(0, 1) > 0.0: 70 | angle = random.uniform(-45, 45) 71 | crop_image, label = Rotate_aug(crop_image, label=label, angle=angle) 72 | if random.uniform(0, 1) > 0.5: 73 | strength = random.uniform(0, 50) 74 | crop_image, label = Affine_aug(crop_image, strength=strength, label=label) 75 | if random.uniform(0, 1) > 0.5: 76 | crop_image = self.color_augmentor(crop_image) 77 | if random.uniform(0, 1) > 0.5: 78 | crop_image = pixel_jitter(crop_image, 15) 79 | if random.uniform(0, 1) > 0.5: 80 | crop_image = Img_dropout(crop_image, 0.2) 81 | if random.uniform(0, 1) > 0.5: 82 | crop_image = Padding_aug(crop_image, 0.3) 83 | reprojectdst, euler_angle = get_head_pose(label, crop_image) 84 | PRY = 
euler_angle.reshape([-1]).astype(np.float32) / 90. 85 | cla_label = np.zeros([4]) 86 | if dp['left_eye_close']: 87 | cla_label[0] = 1 88 | if dp['right_eye_close']: 89 | cla_label[1] = 1 90 | if dp['mouth_close']: 91 | cla_label[2] = 1 92 | if dp['big_mouth_open']: 93 | cla_label[3] = 1 94 | crop_image_height, crop_image_width, _ = crop_image.shape 95 | # for point in label: 96 | # crop_image = cv2.circle(crop_image, tuple(point.astype(np.int)), 3, (255, 0, 0), -1, 1) 97 | # cv2.imshow("", crop_image) 98 | # cv2.waitKey() 99 | 100 | label = label.astype(np.float32) 101 | label[:, 0] = label[:, 0] / crop_image_width 102 | label[:, 1] = label[:, 1] / crop_image_height 103 | 104 | crop_image = crop_image.astype(np.float32) 105 | label = label.reshape([-1]).astype(np.float32) 106 | cla_label = cla_label.astype(np.float32) 107 | label = np.concatenate([label, PRY, cla_label], axis=0) 108 | 109 | crop_image = (crop_image - 127.0) / 127.0 110 | crop_image = np.transpose(crop_image, (2, 0, 1)).astype(np.float32) 111 | return crop_image, label 112 | 113 | def __len__(self): 114 | return len(self.lst) 115 | 116 | def parse_file(self, ann_file): 117 | ann_info = data_info(ann_file) 118 | all_samples = ann_info.get_all_sample() 119 | self.raw_data_set_size = len(all_samples) 120 | print("Raw Samples: " + str(self.raw_data_set_size)) 121 | if self.training_flag: 122 | balanced_samples = self.balance(all_samples) 123 | print("Balanced Samples: " + str(len(balanced_samples))) 124 | # balanced_samples = all_samples 125 | pass 126 | else: 127 | balanced_samples = all_samples 128 | return balanced_samples 129 | 130 | def balance(self, anns): 131 | res_anns = copy.deepcopy(anns) 132 | lar_count = 0 133 | for ann in anns: 134 | if ann['keypoints'] is not None: 135 | bbox = ann['bbox'] 136 | bbox_width = bbox[2] - bbox[0] 137 | bbox_height = bbox[3] - bbox[1] 138 | if bbox_width < 50 or bbox_height < 50: 139 | res_anns.remove(ann) 140 | left_eye_close = ann['left_eye_close'] 141 | right_eye_close = ann['right_eye_close'] 142 | if left_eye_close or right_eye_close: 143 | for i in range(10): 144 | res_anns.append(ann) 145 | if ann['small_eye_distance']: 146 | for i in range(20): 147 | res_anns.append(ann) 148 | if ann['small_mouth_open']: 149 | for i in range(20): 150 | res_anns.append(ann) 151 | if ann['big_mouth_open']: 152 | for i in range(50): 153 | res_anns.append(ann) 154 | if left_eye_close and not right_eye_close: 155 | for i in range(40): 156 | res_anns.append(ann) 157 | lar_count += 1 158 | if not left_eye_close and right_eye_close: 159 | for i in range(40): 160 | res_anns.append(ann) 161 | lar_count += 1 162 | return res_anns 163 | 164 | def augmentationCropImage(self, img, bbox, joints=None, is_training=True): 165 | bbox = np.array(bbox).reshape(4, ).astype(np.float32) 166 | add = max(img.shape[0], img.shape[1]) 167 | bimg = cv2.copyMakeBorder(img, add, add, add, add, borderType=cv2.BORDER_CONSTANT, value=[127., 127., 127.]) 168 | objcenter = np.array([(bbox[0] + bbox[2]) / 2., (bbox[1] + bbox[3]) / 2.]) 169 | bbox += add 170 | objcenter += add 171 | joints[:, :2] += add 172 | gt_width = (bbox[2] - bbox[0]) 173 | gt_height = (bbox[3] - bbox[1]) 174 | crop_width_half = gt_width * (1 + base_extend_range[0] * 2) // 2 175 | crop_height_half = gt_height * (1 + base_extend_range[1] * 2) // 2 176 | if is_training: 177 | min_x = int(objcenter[0] - crop_width_half + \ 178 | random.uniform(-base_extend_range[0], base_extend_range[0]) * gt_width) 179 | max_x = int(objcenter[0] + crop_width_half + \ 180 | 
random.uniform(-base_extend_range[0], base_extend_range[0]) * gt_width) 181 | min_y = int(objcenter[1] - crop_height_half + \ 182 | random.uniform(-base_extend_range[1], base_extend_range[1]) * gt_height) 183 | max_y = int(objcenter[1] + crop_height_half + \ 184 | random.uniform(-base_extend_range[1], base_extend_range[1]) * gt_height) 185 | else: 186 | min_x = int(objcenter[0] - crop_width_half) 187 | max_x = int(objcenter[0] + crop_width_half) 188 | min_y = int(objcenter[1] - crop_height_half) 189 | max_y = int(objcenter[1] + crop_height_half) 190 | joints[:, 0] = joints[:, 0] - min_x 191 | joints[:, 1] = joints[:, 1] - min_y 192 | img = bimg[min_y:max_y, min_x:max_x, :] 193 | crop_image_height, crop_image_width, _ = img.shape 194 | joints[:, 0] = joints[:, 0] / crop_image_width 195 | joints[:, 1] = joints[:, 1] / crop_image_height 196 | interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST, 197 | cv2.INTER_LANCZOS4] 198 | interp_method = random.choice(interp_methods) 199 | img = cv2.resize(img, (self.input_size[0], self.input_size[1]), interpolation=interp_method) 200 | joints[:, 0] = joints[:, 0] * self.input_size[0] 201 | joints[:, 1] = joints[:, 1] * self.input_size[1] 202 | return img, joints 203 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from face_onnx.detector import Detector as FaceDetector 3 | 4 | ''' 5 | Three detector options: 6 | 1. Original PyTorch inference detector 7 | 2. MNN Python inference detector (experimental) 8 | 3. ONNX inference detector based on onnxruntime 9 | 10 | MNN detector is only tested on Windows 10 and Centos7. 11 | ''' 12 | 13 | # from detector import Detector 14 | from mnn_detector import Detector 15 | # from onnx_detector import Detector 16 | import numpy as np 17 | 18 | face_detector = FaceDetector() 19 | lmk_detector = Detector() 20 | cap = cv2.VideoCapture(0) 21 | ret, frame = cap.read() 22 | out = cv2.VideoWriter("output.mp4", cv2.VideoWriter_fourcc(*"mp4v"), 20.0, (frame.shape[1], frame.shape[0])) 23 | while True: 24 | ret, frame = cap.read() 25 | if frame is None: 26 | break 27 | bboxes, _ = face_detector.detect(frame) 28 | if len(bboxes) != 0: 29 | bbox = bboxes[0] 30 | bbox = bbox.astype(np.int) 31 | lmks, PRY_3d = lmk_detector.detect(frame, bbox) 32 | lmks = lmks.astype(np.int) 33 | frame = cv2.rectangle(frame, tuple(bbox[0:2]), tuple(bbox[2:4]), (0, 0, 255), 1, 1) 34 | for point in lmks: 35 | frame = cv2.circle(frame, tuple(point), 2, (0, 255, 0), -1, 1) 36 | frame = cv2.putText(frame, "Pitch: {:.4f}".format(PRY_3d[0]), (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.8, 37 | (0, 255, 0), 1, 1) 38 | frame = cv2.putText(frame, "Yaw: {:.4f}".format(PRY_3d[1]), (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.8, 39 | (0, 255, 0), 1, 1) 40 | frame = cv2.putText(frame, "Roll: {:.4f}".format(PRY_3d[2]), (20, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.8, 41 | (0, 255, 0), 1, 1) 42 | cv2.imshow("Peppa Landmark Detection", frame) 43 | if cv2.waitKey(27) == ord("q"): 44 | break 45 | out.write(frame) 46 | 47 | out.release() 48 | -------------------------------------------------------------------------------- /detector.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import cv2 3 | from models.slim import Slim 4 | import numpy as np 5 | from tracker import Tracker 6 | from utils.headpose import get_head_pose 7 | import time 8 | 9 | 10 | class 
Detector: 11 | def __init__(self, detection_size=(160, 160)): 12 | self.model = Slim() 13 | self.model.load_state_dict(torch.load(open("pretrained_weights/slim_160_latest.pth", "rb"), map_location="cpu")) 14 | self.model.eval() 15 | self.model.cuda() 16 | self.tracker = Tracker() 17 | self.detection_size = detection_size 18 | 19 | def crop_image(self, orig, bbox): 20 | bbox = bbox.copy() 21 | image = orig.copy() 22 | bbox_width = bbox[2] - bbox[0] 23 | bbox_height = bbox[3] - bbox[1] 24 | face_width = (1 + 2 * 0.25) * bbox_width 25 | face_height = (1 + 2 * 0.25) * bbox_height 26 | center = [(bbox[0] + bbox[2]) // 2, (bbox[1] + bbox[3]) // 2] 27 | bbox[0] = max(0, center[0] - face_width // 2) 28 | bbox[1] = max(0, center[1] - face_height // 2) 29 | bbox[2] = min(image.shape[1], center[0] + face_width // 2) 30 | bbox[3] = min(image.shape[0], center[1] + face_height // 2) 31 | bbox = bbox.astype(np.int) 32 | crop_image = image[bbox[1]:bbox[3], bbox[0]:bbox[2], :] 33 | h, w, _ = crop_image.shape 34 | crop_image = cv2.resize(crop_image, self.detection_size) 35 | return crop_image, ([h, w, bbox[1], bbox[0]]) 36 | 37 | def detect(self, img, bbox): 38 | crop_image, detail = self.crop_image(img, bbox) 39 | crop_image = (crop_image - 127.0) / 127.0 40 | crop_image = np.array([np.transpose(crop_image, (2, 0, 1))]) 41 | crop_image = torch.tensor(crop_image).float().cuda() 42 | with torch.no_grad(): 43 | start = time.time() 44 | raw = self.model(crop_image)[0].cpu().numpy() 45 | end = time.time() 46 | print("PyTorch Inference Time: {:.6f}".format(end - start)) 47 | landmark = raw[0:136].reshape((-1, 2)) 48 | landmark[:, 0] = landmark[:, 0] * detail[1] + detail[3] 49 | landmark[:, 1] = landmark[:, 1] * detail[0] + detail[2] 50 | landmark = self.tracker.track(img, landmark) 51 | _, PRY_3d = get_head_pose(landmark, img) 52 | return landmark, PRY_3d[:, 0] 53 | -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ainrichman/Peppa-Facial-Landmark-PyTorch/238063317fd31c4c21c5c43692e6a5d769970370/eval.py -------------------------------------------------------------------------------- /face_onnx/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ainrichman/Peppa-Facial-Landmark-PyTorch/238063317fd31c4c21c5c43692e6a5d769970370/face_onnx/__init__.py -------------------------------------------------------------------------------- /face_onnx/detector.py: -------------------------------------------------------------------------------- 1 | import time 2 | import cv2 3 | import numpy as np 4 | import onnxruntime as ort 5 | import os 6 | 7 | 8 | def area_of(left_top, right_bottom): 9 | hw = np.clip(right_bottom - left_top, 0.0, None) 10 | return hw[..., 0] * hw[..., 1] 11 | 12 | 13 | def iou_of(boxes0, boxes1, eps=1e-5): 14 | overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2]) 15 | overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:]) 16 | 17 | overlap_area = area_of(overlap_left_top, overlap_right_bottom) 18 | area0 = area_of(boxes0[..., :2], boxes0[..., 2:]) 19 | area1 = area_of(boxes1[..., :2], boxes1[..., 2:]) 20 | return overlap_area / (area0 + area1 - overlap_area + eps) 21 | 22 | 23 | def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200): 24 | scores = box_scores[:, -1] 25 | boxes = box_scores[:, :-1] 26 | picked = [] 27 | indexes = 
np.argsort(scores) 28 | indexes = indexes[-candidate_size:] 29 | while len(indexes) > 0: 30 | current = indexes[-1] 31 | picked.append(current) 32 | if 0 < top_k == len(picked) or len(indexes) == 1: 33 | break 34 | current_box = boxes[current, :] 35 | indexes = indexes[:-1] 36 | rest_boxes = boxes[indexes, :] 37 | iou = iou_of( 38 | rest_boxes, 39 | np.expand_dims(current_box, axis=0), 40 | ) 41 | indexes = indexes[iou <= iou_threshold] 42 | return box_scores[picked, :] 43 | 44 | 45 | def predict(width, height, confidences, boxes, prob_threshold, iou_threshold=0.3, top_k=-1): 46 | boxes = boxes[0] 47 | confidences = confidences[0] 48 | picked_box_probs = [] 49 | picked_labels = [] 50 | for class_index in range(1, confidences.shape[1]): 51 | probs = confidences[:, class_index] 52 | mask = probs > prob_threshold 53 | probs = probs[mask] 54 | if probs.shape[0] == 0: 55 | continue 56 | subset_boxes = boxes[mask, :] 57 | box_probs = np.concatenate([subset_boxes, probs.reshape(-1, 1)], axis=1) 58 | box_probs = hard_nms(box_probs, 59 | iou_threshold=iou_threshold, 60 | top_k=top_k, 61 | ) 62 | picked_box_probs.append(box_probs) 63 | picked_labels.extend([class_index] * box_probs.shape[0]) 64 | if not picked_box_probs: 65 | return np.array([]), np.array([]), np.array([]) 66 | picked_box_probs = np.concatenate(picked_box_probs) 67 | picked_box_probs[:, 0] *= width 68 | picked_box_probs[:, 1] *= height 69 | picked_box_probs[:, 2] *= width 70 | picked_box_probs[:, 3] *= height 71 | return picked_box_probs[:, :4].astype(np.int32), np.array(picked_labels), picked_box_probs[:, 4] 72 | 73 | 74 | class Detector: 75 | def __init__(self): 76 | onnx_path = os.path.join(os.path.dirname(__file__), "version-RFB-320.onnx") 77 | self.sess = ort.InferenceSession(onnx_path) 78 | self.input_name = self.sess.get_inputs()[0].name 79 | 80 | def detect(self, orig_image): 81 | image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB) 82 | image = cv2.resize(image, (320, 240)) 83 | image_mean = np.array([127, 127, 127]) 84 | image = (image - image_mean) / 128 85 | image = np.transpose(image, [2, 0, 1]) 86 | image = np.expand_dims(image, axis=0) 87 | image = image.astype(np.float32) 88 | time_time = time.time() 89 | confidences, boxes = self.sess.run(None, {self.input_name: image}) 90 | print("Face Detector inference time:{}".format(time.time() - time_time)) 91 | boxes, labels, probs = predict(orig_image.shape[1], orig_image.shape[0], confidences, boxes, 0.8) 92 | return boxes, probs 93 | -------------------------------------------------------------------------------- /face_onnx/version-RFB-320.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ainrichman/Peppa-Facial-Landmark-PyTorch/238063317fd31c4c21c5c43692e6a5d769970370/face_onnx/version-RFB-320.onnx -------------------------------------------------------------------------------- /images/ab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ainrichman/Peppa-Facial-Landmark-PyTorch/238063317fd31c4c21c5c43692e6a5d769970370/images/ab.png -------------------------------------------------------------------------------- /images/dlrb.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ainrichman/Peppa-Facial-Landmark-PyTorch/238063317fd31c4c21c5c43692e6a5d769970370/images/dlrb.gif -------------------------------------------------------------------------------- /images/hg.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ainrichman/Peppa-Facial-Landmark-PyTorch/238063317fd31c4c21c5c43692e6a5d769970370/images/hg.png -------------------------------------------------------------------------------- /images/hxm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ainrichman/Peppa-Facial-Landmark-PyTorch/238063317fd31c4c21c5c43692e6a5d769970370/images/hxm.png -------------------------------------------------------------------------------- /images/samples/lt.jfif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ainrichman/Peppa-Facial-Landmark-PyTorch/238063317fd31c4c21c5c43692e6a5d769970370/images/samples/lt.jfif -------------------------------------------------------------------------------- /images/ym.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ainrichman/Peppa-Facial-Landmark-PyTorch/238063317fd31c4c21c5c43692e6a5d769970370/images/ym.png -------------------------------------------------------------------------------- /make_json.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import numpy as np 4 | import json 5 | import traceback 6 | 7 | from tqdm import tqdm 8 | 9 | ''' 10 | i decide to merge more data from CelebA, the data anns will be complex, so json maybe a better way. 11 | ''' 12 | 13 | data_dir = 'H:/datasets/300W_All_Orig' ########points to your director,300w 14 | 15 | train_json = 'train.json' 16 | val_json = 'val.json' 17 | img_size = 160 18 | eye_close_thres = 0.02 19 | mouth_close_thres = 0.02 20 | big_mouth_open_thres = 0.08 21 | 22 | 23 | def GetFileList(dir, fileList): 24 | newDir = dir 25 | if os.path.isfile(dir): 26 | fileList.append(dir) 27 | elif os.path.isdir(dir): 28 | for s in os.listdir(dir): 29 | # if s == "pts": 30 | # continue 31 | newDir = os.path.join(dir, s) 32 | GetFileList(newDir, fileList) 33 | return fileList 34 | 35 | 36 | pic_list = [] 37 | GetFileList(data_dir, pic_list) 38 | pic_list = [x for x in pic_list if '.jpg' in x or 'png' in x or 'jpeg' in x] 39 | random.shuffle(pic_list) 40 | ratio = 0.95 41 | train_list = pic_list[:int(ratio * len(pic_list))] 42 | val_list = pic_list[int(ratio * len(pic_list)):] 43 | 44 | train_json_list = [] 45 | for pic in tqdm(train_list): 46 | one_image_ann = {} 47 | one_image_ann['image_path'] = pic 48 | pts = pic.rsplit('.', 1)[0] + '.pts' 49 | if os.access(pic, os.F_OK) and os.access(pts, os.F_OK): 50 | try: 51 | tmp = [] 52 | with open(pts) as p_f: 53 | labels = p_f.readlines()[3:-1] 54 | for _one_p in labels: 55 | xy = _one_p.rstrip().split(' ') 56 | tmp.append([float(xy[0]), float(xy[1])]) 57 | one_image_ann['keypoints'] = tmp 58 | label = np.array(tmp).reshape((-1, 2)) 59 | bbox = [float(np.min(label[:, 0])), float(np.min(label[:, 1])), float(np.max(label[:, 0])), 60 | float(np.max(label[:, 1]))] 61 | 62 | bbox_width = bbox[2] - bbox[0] 63 | bbox_height = bbox[3] - bbox[1] 64 | 65 | left_eye_close = np.sqrt( 66 | np.square(label[37, 0] - label[41, 0]) + 67 | np.square(label[37, 1] - label[41, 1])) / bbox_height < eye_close_thres \ 68 | or np.sqrt(np.square(label[38, 0] - label[40, 0]) + 69 | np.square(label[38, 1] - label[40, 1])) / bbox_height < eye_close_thres 70 | right_eye_close = np.sqrt( 71 | np.square(label[43, 0] - label[47, 0]) + 72 | 
np.square(label[43, 1] - label[47, 1])) / bbox_height < eye_close_thres \ 73 | or np.sqrt(np.square(label[44, 0] - label[46, 0]) + 74 | np.square( 75 | label[44, 1] - label[46, 1])) / bbox_height < eye_close_thres 76 | small_eye_distance = np.sqrt(np.square(label[36, 0] - label[45, 0]) + 77 | np.square(label[36, 1] - label[45, 1])) / bbox_width < 0.5 78 | small_mouth_open = np.sqrt(np.square(label[62, 0] - label[66, 0]) + 79 | np.square(label[62, 1] - label[66, 1])) / bbox_height > 0.15 80 | big_mouth_open = np.sqrt(np.square(label[62, 0] - label[66, 0]) + 81 | np.square(label[62, 1] - label[66, 1])) / img_size > big_mouth_open_thres 82 | mouth_close = np.sqrt(np.square(label[61, 0] - label[67, 0]) + 83 | np.square(label[61, 1] - label[67, 1])) / img_size < mouth_close_thres \ 84 | or np.sqrt(np.square(label[62, 0] - label[66, 0]) + 85 | np.square(label[62, 1] - label[66, 1])) / img_size < mouth_close_thres \ 86 | or np.sqrt(np.square(label[63, 0] - label[65, 0]) + 87 | np.square(label[63, 1] - label[65, 1])) / img_size < mouth_close_thres 88 | one_image_ann['left_eye_close'] = bool(left_eye_close) 89 | one_image_ann['right_eye_close'] = bool(right_eye_close) 90 | one_image_ann['small_eye_distance'] = bool(small_eye_distance) 91 | one_image_ann['small_mouth_open'] = bool(small_mouth_open) 92 | one_image_ann['big_mouth_open'] = bool(big_mouth_open) 93 | one_image_ann['mouth_close'] = bool(mouth_close) 94 | 95 | one_image_ann['bbox'] = bbox 96 | one_image_ann['attr'] = None 97 | train_json_list.append(one_image_ann) 98 | except: 99 | print(pic) 100 | traceback.print_exc() 101 | 102 | with open(train_json, 'w') as f: 103 | json.dump(train_json_list, f, indent=2) 104 | 105 | val_json_list = [] 106 | for pic in tqdm(val_list): 107 | one_image_ann = {} 108 | 109 | ### image_path 110 | one_image_ann['image_path'] = pic 111 | 112 | #### keypoints 113 | pts = pic.rsplit('.', 1)[0] + '.pts' 114 | if os.access(pic, os.F_OK) and os.access(pts, os.F_OK): 115 | try: 116 | tmp = [] 117 | with open(pts) as p_f: 118 | labels = p_f.readlines()[3:-1] 119 | for _one_p in labels: 120 | xy = _one_p.rstrip().split(' ') 121 | tmp.append([float(xy[0]), float(xy[1])]) 122 | 123 | one_image_ann['keypoints'] = tmp 124 | 125 | label = np.array(tmp).reshape((-1, 2)) 126 | bbox = [float(np.min(label[:, 0])), float(np.min(label[:, 1])), float(np.max(label[:, 0])), 127 | float(np.max(label[:, 1]))] 128 | bbox_width = bbox[2] - bbox[0] 129 | bbox_height = bbox[3] - bbox[1] 130 | 131 | left_eye_close = np.sqrt( 132 | np.square(label[37, 0] - label[41, 0]) + 133 | np.square(label[37, 1] - label[41, 1])) / bbox_height < eye_close_thres \ 134 | or np.sqrt(np.square(label[38, 0] - label[40, 0]) + 135 | np.square(label[38, 1] - label[40, 1])) / bbox_height < eye_close_thres 136 | right_eye_close = np.sqrt( 137 | np.square(label[43, 0] - label[47, 0]) + 138 | np.square(label[43, 1] - label[47, 1])) / bbox_height < eye_close_thres \ 139 | or np.sqrt(np.square(label[44, 0] - label[46, 0]) + 140 | np.square( 141 | label[44, 1] - label[46, 1])) / bbox_height < eye_close_thres 142 | small_eye_distance = np.sqrt(np.square(label[36, 0] - label[45, 0]) + 143 | np.square(label[36, 1] - label[45, 1])) / bbox_width < 0.5 144 | small_mouth_open = np.sqrt(np.square(label[62, 0] - label[66, 0]) + 145 | np.square(label[62, 1] - label[66, 1])) / bbox_height > 0.15 146 | big_mouth_open = np.sqrt(np.square(label[62, 0] - label[66, 0]) + 147 | np.square(label[62, 1] - label[66, 1])) / img_size > big_mouth_open_thres 148 | mouth_close = 
np.sqrt(np.square(label[61, 0] - label[67, 0]) + 149 | np.square(label[61, 1] - label[67, 1])) / img_size < mouth_close_thres \ 150 | or np.sqrt(np.square(label[62, 0] - label[66, 0]) + 151 | np.square(label[62, 1] - label[66, 1])) / img_size < mouth_close_thres \ 152 | or np.sqrt(np.square(label[63, 0] - label[65, 0]) + 153 | np.square(label[63, 1] - label[65, 1])) / img_size < mouth_close_thres 154 | one_image_ann['left_eye_close'] = bool(left_eye_close) 155 | one_image_ann['right_eye_close'] = bool(right_eye_close) 156 | one_image_ann['small_eye_distance'] = bool(small_eye_distance) 157 | one_image_ann['small_mouth_open'] = bool(small_mouth_open) 158 | one_image_ann['big_mouth_open'] = bool(big_mouth_open) 159 | one_image_ann['mouth_close'] = bool(mouth_close) 160 | one_image_ann['bbox'] = bbox 161 | ### placeholder 162 | one_image_ann['attr'] = None 163 | val_json_list.append(one_image_ann) 164 | 165 | except: 166 | print(pic) 167 | 168 | with open(val_json, 'w') as f: 169 | json.dump(val_json_list, f, indent=2) 170 | -------------------------------------------------------------------------------- /misc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ainrichman/Peppa-Facial-Landmark-PyTorch/238063317fd31c4c21c5c43692e6a5d769970370/misc/__init__.py -------------------------------------------------------------------------------- /misc/prepare_300wlp.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import numpy as np 4 | import cv2 5 | from scipy.io import loadmat 6 | import shutil 7 | 8 | 9 | def flip_points(landmarks): 10 | result = landmarks.copy() 11 | result[0:8] = landmarks[9:17][::-1] 12 | result[9:17] = landmarks[0:8][::-1] 13 | result[17:22] = landmarks[22:27][::-1] 14 | result[22:27] = landmarks[17:22][::-1] 15 | result[36] = landmarks[45] 16 | result[45] = landmarks[36] 17 | result[37] = landmarks[44] 18 | result[44] = landmarks[37] 19 | result[38] = landmarks[43] 20 | result[43] = landmarks[38] 21 | result[39] = landmarks[42] 22 | result[42] = landmarks[39] 23 | result[40] = landmarks[47] 24 | result[47] = landmarks[40] 25 | result[41] = landmarks[46] 26 | result[46] = landmarks[41] 27 | result[31:33] = landmarks[34:36][::-1] 28 | result[34:36] = landmarks[31:33][::-1] 29 | result[50] = landmarks[52] 30 | result[52] = landmarks[50] 31 | result[49] = landmarks[53] 32 | result[53] = landmarks[49] 33 | result[48] = landmarks[54] 34 | result[54] = landmarks[48] 35 | result[59] = landmarks[55] 36 | result[59] = landmarks[55] 37 | result[50] = landmarks[52] 38 | result[52] = landmarks[50] 39 | result[58] = landmarks[56] 40 | result[56] = landmarks[58] 41 | result[60] = landmarks[64] 42 | result[64] = landmarks[60] 43 | result[61] = landmarks[63] 44 | result[63] = landmarks[61] 45 | result[67] = landmarks[65] 46 | result[65] = landmarks[67] 47 | return result.astype(np.int) 48 | 49 | 50 | subsets = ["AFW", "AFW_Flip", "HELEN", "HELEN_Flip", "IBUG", "IBUG_Flip", "LFPW", "LFPW_Flip"] 51 | 52 | base_path = "H:\\datasets\\300W_LP" 53 | output_base = "H:\\datasets\\300W_LP_Out" 54 | for subset in subsets: 55 | subset_path = os.path.join(base_path, subset) 56 | img_files = filter(lambda x: ".jpg" in x, os.listdir(subset_path)) 57 | output_subset = os.path.join(output_base, subset) 58 | if not os.path.exists(output_subset): 59 | os.makedirs(output_subset) 60 | for img_file in img_files: 61 | mat_file = img_file.replace(".jpg", ".mat") 62 | 
output_mat_path = os.path.join(output_subset, img_file.replace(".jpg", ".pts")) 63 | out = open(output_mat_path, "w") 64 | out.write("version: 1\nn_points: 68\n{\n") 65 | mat = loadmat( 66 | os.path.join(base_path, "landmarks", subset.replace("_Flip", ""), img_file.replace(".jpg", "_pts.mat")))[ 67 | 'pts_2d'] 68 | if "Flip" in subset: 69 | mat[:, 0] = 450 - mat[:, 0] 70 | mat = flip_points(mat) 71 | for point in mat: 72 | out.write(str(point[0])) 73 | out.write(" ") 74 | out.write(str(point[1])) 75 | out.write("\n") 76 | out.write("}\n") 77 | img_src = os.path.join(subset_path, img_file) 78 | img_dst = os.path.join(output_subset, img_file) 79 | shutil.copy(img_src, img_dst) 80 | # img = cv2.imread(img_dst) 81 | # for point in mat: 82 | # img = cv2.circle(img,tuple(point),2,(255,0,0),-1,1) 83 | # cv2.imshow("",img) 84 | # cv2.waitKey() 85 | -------------------------------------------------------------------------------- /misc/view.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import json 3 | import numpy as np 4 | 5 | labels = json.load(open("../val.json")) 6 | for label in labels: 7 | img= cv2.imread(label['image_path']) 8 | landmarks = np.array(label['keypoints']).reshape((68,2)).astype(np.int) 9 | for point in landmarks: 10 | img = cv2.circle(img,tuple(point),2,(255,0,0),-1,1) 11 | cv2.imshow("",img) 12 | cv2.waitKey() -------------------------------------------------------------------------------- /mnn_detector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import MNN 3 | import cv2 4 | import time 5 | from utils.headpose import get_head_pose 6 | from tracker import Tracker 7 | 8 | 9 | class Detector: 10 | def __init__(self, detection_size=(160, 160)): 11 | self.interpreter = MNN.Interpreter("pretrained_weights/slim_160_latest.mnn") 12 | self.session = self.interpreter.createSession() 13 | self.input_tensor = self.interpreter.getSessionInput(self.session) 14 | self.detection_size = detection_size 15 | self.tracker = Tracker() 16 | 17 | def crop_image(self, orig, bbox): 18 | bbox = bbox.copy() 19 | image = orig.copy() 20 | bbox_width = bbox[2] - bbox[0] 21 | bbox_height = bbox[3] - bbox[1] 22 | face_width = (1 + 2 * 0.2) * bbox_width 23 | face_height = (1 + 2 * 0.2) * bbox_height 24 | center = [(bbox[0] + bbox[2]) // 2, (bbox[1] + bbox[3]) // 2] 25 | bbox[0] = max(0, center[0] - face_width // 2) 26 | bbox[1] = max(0, center[1] - face_height // 2) 27 | bbox[2] = min(image.shape[1], center[0] + face_width // 2) 28 | bbox[3] = min(image.shape[0], center[1] + face_height // 2) 29 | bbox = bbox.astype(np.int) 30 | crop_image = image[bbox[1]:bbox[3], bbox[0]:bbox[2], :] 31 | h, w, _ = crop_image.shape 32 | crop_image = cv2.resize(crop_image, self.detection_size) 33 | return crop_image, ([h, w, bbox[1], bbox[0]]) 34 | 35 | def detect(self, img, bbox): 36 | crop_image, detail = self.crop_image(img, bbox) 37 | crop_image = (crop_image - 127.0) / 127.0 38 | crop_image = np.array([np.transpose(crop_image, (2, 0, 1))]).astype(np.float32) 39 | tmp_input = MNN.Tensor((1, 3, *self.detection_size), MNN.Halide_Type_Float, crop_image, 40 | MNN.Tensor_DimensionType_Caffe) 41 | self.input_tensor.copyFrom(tmp_input) 42 | start = time.time() 43 | self.interpreter.runSession(self.session) 44 | raw = np.array(self.interpreter.getSessionOutput(self.session).getData()) 45 | end = time.time() 46 | print("MNN Inference Time: {:.6f}".format(end - start)) 47 | landmark = raw[0:136].reshape((-1, 
2)) 48 | landmark[:, 0] = landmark[:, 0] * detail[1] + detail[3] 49 | landmark[:, 1] = landmark[:, 1] * detail[0] + detail[2] 50 | landmark = self.tracker.track(img, landmark) 51 | _, PRY_3d = get_head_pose(landmark, img) 52 | return landmark, PRY_3d[:, 0] 53 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ainrichman/Peppa-Facial-Landmark-PyTorch/238063317fd31c4c21c5c43692e6a5d769970370/models/__init__.py -------------------------------------------------------------------------------- /models/mobilenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | class MobileNet(torch.nn.Module): 6 | def __init__(self): 7 | super(MobileNet, self).__init__() 8 | -------------------------------------------------------------------------------- /models/pose.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | 3 | 4 | import cv2 5 | import numpy as np 6 | 7 | 8 | 9 | 10 | # object_pts = np.float32([[6.825897, 6.760612, 4.402142], 11 | # [1.330353, 7.122144, 6.903745], 12 | # [-1.330353, 7.122144, 6.903745], 13 | # [-6.825897, 6.760612, 4.402142], 14 | # [5.311432, 5.485328, 3.987654], 15 | # [1.789930, 5.393625, 4.413414], 16 | # [-1.789930, 5.393625, 4.413414], 17 | # [-5.311432, 5.485328, 3.987654], 18 | # [2.005628, 1.409845, 6.165652], 19 | # [-2.005628, 1.409845, 6.165652], 20 | # [2.774015, -2.080775, 5.048531], 21 | # [-2.774015, -2.080775, 5.048531], 22 | # [0.000000, -3.116408, 6.097667], 23 | # [0.000000, -7.415691, 4.070434]]) 24 | object_pts = np.float32([[6.825897, 6.760612, 4.402142], 25 | [1.330353, 7.122144, 6.903745], 26 | [-1.330353, 7.122144, 6.903745], 27 | [-6.825897, 6.760612, 4.402142], 28 | [5.311432, 5.485328, 3.987654], 29 | [1.789930, 5.393625, 4.413414], 30 | [-1.789930, 5.393625, 4.413414], 31 | [-5.311432, 5.485328, 3.987654], 32 | [2.005628, 1.409845, 6.165652], 33 | [-2.005628, 1.409845, 6.165652]]) 34 | reprojectsrc = np.float32([[10.0, 10.0, 10.0], 35 | [10.0, 10.0, -10.0], 36 | [10.0, -10.0, -10.0], 37 | [10.0, -10.0, 10.0], 38 | [-10.0, 10.0, 10.0], 39 | [-10.0, 10.0, -10.0], 40 | [-10.0, -10.0, -10.0], 41 | [-10.0, -10.0, 10.0]]) 42 | 43 | line_pairs = [[0, 1], [1, 2], [2, 3], [3, 0], 44 | [4, 5], [5, 6], [6, 7], [7, 4], 45 | [0, 4], [1, 5], [2, 6], [3, 7]] 46 | 47 | 48 | def get_head_pose(shape,img): 49 | h,w,_=img.shape 50 | K = [w, 0.0, w//2, 51 | 0.0, w, h//2, 52 | 0.0, 0.0, 1.0] 53 | # Assuming no lens distortion 54 | D = [0, 0, 0.0, 0.0, 0] 55 | 56 | cam_matrix = np.array(K).reshape(3, 3).astype(np.float32) 57 | dist_coeffs = np.array(D).reshape(5, 1).astype(np.float32) 58 | 59 | 60 | 61 | # image_pts = np.float32([shape[17], shape[21], shape[22], shape[26], shape[36], 62 | # shape[39], shape[42], shape[45], shape[31], shape[35], 63 | # shape[48], shape[54], shape[57], shape[8]]) 64 | image_pts = np.float32([shape[17], shape[21], shape[22], shape[26], shape[36], 65 | shape[39], shape[42], shape[45], shape[31], shape[35]]) 66 | _, rotation_vec, translation_vec = cv2.solvePnP(object_pts, image_pts, cam_matrix, dist_coeffs) 67 | 68 | reprojectdst, _ = cv2.projectPoints(reprojectsrc, rotation_vec, translation_vec, cam_matrix, 69 | dist_coeffs) 70 | 71 | reprojectdst = tuple(map(tuple, reprojectdst.reshape(8, 2))) 72 | 73 | # calc euler angle 74 | 
rotation_mat, _ = cv2.Rodrigues(rotation_vec) 75 | pose_mat = cv2.hconcat((rotation_mat, translation_vec)) 76 | _, _, _, _, _, _, euler_angle = cv2.decomposeProjectionMatrix(pose_mat) 77 | 78 | return reprojectdst, euler_angle -------------------------------------------------------------------------------- /models/slim.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from thop import profile 4 | 5 | 6 | def conv_bn(inp, oup, stride=1): 7 | return nn.Sequential( 8 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 9 | nn.BatchNorm2d(oup), 10 | nn.ReLU(inplace=True) 11 | ) 12 | 13 | 14 | def depth_conv2d(inp, oup, kernel=1, stride=1, pad=0): 15 | return nn.Sequential( 16 | nn.Conv2d(inp, inp, kernel_size=kernel, stride=stride, padding=pad, groups=inp), 17 | nn.ReLU(inplace=True), 18 | nn.Conv2d(inp, oup, kernel_size=1) 19 | ) 20 | 21 | 22 | def conv_dw(inp, oup, stride, padding=1): 23 | return nn.Sequential( 24 | nn.Conv2d(inp, inp, 3, stride, padding, groups=inp, bias=False), 25 | nn.BatchNorm2d(inp), 26 | nn.ReLU(inplace=True), 27 | 28 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 29 | nn.BatchNorm2d(oup), 30 | nn.ReLU(inplace=True) 31 | ) 32 | 33 | 34 | class Slim(nn.Module): 35 | def __init__(self): 36 | super(Slim, self).__init__() 37 | self.num_classes = 2 38 | 39 | self.conv1 = conv_bn(3, 16, 2) 40 | self.conv2 = conv_dw(16, 32, 1) 41 | self.conv3 = conv_dw(32, 32, 2) 42 | self.conv4 = conv_dw(32, 32, 1) 43 | self.conv5 = conv_dw(32, 64, 2) 44 | self.conv6 = conv_dw(64, 64, 1) 45 | self.conv7 = conv_dw(64, 64, 1) 46 | self.conv8 = conv_dw(64, 64, 1) 47 | 48 | self.conv9 = conv_dw(64, 128, 2) 49 | self.conv10 = conv_dw(128, 128, 1) 50 | self.conv11 = conv_dw(128, 128, 1) 51 | 52 | self.conv12 = conv_dw(128, 256, 2) 53 | self.conv13 = conv_dw(256, 256, 1) 54 | 55 | self.fc = nn.Linear(448, 143) 56 | 57 | def forward(self, inputs): 58 | x1 = self.conv1(inputs) 59 | x2 = self.conv2(x1) 60 | x3 = self.conv3(x2) 61 | x4 = self.conv4(x3) 62 | x5 = self.conv5(x4) 63 | x6 = self.conv6(x5) 64 | x7 = self.conv7(x6) 65 | x8 = self.conv8(x7) 66 | output1 = x8 67 | x9 = self.conv9(x8) 68 | x10 = self.conv10(x9) 69 | x11 = self.conv11(x10) 70 | output2 = x11 71 | x12 = self.conv12(x11) 72 | x13 = self.conv13(x12) 73 | output3 = x13 74 | output1 = output1.mean(3).mean(2) 75 | output2 = output2.mean(3).mean(2) 76 | output3 = output3.mean(3).mean(2) 77 | output = self.fc(torch.cat((output1, output2, output3), 1)) 78 | return output 79 | 80 | 81 | if __name__ == '__main__': 82 | model = Slim() 83 | model.eval() 84 | x = torch.randn(1, 3, 160, 160) 85 | flops, params = profile(model, inputs=(x,)) 86 | print(flops) 87 | -------------------------------------------------------------------------------- /onnx_detector.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import cv2 3 | import numpy as np 4 | from tracker import Tracker 5 | from utils.headpose import get_head_pose 6 | import time 7 | import onnxruntime as rt 8 | 9 | 10 | class Detector: 11 | def __init__(self, detection_size=(160, 160)): 12 | self.sess = rt.InferenceSession("pretrained_weights/slim_160_latest.onnx") 13 | self.input_name = self.sess.get_inputs()[0].name 14 | self.detection_size = detection_size 15 | self.tracker = Tracker() 16 | 17 | def crop_image(self, orig, bbox): 18 | bbox = bbox.copy() 19 | image = orig.copy() 20 | bbox_width = bbox[2] - bbox[0] 21 | bbox_height = bbox[3] - bbox[1] 22 | 
face_width = (1 + 2 * 0.2) * bbox_width 23 | face_height = (1 + 2 * 0.2) * bbox_height 24 | center = [(bbox[0] + bbox[2]) // 2, (bbox[1] + bbox[3]) // 2] 25 | bbox[0] = max(0, center[0] - face_width // 2) 26 | bbox[1] = max(0, center[1] - face_height // 2) 27 | bbox[2] = min(image.shape[1], center[0] + face_width // 2) 28 | bbox[3] = min(image.shape[0], center[1] + face_height // 2) 29 | bbox = bbox.astype(int) 30 | crop_image = image[bbox[1]:bbox[3], bbox[0]:bbox[2], :] 31 | h, w, _ = crop_image.shape 32 | crop_image = cv2.resize(crop_image, self.detection_size) 33 | return crop_image, ([h, w, bbox[1], bbox[0]]) 34 | 35 | def detect(self, img, bbox): 36 | crop_image, detail = self.crop_image(img, bbox) 37 | crop_image = (crop_image - 127.0) / 127.0 38 | crop_image = np.array([np.transpose(crop_image, (2, 0, 1))]).astype(np.float32) 39 | start = time.time() 40 | raw = self.sess.run(None, {self.input_name: crop_image})[0][0] 41 | end = time.time() 42 | print("ONNX Inference Time: {:.6f}".format(end - start)) 43 | landmark = raw[0:136].reshape((-1, 2)) 44 | landmark[:, 0] = landmark[:, 0] * detail[1] + detail[3] 45 | landmark[:, 1] = landmark[:, 1] * detail[0] + detail[2] 46 | landmark = self.tracker.track(img, landmark) 47 | _, PRY_3d = get_head_pose(landmark, img) 48 | return landmark, PRY_3d[:, 0] 49 | -------------------------------------------------------------------------------- /pretrained_weights/slim_160_latest.mnn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ainrichman/Peppa-Facial-Landmark-PyTorch/238063317fd31c4c21c5c43692e6a5d769970370/pretrained_weights/slim_160_latest.mnn -------------------------------------------------------------------------------- /pretrained_weights/slim_160_latest.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ainrichman/Peppa-Facial-Landmark-PyTorch/238063317fd31c4c21c5c43692e6a5d769970370/pretrained_weights/slim_160_latest.onnx -------------------------------------------------------------------------------- /pretrained_weights/slim_160_latest.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ainrichman/Peppa-Facial-Landmark-PyTorch/238063317fd31c4c21c5c43692e6a5d769970370/pretrained_weights/slim_160_latest.pth -------------------------------------------------------------------------------- /recon_dataset.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import cv2 4 | import numpy as np 5 | from tqdm import tqdm 6 | 7 | 8 | def do(filename): 9 | gogo = json.load(open(filename)) 10 | result = [] 11 | for data in tqdm(gogo): 12 | image_path = data['image_path'].replace("\\", "/") 13 | landmarks = np.array(data['keypoints']) 14 | bbox = np.array(data['bbox']) 15 | img = cv2.imread(image_path) 16 | img_h, img_w = img.shape[0], img.shape[1] 17 | center = np.array([bbox[2] + bbox[0], bbox[1] + bbox[3]]) / 2 18 | w, h = bbox[2] - bbox[0], bbox[3] - bbox[1] 19 | ex_w, ex_h = np.array([w, h]) * [0.4, 0.6] / 2 20 | x1 = center[0] - w / 2 - ex_w * 2 21 | y1 = center[1] - h / 2 - ex_h * 2 22 | x2 = center[0] + w / 2 + ex_w * 2 23 | y2 = center[1] + h / 2 + ex_h * 2 24 | x1 = max(0, x1) 25 | y1 = max(0, y1) 26 | x2 = min(img_w, x2) 27 | y2 = min(img_h, y2) 28 | new_bbox = np.array([x1, y1, x2, y2]).astype(int) 29 | landmarks = landmarks - new_bbox[0:2] 30 | bbox[0:2] = bbox[0:2] -
new_bbox[0:2] 31 | bbox[2:4] = bbox[2:4] - new_bbox[0:2] 32 | data['bbox'] = bbox.tolist() 33 | data['keypoints'] = landmarks.tolist() 34 | img = img[new_bbox[1]:new_bbox[3], new_bbox[0]:new_bbox[2]] 35 | output_path = os.path.join("/".join(image_path.split("/")[0:-4]), image_path.split("/")[-4] + "_Output", 36 | "/".join(image_path.split("/")[-3:])) 37 | data['image_path'] = output_path 38 | output_dir = "/".join(output_path.split("/")[0:-1]) 39 | if not os.path.exists(output_dir): 40 | os.makedirs(output_dir) 41 | cv2.imwrite(output_path, img) 42 | result.append(data) 43 | json.dump(result, open(filename, "w")) 44 | 45 | 46 | do("train.json") 47 | do("val.json") 48 | -------------------------------------------------------------------------------- /tracker.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import math 4 | 5 | lk_params = dict(winSize=(40, 40), maxLevel=8, criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 5, 0.1)) 6 | 7 | 8 | def dist(a, b): 9 | return np.sum(np.power(a - b, 2), 1) 10 | 11 | 12 | class LKTracker: 13 | def __init__(self): 14 | self.prev_frame = None 15 | self.prev_points = None 16 | 17 | def delta_fn(self, prev_points, new_detected, lk_tracked): 18 | result = np.zeros(new_detected.shape) 19 | dist_detect = dist(new_detected, prev_points) 20 | dist_lk = dist(new_detected,lk_tracked) 21 | eye_indices = list(set(range(36, 48))) 22 | rest_indices = np.array(list(set(range(68)) - set(range(36, 48)))) 23 | eye_indices = np.array(eye_indices) 24 | 25 | dist_detect_eyes = dist_detect[eye_indices] 26 | dist_detect_rest = dist_detect[rest_indices] 27 | 28 | detect_eyes = new_detected[eye_indices] 29 | lk_eyes = lk_tracked[eye_indices] 30 | 31 | detect_rest = new_detected[rest_indices] 32 | lk_rest = lk_tracked[rest_indices] 33 | temp = result[eye_indices] 34 | thres = 1 35 | weight1 = 0.80 # Trust lk when less than thres 36 | weight2 = 0.85 # Trust Detector when more than thres 37 | temp[dist_detect_eyes >= thres] = detect_eyes[dist_detect_eyes >= thres] * weight2 + lk_eyes[ 38 | dist_detect_eyes >= thres] * (1 - weight2) 39 | temp[dist_detect_eyes < thres] = lk_eyes[dist_detect_eyes < thres] * weight1 + detect_eyes[ 40 | dist_detect_eyes <= thres] * (1 - weight1) 41 | result[eye_indices] = temp 42 | 43 | thres = 10 44 | temp = result[rest_indices] 45 | temp[dist_detect_rest < thres] = lk_rest[dist_detect_rest < thres] * weight1 + detect_rest[ 46 | dist_detect_rest < thres] * (1 - weight1) 47 | temp[dist_detect_rest >= thres] = detect_rest[dist_detect_rest >= thres] * weight2 + lk_rest[ 48 | dist_detect_rest >= thres] * (1 - weight2) 49 | result[rest_indices] = temp 50 | return np.array(result) 51 | 52 | def lk_track(self, next_frame, new_detected_points): 53 | if self.prev_frame is None: 54 | self.prev_frame = next_frame 55 | self.prev_points = new_detected_points 56 | return new_detected_points 57 | new_points, status, error = cv2.calcOpticalFlowPyrLK(self.prev_frame, next_frame, 58 | self.prev_points.astype(np.float32), 59 | None, **lk_params) 60 | result = self.delta_fn(self.prev_points, new_detected_points, new_points) 61 | 62 | self.prev_points = result 63 | self.prev_frame = next_frame.copy() 64 | return result 65 | 66 | 67 | class FilterTracker(): 68 | def __init__(self): 69 | self.old_frame = None 70 | self.previous_landmarks_set = None 71 | self.with_landmark = True 72 | self.thres = 1.0 73 | self.alpha = 0.95 74 | self.iou_thres = 0.5 75 | self.filter = OneEuroFilter() 76 
| 77 | def calculate(self, now_landmarks_set): 78 | if self.previous_landmarks_set is None or self.previous_landmarks_set.shape[0] == 0: 79 | self.previous_landmarks_set = now_landmarks_set 80 | result = now_landmarks_set 81 | else: 82 | if self.previous_landmarks_set.shape[0] == 0: 83 | return now_landmarks_set 84 | else: 85 | result = [] 86 | for i in range(now_landmarks_set.shape[0]): 87 | not_in_flag = True 88 | for j in range(self.previous_landmarks_set.shape[0]): 89 | if self.iou(now_landmarks_set[i], self.previous_landmarks_set[j]) > self.iou_thres: 90 | result.append(self.smooth(now_landmarks_set[i], self.previous_landmarks_set[j])) 91 | not_in_flag = False 92 | break 93 | if not_in_flag: 94 | result.append(now_landmarks_set[i]) 95 | result = np.array(result) 96 | self.previous_landmarks_set = result 97 | return result 98 | 99 | def iou(self, p_set0, p_set1): 100 | rec1 = [np.min(p_set0[:, 0]), np.min(p_set0[:, 1]), np.max(p_set0[:, 0]), np.max(p_set0[:, 1])] 101 | rec2 = [np.min(p_set1[:, 0]), np.min(p_set1[:, 1]), np.max(p_set1[:, 0]), np.max(p_set1[:, 1])] 102 | # computing area of each rectangles 103 | S_rec1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1]) 104 | S_rec2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1]) 105 | # computing the sum_area 106 | sum_area = S_rec1 + S_rec2 107 | # find the each edge of intersect rectangle 108 | x1 = max(rec1[0], rec2[0]) 109 | y1 = max(rec1[1], rec2[1]) 110 | x2 = min(rec1[2], rec2[2]) 111 | y2 = min(rec1[3], rec2[3]) 112 | # judge if there is an intersect 113 | intersect = max(0, x2 - x1) * max(0, y2 - y1) 114 | return intersect / (sum_area - intersect) 115 | 116 | def smooth(self, now_landmarks, previous_landmarks): 117 | result = [] 118 | for i in range(now_landmarks.shape[0]): 119 | dis = np.sqrt(np.square(now_landmarks[i][0] - previous_landmarks[i][0]) + np.square( 120 | now_landmarks[i][1] - previous_landmarks[i][1])) 121 | if dis < self.thres: 122 | result.append(previous_landmarks[i]) 123 | else: 124 | result.append(self.filter(now_landmarks[i], previous_landmarks[i])) 125 | return np.array(result) 126 | 127 | def do_moving_average(self, p_now, p_previous): 128 | p = self.alpha * p_now + (1 - self.alpha) * p_previous 129 | return p 130 | 131 | 132 | def smoothing_factor(t_e, cutoff): 133 | r = 2 * math.pi * cutoff * t_e 134 | return r / (r + 1) 135 | 136 | 137 | def exponential_smoothing(a, x, x_prev): 138 | return a * x + (1 - a) * x_prev 139 | 140 | 141 | class OneEuroFilter: 142 | def __init__(self, dx0=0.0, min_cutoff=1.0, beta=0.0, 143 | d_cutoff=1.0): 144 | """Initialize the one euro filter.""" 145 | self.min_cutoff = float(min_cutoff) 146 | self.beta = float(beta) 147 | self.d_cutoff = float(d_cutoff) 148 | self.dx_prev = float(dx0) 149 | 150 | def __call__(self, x, x_prev): 151 | if x_prev is None: 152 | return x 153 | t_e = 1 154 | a_d = smoothing_factor(t_e, self.d_cutoff) 155 | dx = (x - x_prev) / t_e 156 | dx_hat = exponential_smoothing(a_d, dx, self.dx_prev) 157 | cutoff = self.min_cutoff + self.beta * abs(dx_hat) 158 | a = smoothing_factor(t_e, cutoff) 159 | x_hat = exponential_smoothing(a, x, x_prev) 160 | self.dx_prev = dx_hat 161 | return x_hat 162 | 163 | 164 | class Tracker: 165 | def __init__(self): 166 | self.filter = FilterTracker() 167 | self.lk_tracker = LKTracker() 168 | 169 | def track(self, next_frame, landmarks): 170 | landmarks = self.lk_tracker.lk_track(next_frame, landmarks) 171 | landmarks = self.filter.calculate(np.array([landmarks]))[0] 172 | return landmarks 173 | 
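An illustrative usage sketch, added for clarity; it is not a file in this repository. It shows how the ONNX Detector defined in onnx_detector.py, which already runs the Tracker above on every frame, can be driven from a simple video loop. It assumes the script is run from the repository root so the pretrained ONNX weights resolve, and the fixed, centered bounding box is a placeholder assumption; in practice the box would come from a face detector such as the one under face_onnx/.

import cv2
import numpy as np
from onnx_detector import Detector

detector = Detector(detection_size=(160, 160))
cap = cv2.VideoCapture(0)  # any cv2-readable video source
while True:
    ok, frame = cap.read()
    if not ok:
        break
    h, w, _ = frame.shape
    # Placeholder box around the image center; replace with real face-detector output.
    bbox = np.array([w * 0.25, h * 0.25, w * 0.75, h * 0.75])
    landmarks, pose = detector.detect(frame, bbox)  # pose: head-pose angles in degrees
    for (x, y) in landmarks.astype(int):
        cv2.circle(frame, (int(x), int(y)), 1, (0, 255, 0), -1)
    cv2.imshow("landmarks", frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
cap.release()
cv2.destroyAllWindows()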
-------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import DataLoader 3 | from datasets.landmark import Landmark 4 | from utils.wing_loss import WingLoss 5 | from models.slim import Slim 6 | import sys 7 | import time 8 | from utils.consoler import rewrite, next_line 9 | 10 | lr_decay_every_epoch = [1, 25, 35, 75, 150] 11 | lr_value_every_epoch = [0.00001, 0.0001, 0.00005, 0.00001, 0.000001] 12 | weight_decay_factor = 5.e-4 13 | l2_regularization = weight_decay_factor 14 | if "win32" in sys.platform: 15 | input_size = (160, 160) 16 | batch_size = 128 17 | else: 18 | input_size = (128, 128) 19 | batch_size = 256 20 | 21 | 22 | class Metrics: 23 | def __init__(self): 24 | self.landmark_loss = 0 25 | self.loss_pose = 0 26 | self.leye_loss = 0 27 | self.reye_loss = 0 28 | self.mouth_loss = 0 29 | self.counter = 0 30 | 31 | def update(self, landmark_loss, loss_pose, leye_loss, reye_loss, mouth_loss): 32 | self.landmark_loss += landmark_loss.item() 33 | self.loss_pose += loss_pose.item() 34 | self.leye_loss += leye_loss.item() 35 | self.reye_loss += reye_loss.item() 36 | self.mouth_loss += mouth_loss.item() 37 | self.counter += 1 38 | 39 | def summary(self): 40 | total = (self.landmark_loss + self.loss_pose + self.leye_loss + self.reye_loss + self.mouth_loss) / self.counter 41 | return total, self.landmark_loss / self.counter, self.loss_pose / self.counter, self.leye_loss / self.counter, self.reye_loss / self.counter, self.mouth_loss / self.counter 42 | 43 | 44 | def decay(epoch): 45 | if epoch < lr_decay_every_epoch[0]: 46 | return lr_value_every_epoch[0] 47 | if epoch >= lr_decay_every_epoch[0] and epoch < lr_decay_every_epoch[1]: 48 | return lr_value_every_epoch[1] 49 | if epoch >= lr_decay_every_epoch[1] and epoch < lr_decay_every_epoch[2]: 50 | return lr_value_every_epoch[2] 51 | if epoch >= lr_decay_every_epoch[2] and epoch < lr_decay_every_epoch[3]: 52 | return lr_value_every_epoch[3] 53 | if epoch >= lr_decay_every_epoch[3] and epoch < lr_decay_every_epoch[4]: 54 | return lr_value_every_epoch[4] 55 | 56 | 57 | def calculate_loss(predict_keypoints, label_keypoints): 58 | landmark_label = label_keypoints[:, 0:136] 59 | pose_label = label_keypoints[:, 136:139] 60 | leye_cls_label = label_keypoints[:, 139] 61 | reye_cls_label = label_keypoints[:, 140] 62 | mouth_cls_label = label_keypoints[:, 141] 63 | big_mouth_cls_label = label_keypoints[:, 142] 64 | landmark_predict = predict_keypoints[:, 0:136] 65 | pose_predict = predict_keypoints[:, 136:139] 66 | leye_cls_predict = predict_keypoints[:, 139] 67 | reye_cls_predict = predict_keypoints[:, 140] 68 | mouth_cls_predict = predict_keypoints[:, 141] 69 | big_mouth_cls_predict = predict_keypoints[:, 142] 70 | landmark_loss = 2 * wing_loss_fn(landmark_predict, landmark_label) 71 | loss_pose = mse_loss_fn(pose_predict, pose_label) 72 | leye_loss = 0.8 * bce_loss_fn(leye_cls_predict, leye_cls_label) 73 | reye_loss = 0.8 * bce_loss_fn(reye_cls_predict, reye_cls_label) 74 | mouth_loss = bce_loss_fn(mouth_cls_predict, mouth_cls_label) 75 | mouth_loss_big = bce_loss_fn(big_mouth_cls_predict, big_mouth_cls_label) 76 | mouth_loss = 0.5 * (mouth_loss + mouth_loss_big) 77 | return landmark_loss + loss_pose + leye_loss + reye_loss + mouth_loss, landmark_loss, loss_pose, leye_loss, reye_loss, mouth_loss 78 | 79 | 80 | def train(epoch): 81 | model.train() 82 | metrics = Metrics() 83 | total_samples 
= 0 84 | start = time.time() 85 | print("==================================Training Phase=================================") 86 | print("Current LR:{}".format(list(optim.param_groups)[0]['lr'])) 87 | for i, (imgs, labels) in enumerate(train_loader): 88 | imgs = imgs.cuda() 89 | labels = labels.cuda() 90 | optim.zero_grad() 91 | preds = model(imgs) 92 | loss, landmark_loss, loss_pose, leye_loss, reye_loss, mouth_loss = calculate_loss(preds, labels) 93 | metrics.update(landmark_loss, loss_pose, leye_loss, reye_loss, mouth_loss) 94 | loss.backward() 95 | optim.step() 96 | total_samples += len(imgs) 97 | end = time.time() 98 | speed = (i + 1) / (end - start) 99 | progress = total_samples / len(train_dataset) 100 | rewrite( 101 | "Epoch: {} Loss -- Total: {:.4f} Landmark: {:.4f} Pose: {:.4f} LEye: {:.4f} REye: {:.4f} Mouth: {:.4f} Progress: {:.4f} Speed: {:.4f}it/s".format( 102 | epoch, loss.item(), landmark_loss.item(), loss_pose.item(), leye_loss.item(), reye_loss.item(), 103 | mouth_loss.item(), progress, speed)) 104 | next_line() 105 | avg_total_loss, avg_landmark_loss, avg_loss_pose, avg_leye_loss, avg_reye_loss, avg_mouth_loss = metrics.summary() 106 | print( 107 | "Train Avg Loss -- Total: {:.4f} Landmark: {:.4f} Poss: {:.4f} LEye: {:.4f} REye: {:.4f} Mouth: {:.4f}".format( 108 | avg_total_loss, avg_landmark_loss, avg_loss_pose, avg_leye_loss, avg_reye_loss, avg_mouth_loss)) 109 | 110 | 111 | def eval(epoch): 112 | model.eval() 113 | metrics = Metrics() 114 | start = time.time() 115 | total_samples = 0 116 | print("==================================Eval Phase=================================") 117 | for i, (imgs, labels) in enumerate(val_loader): 118 | imgs = imgs.cuda() 119 | labels = labels.cuda() 120 | with torch.no_grad(): 121 | preds = model(imgs) 122 | loss, landmark_loss, loss_pose, leye_loss, reye_loss, mouth_loss = calculate_loss(preds, labels) 123 | metrics.update(landmark_loss, loss_pose, leye_loss, reye_loss, mouth_loss) 124 | total_samples += len(imgs) 125 | end = time.time() 126 | speed = (i + 1) / (end - start) 127 | progress = total_samples / len(val_dataset) 128 | rewrite( 129 | "Epoch: {} Loss -- Total: {:.4f} Landmark: {:.4f} Pose: {:.4f} LEye: {:.4f} REye: {:.4f} Mouth: {:.4f} Progress: {:.4f} Speed: {:.4f}it/s".format( 130 | epoch, loss.item(), landmark_loss.item(), loss_pose.item(), leye_loss.item(), reye_loss.item(), 131 | mouth_loss.item(), progress, speed)) 132 | 133 | next_line() 134 | avg_total_loss, avg_landmark_loss, avg_loss_pose, avg_leye_loss, avg_reye_loss, avg_mouth_loss = metrics.summary() 135 | print( 136 | "Eval Avg Loss -- Total: {:.4f} Landmark: {:.4f} Poss: {:.4f} LEye: {:.4f} REye: {:.4f} Mouth: {:.4f}".format( 137 | avg_total_loss, avg_landmark_loss, avg_loss_pose, avg_leye_loss, avg_reye_loss, avg_mouth_loss)) 138 | torch.save(model.state_dict(), open("weights/slim128_epoch_{}_{:.4f}.pth".format(epoch, avg_landmark_loss), "wb")) 139 | 140 | 141 | if __name__ == '__main__': 142 | checkpoint = None 143 | torch.backends.cudnn.benchmark = True 144 | train_dataset = Landmark("train.json", input_size, True) 145 | train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4) 146 | val_dataset = Landmark("val.json", input_size, False) 147 | val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4) 148 | 149 | model = Slim() 150 | model.train() 151 | model.cuda() 152 | if checkpoint is not None: 153 | model.load_state_dict(torch.load(checkpoint)) 154 | start_epoch = 
int(checkpoint.split("_")[-2]) + 1 155 | else: 156 | start_epoch = 0 157 | 158 | wing_loss_fn = WingLoss() 159 | mse_loss_fn = torch.nn.MSELoss() 160 | bce_loss_fn = torch.nn.BCEWithLogitsLoss() 161 | 162 | optim = torch.optim.Adam(model.parameters(), lr=lr_value_every_epoch[0], weight_decay=5e-4) 163 | for epoch in range(start_epoch, 150): 164 | for param_group in optim.param_groups: 165 | param_group['lr'] = decay(epoch) 166 | train(epoch) 167 | eval(epoch) 168 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ainrichman/Peppa-Facial-Landmark-PyTorch/238063317fd31c4c21c5c43692e6a5d769970370/utils/__init__.py -------------------------------------------------------------------------------- /utils/augmentation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import cv2 4 | import random 5 | import math 6 | 7 | 8 | def Rotate_aug(src, angle, label=None, center=None, scale=1.0): 9 | image = src 10 | (h, w) = image.shape[:2] 11 | if center is None: 12 | center = (w / 2, h / 2) 13 | M = cv2.getRotationMatrix2D(center, angle, scale) 14 | if label is None: 15 | for i in range(image.shape[2]): 16 | image[:, :, i] = cv2.warpAffine(image[:, :, i], M, (w, h), flags=cv2.INTER_CUBIC, 17 | borderMode=cv2.BORDER_CONSTANT, borderValue=[127.,127.,127.]) 18 | return image, None 19 | else: 20 | label = label.T 21 | full_M = np.row_stack((M, np.asarray([0, 0, 1]))) 22 | img_rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT, 23 | borderValue=[127.,127.,127.]) 24 | full_label = np.row_stack((label, np.ones(shape=(1, label.shape[1])))) 25 | label_rotated = np.dot(full_M, full_label) 26 | label_rotated = label_rotated[0:2, :] 27 | label_rotated = label_rotated.T 28 | return img_rotated, label_rotated 29 | 30 | 31 | def Rotate_coordinate(label, rt_matrix): 32 | if rt_matrix.shape[0] == 2: 33 | rt_matrix = np.row_stack((rt_matrix, np.asarray([0, 0, 1]))) 34 | full_label = np.row_stack((label, np.ones(shape=(1, label.shape[1])))) 35 | label_rotated = np.dot(rt_matrix, full_label) 36 | label_rotated = label_rotated[0:2, :] 37 | return label_rotated 38 | 39 | 40 | def box_to_point(boxes): 41 | points_set = np.zeros(shape=[4 * boxes.shape[0], 2]) 42 | for i in range(boxes.shape[0]): 43 | points_set[4 * i] = np.array([boxes[i][0], boxes[i][1]]) 44 | points_set[4 * i + 1] = np.array([boxes[i][0], boxes[i][3]]) 45 | points_set[4 * i + 2] = np.array([boxes[i][2], boxes[i][3]]) 46 | points_set[4 * i + 3] = np.array([boxes[i][2], boxes[i][1]]) 47 | return points_set 48 | 49 | 50 | def point_to_box(points): 51 | boxes = [] 52 | points = points.reshape([-1, 4, 2]) 53 | for i in range(points.shape[0]): 54 | box = [np.min(points[i][:, 0]), np.min(points[i][:, 1]), np.max(points[i][:, 0]), np.max(points[i][:, 1])] 55 | boxes.append(box) 56 | return np.array(boxes) 57 | 58 | 59 | def Rotate_with_box(src, angle, boxes=None, center=None, scale=1.0): 60 | label = box_to_point(boxes) 61 | image = src 62 | (h, w) = image.shape[:2] 63 | if center is None: 64 | center = (w / 2, h / 2) 65 | M = cv2.getRotationMatrix2D(center, angle, scale) 66 | new_size = Rotate_coordinate(np.array([[0, w, w, 0], [0, 0, h, h]]), M) 67 | new_h, new_w = np.max(new_size[1]) - np.min(new_size[1]), np.max(new_size[0]) - np.min(new_size[0]) 68 | scale = min(h / new_h, w / 
new_w) 69 | M = cv2.getRotationMatrix2D(center, angle, scale) 70 | if boxes is None: 71 | for i in range(image.shape[2]): 72 | image[:, :, i] = cv2.warpAffine(image[:, :, i], M, (w, h), flags=cv2.INTER_CUBIC, 73 | borderMode=cv2.BORDER_CONSTANT) 74 | return image, None 75 | else: 76 | label = label.T 77 | full_M = np.row_stack((M, np.asarray([0, 0, 1]))) 78 | img_rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT) 79 | full_label = np.row_stack((label, np.ones(shape=(1, label.shape[1])))) 80 | label_rotated = np.dot(full_M, full_label) 81 | label_rotated = label_rotated[0:2, :] 82 | label_rotated = label_rotated.T 83 | boxes_rotated = point_to_box(label_rotated) 84 | return img_rotated, boxes_rotated 85 | 86 | 87 | def Perspective_aug(src, strength, label=None): 88 | image = src 89 | pts_base = np.float32([[0, 0], [300, 0], [0, 300], [300, 300]]) 90 | pts1 = np.random.rand(4, 2) * random.uniform(-strength, strength) + pts_base 91 | pts1 = pts1.astype(np.float32) 92 | M = cv2.getPerspectiveTransform(pts1, pts_base) 93 | trans_img = cv2.warpPerspective(image, M, (src.shape[1], src.shape[0])) 94 | label_rotated = None 95 | if label is not None: 96 | label = label.T 97 | full_label = np.row_stack((label, np.ones(shape=(1, label.shape[1])))) 98 | label_rotated = np.dot(M, full_label) 99 | label_rotated = label_rotated.astype(np.int32) 100 | label_rotated = label_rotated.T 101 | return trans_img, label_rotated 102 | 103 | 104 | def Affine_aug(src, strength, label=None): 105 | image = src 106 | pts_base = np.float32([[10, 100], [200, 50], [100, 250]]) 107 | pts1 = np.random.rand(3, 2) * random.uniform(-strength, strength) + pts_base 108 | pts1 = pts1.astype(np.float32) 109 | M = cv2.getAffineTransform(pts1, pts_base) 110 | trans_img = cv2.warpAffine(image, M, (image.shape[1], image.shape[0]), borderMode=cv2.BORDER_CONSTANT, 111 | borderValue=[127.,127.,127.]) 112 | label_rotated = None 113 | if label is not None: 114 | label = label.T 115 | full_label = np.row_stack((label, np.ones(shape=(1, label.shape[1])))) 116 | label_rotated = np.dot(M, full_label) 117 | label_rotated = label_rotated.T 118 | return trans_img, label_rotated 119 | 120 | 121 | def Padding_aug(src, max_pattern_ratio=0.05): 122 | src = src.astype(np.float32) 123 | pattern = np.ones_like(src) 124 | ratio = random.uniform(0, max_pattern_ratio) 125 | height, width, _ = src.shape 126 | if random.uniform(0, 1) > 0.5: 127 | if random.uniform(0, 1) > 0.5: 128 | pattern[0:int(ratio * height), :, :] = 0 129 | else: 130 | pattern[height - int(ratio * height):, :, :] = 0 131 | else: 132 | if random.uniform(0, 1) > 0.5: 133 | pattern[:, 0:int(ratio * width), :] = 0 134 | else: 135 | pattern[:, width - int(ratio * width):, :] = 0 136 | bias_pattern = (1 - pattern) * [127.,127.,127.] 
137 | img = src * pattern + bias_pattern 138 | img = img.astype(np.uint8) 139 | return img 140 | 141 | 142 | def Blur_heatmaps(src, ksize=(3, 3)): 143 | for i in range(src.shape[2]): 144 | src[:, :, i] = cv2.GaussianBlur(src[:, :, i], ksize, 0) 145 | amin, amax = src[:, :, i].min(), src[:, :, i].max() # 求最大最小值 146 | if amax > 0: 147 | src[:, :, i] = (src[:, :, i] - amin) / (amax - amin) # (矩阵元素-最小值)/(最大值-最小值) 148 | return src 149 | 150 | 151 | def Blur_aug(src, ksize=(3, 3)): 152 | for i in range(src.shape[2]): 153 | src[:, :, i] = cv2.GaussianBlur(src[:, :, i], ksize, 1.5) 154 | return src 155 | 156 | 157 | def Img_dropout(src, max_pattern_ratio=0.05): 158 | width_ratio = random.uniform(0, max_pattern_ratio) 159 | height_ratio = random.uniform(0, max_pattern_ratio) 160 | width = src.shape[1] 161 | height = src.shape[0] 162 | block_width = width * width_ratio 163 | block_height = height * height_ratio 164 | width_start = int(random.uniform(0, width - block_width)) 165 | width_end = int(width_start + block_width) 166 | height_start = int(random.uniform(0, height - block_height)) 167 | height_end = int(height_start + block_height) 168 | src[height_start:height_end, width_start:width_end, :] = np.array([127.,127.,127.], dtype=src.dtype) 169 | return src 170 | 171 | 172 | def Fill_img(img_raw, target_height, target_width, label=None): 173 | channel = img_raw.shape[2] 174 | raw_height = img_raw.shape[0] 175 | raw_width = img_raw.shape[1] 176 | if raw_width / raw_height >= target_width / target_height: 177 | shape_need = [int(target_height / target_width * raw_width), raw_width, channel] 178 | img_fill = np.zeros(shape_need, dtype=img_raw.dtype) + np.array([127.,127.,127.], dtype=img_raw.dtype) 179 | shift_x = (img_fill.shape[1] - raw_width) // 2 180 | shift_y = (img_fill.shape[0] - raw_height) // 2 181 | for i in range(channel): 182 | img_fill[shift_y:raw_height + shift_y, shift_x:raw_width + shift_x, i] = img_raw[:, :, i] 183 | else: 184 | shape_need = [raw_height, int(target_width / target_height * raw_height), channel] 185 | img_fill = np.zeros(shape_need, dtype=img_raw.dtype) + np.array([127.,127.,127.], dtype=img_raw.dtype) 186 | shift_x = (img_fill.shape[1] - raw_width) // 2 187 | shift_y = (img_fill.shape[0] - raw_height) // 2 188 | for i in range(channel): 189 | img_fill[shift_y:raw_height + shift_y, shift_x:raw_width + shift_x, i] = img_raw[:, :, i] 190 | if label is None: 191 | return img_fill, shift_x, shift_y 192 | else: 193 | label[:, 0] += shift_x 194 | label[:, 1] += shift_y 195 | return img_fill, label 196 | 197 | 198 | def Random_crop(src, shrink): 199 | h, w, _ = src.shape 200 | h_shrink = int(h * shrink) 201 | w_shrink = int(w * shrink) 202 | bimg = cv2.copyMakeBorder(src, h_shrink, h_shrink, w_shrink, w_shrink, borderType=cv2.BORDER_CONSTANT, 203 | value=(0, 0, 0)) 204 | start_h = random.randint(0, 2 * h_shrink) 205 | start_w = random.randint(0, 2 * w_shrink) 206 | target_img = bimg[start_h:start_h + h, start_w:start_w + w, :] 207 | return target_img 208 | 209 | 210 | def box_in_img(img, boxes, min_overlap=0.5): 211 | raw_bboxes = np.array(boxes) 212 | face_area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]) 213 | h, w, _ = img.shape 214 | boxes[:, 0][boxes[:, 0] <= 0] = 0 215 | boxes[:, 0][boxes[:, 0] >= w] = w 216 | boxes[:, 2][boxes[:, 2] <= 0] = 0 217 | boxes[:, 2][boxes[:, 2] >= w] = w 218 | boxes[:, 1][boxes[:, 1] <= 0] = 0 219 | boxes[:, 1][boxes[:, 1] >= h] = h 220 | boxes[:, 3][boxes[:, 3] <= 0] = 0 221 | boxes[:, 3][boxes[:, 3] >= h] = h 222 | boxes_in 
= [] 223 | for i in range(boxes.shape[0]): 224 | box = boxes[i] 225 | if ((box[3] - box[1]) * (box[2] - box[0])) / face_area[i] > min_overlap: 226 | boxes_in.append(boxes[i]) 227 | boxes_in = np.array(boxes_in) 228 | return boxes_in 229 | 230 | 231 | def Random_scale_withbbox(image, bboxes, target_shape, jitter=0.5): 232 | hi, wi, _ = image.shape 233 | while 1: 234 | if len(bboxes) == 0: 235 | print('errrrrrr') 236 | bboxes_ = np.array(bboxes) 237 | crop_h = int(hi * random.uniform(0.2, 1)) 238 | crop_w = int(wi * random.uniform(0.2, 1)) 239 | start_h = random.randint(0, hi - crop_h) 240 | start_w = random.randint(0, wi - crop_w) 241 | croped = image[start_h:start_h + crop_h, start_w:start_w + crop_w, :] 242 | bboxes_[:, 0] = bboxes_[:, 0] - start_w 243 | bboxes_[:, 1] = bboxes_[:, 1] - start_h 244 | bboxes_[:, 2] = bboxes_[:, 2] - start_w 245 | bboxes_[:, 3] = bboxes_[:, 3] - start_h 246 | bboxes_fix = box_in_img(croped, bboxes_) 247 | if len(bboxes_fix) > 0: 248 | break 249 | h, w = target_shape 250 | croped_h, croped_w, _ = croped.shape 251 | croped_h_w_ratio = croped_h / croped_w 252 | rescale_h = int(h * random.uniform(0.5, 1)) 253 | rescale_w = int(rescale_h / (random.uniform(0.7, 1.3) * croped_h_w_ratio)) 254 | rescale_w = np.clip(rescale_w, 0, w) 255 | image = cv2.resize(croped, (rescale_w, rescale_h)) 256 | new_image = np.zeros(shape=[h, w, 3], dtype=np.uint8) 257 | dx = int(random.randint(0, w - rescale_w)) 258 | dy = int(random.randint(0, h - rescale_h)) 259 | new_image[dy:dy + rescale_h, dx:dx + rescale_w, :] = image 260 | bboxes_fix[:, 0] = bboxes_fix[:, 0] * rescale_w / croped_w + dx 261 | bboxes_fix[:, 1] = bboxes_fix[:, 1] * rescale_h / croped_h + dy 262 | bboxes_fix[:, 2] = bboxes_fix[:, 2] * rescale_w / croped_w + dx 263 | bboxes_fix[:, 3] = bboxes_fix[:, 3] * rescale_h / croped_h + dy 264 | return new_image, bboxes_fix 265 | 266 | 267 | def Random_flip(im, boxes): 268 | im_lr = np.fliplr(im).copy() 269 | h, w, _ = im.shape 270 | xmin = w - boxes[:, 2] 271 | xmax = w - boxes[:, 0] 272 | boxes[:, 0] = xmin 273 | boxes[:, 2] = xmax 274 | return im_lr, boxes 275 | 276 | 277 | def Mirror(src, label=None, symmetry=None): 278 | img = cv2.flip(src, 1) 279 | if label is None: 280 | return img, label 281 | 282 | width = img.shape[1] 283 | cod = [] 284 | allc = [] 285 | for i in range(label.shape[0]): 286 | x, y = label[i][0], label[i][1] 287 | if x >= 0: 288 | x = width - 1 - x 289 | cod.append((x, y)) 290 | for (q, w) in symmetry: 291 | cod[q], cod[w] = cod[w], cod[q] 292 | for i in range(label.shape[0]): 293 | allc.append(cod[i][0]) 294 | allc.append(cod[i][1]) 295 | label = np.array(allc).reshape(label.shape[0], 2) 296 | return img, label 297 | 298 | 299 | def produce_heat_maps(label, map_size, stride, sigma): 300 | def produce_heat_map(center, map_size, stride, sigma): 301 | grid_y = map_size[0] // stride 302 | grid_x = map_size[1] // stride 303 | start = stride / 2.0 - 0.5 304 | y_range = [i for i in range(grid_y)] 305 | x_range = [i for i in range(grid_x)] 306 | xx, yy = np.meshgrid(x_range, y_range) 307 | xx = xx * stride + start 308 | yy = yy * stride + start 309 | d2 = (xx - center[0]) ** 2 + (yy - center[1]) ** 2 310 | exponent = d2 / 2.0 / sigma / sigma 311 | heatmap = np.exp(-exponent) 312 | am = np.amax(heatmap) 313 | if am > 0: 314 | heatmap /= am / 255. 
315 | return heatmap 316 | 317 | all_keypoints = label 318 | point_num = all_keypoints.shape[0] 319 | heatmaps_this_img = np.zeros([map_size[0] // stride, map_size[1] // stride, point_num]) 320 | for k in range(point_num): 321 | heatmap = produce_heat_map([all_keypoints[k][0], all_keypoints[k][1]], map_size, stride, sigma) 322 | heatmaps_this_img[:, :, k] = heatmap 323 | return heatmaps_this_img 324 | 325 | 326 | def visualize_heatmap_target(heatmap): 327 | map_size = heatmap.shape[0:2] 328 | frame_num = heatmap.shape[2] 329 | heat_ = np.zeros([map_size[0], map_size[1]]) 330 | for i in range(frame_num): 331 | heat_ = heat_ + heatmap[:, :, i] 332 | cv2.namedWindow('heat_map', 0) 333 | cv2.imshow('heat_map', heat_) 334 | cv2.waitKey(0) 335 | 336 | 337 | def produce_heatmaps_with_bbox(image, label, h_out, w_out, num_klass, ksize=9, sigma=0): 338 | heatmap = np.zeros(shape=[h_out, w_out, num_klass]) 339 | h, w, _ = image.shape 340 | for single_box in label: 341 | if single_box[4] >= 0: 342 | center = [(single_box[0] + single_box[2]) / 2 / w, (single_box[1] + single_box[3]) / 2 / h] ###0-1 343 | heatmap[round(center[1] * h_out), round(center[0] * w_out), int(single_box[4])] = 1. 344 | heatmap = cv2.GaussianBlur(heatmap, (ksize, ksize), sigma) 345 | am = np.amax(heatmap) 346 | if am > 0: 347 | heatmap /= am / 255. 348 | heatmap = np.expand_dims(heatmap, -1) 349 | return heatmap 350 | 351 | 352 | def produce_heatmaps_with_keypoint(image, label, h_out, w_out, num_klass, ksize=7, sigma=0): 353 | heatmap = np.zeros(shape=[h_out, w_out, num_klass]) 354 | h, w, _ = image.shape 355 | for i in range(label.shape[0]): 356 | single_point = label[i] 357 | if single_point[0] > 0 and single_point[1] > 0: 358 | heatmap[int(single_point[1] * (h_out - 1)), int(single_point[0] * (w_out - 1)), i] = 1. 359 | heatmap = cv2.GaussianBlur(heatmap, (ksize, ksize), sigma) 360 | am = np.amax(heatmap) 361 | if am > 0: 362 | heatmap /= am / 255. 
363 | return heatmap 364 | 365 | 366 | if __name__ == '__main__': 367 | pass 368 | -------------------------------------------------------------------------------- /utils/consoler.py: -------------------------------------------------------------------------------- 1 | def rewrite(text): 2 | print("\r" + text, end="") 3 | 4 | 5 | def next_line(text=""): 6 | print(text) 7 | -------------------------------------------------------------------------------- /utils/headpose.py: -------------------------------------------------------------------------------- 1 | # -*-coding:utf-8-*- 2 | 3 | 4 | import cv2 5 | import numpy as np 6 | 7 | object_pts = np.float32([[6.825897, 6.760612, 4.402142], 8 | [1.330353, 7.122144, 6.903745], 9 | [-1.330353, 7.122144, 6.903745], 10 | [-6.825897, 6.760612, 4.402142], 11 | [5.311432, 5.485328, 3.987654], 12 | [1.789930, 5.393625, 4.413414], 13 | [-1.789930, 5.393625, 4.413414], 14 | [-5.311432, 5.485328, 3.987654], 15 | [2.005628, 1.409845, 6.165652], 16 | [-2.005628, 1.409845, 6.165652]]) 17 | reprojectsrc = np.float32([[10.0, 10.0, 10.0], 18 | [10.0, 10.0, -10.0], 19 | [10.0, -10.0, -10.0], 20 | [10.0, -10.0, 10.0], 21 | [-10.0, 10.0, 10.0], 22 | [-10.0, 10.0, -10.0], 23 | [-10.0, -10.0, -10.0], 24 | [-10.0, -10.0, 10.0]]) 25 | 26 | line_pairs = [[0, 1], [1, 2], [2, 3], [3, 0], 27 | [4, 5], [5, 6], [6, 7], [7, 4], 28 | [0, 4], [1, 5], [2, 6], [3, 7]] 29 | 30 | 31 | def get_head_pose(shape, img): 32 | h, w, _ = img.shape 33 | K = [w, 0.0, w // 2, 34 | 0.0, w, h // 2, 35 | 0.0, 0.0, 1.0] 36 | D = [0, 0, 0.0, 0.0, 0] 37 | cam_matrix = np.array(K).reshape(3, 3).astype(np.float32) 38 | dist_coeffs = np.array(D).reshape(5, 1).astype(np.float32) 39 | image_pts = np.float32([shape[17], shape[21], shape[22], shape[26], shape[36], 40 | shape[39], shape[42], shape[45], shape[31], shape[35]]) 41 | _, rotation_vec, translation_vec = cv2.solvePnP(object_pts, image_pts, cam_matrix, dist_coeffs) 42 | reprojectdst, _ = cv2.projectPoints(reprojectsrc, rotation_vec, translation_vec, cam_matrix, dist_coeffs) 43 | reprojectdst = tuple(map(tuple, reprojectdst.reshape(8, 2))) 44 | rotation_mat, _ = cv2.Rodrigues(rotation_vec) 45 | pose_mat = cv2.hconcat((rotation_mat, translation_vec)) 46 | _, _, _, _, _, _, euler_angle = cv2.decomposeProjectionMatrix(pose_mat) 47 | return reprojectdst, euler_angle 48 | -------------------------------------------------------------------------------- /utils/onnx_util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.onnx 3 | from models.slim import Slim 4 | 5 | x = torch.randn(1, 3, 160, 160) 6 | model = Slim() 7 | model.load_state_dict(torch.load("../pretrained_weights/slim_160_latest.pth", map_location="cpu")) 8 | model.eval() 9 | torch.onnx.export(model, x, "../pretrained_weights/slim_160_latest.onnx", input_names=["input1"], output_names=['output1']) 10 | -------------------------------------------------------------------------------- /utils/turbo/TurboJPEG.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | # 3 | # PyTurboJPEG - A Python wrapper of libjpeg-turbo for decoding and encoding JPEG image. 4 | # 5 | # Copyright (c) 2019, LiloHuang. All rights reserved. 
6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in all 15 | # copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | # SOFTWARE. 24 | 25 | __author__ = 'Lilo Huang ' 26 | __version__ = '1.3.2' 27 | 28 | from ctypes import * 29 | from ctypes.util import find_library 30 | import platform 31 | import numpy as np 32 | import math 33 | import warnings 34 | import os 35 | 36 | # default libTurboJPEG library path 37 | base_dir = os.path.dirname(__file__) 38 | DEFAULT_LIB_PATHS = { 39 | 'Darwin': [os.path.join(base_dir,'libturbojpeg.dylib')], 40 | 'Linux': [ 41 | os.path.join(base_dir,'libturbojpeg.so') 42 | ], 43 | 'Windows': os.path.join(base_dir,'turbojpeg.dll') 44 | } 45 | 46 | # error codes 47 | # see details in https://github.com/libjpeg-turbo/libjpeg-turbo/blob/master/turbojpeg.h 48 | TJERR_WARNING = 0 49 | TJERR_FATAL = 1 50 | 51 | # color spaces 52 | # see details in https://github.com/libjpeg-turbo/libjpeg-turbo/blob/master/turbojpeg.h 53 | TJCS_RGB = 0 54 | TJCS_YCbCr = 1 55 | TJCS_GRAY = 2 56 | TJCS_CMYK = 3 57 | TJCS_YCCK = 4 58 | 59 | # pixel formats 60 | # see details in https://github.com/libjpeg-turbo/libjpeg-turbo/blob/master/turbojpeg.h 61 | TJPF_RGB = 0 62 | TJPF_BGR = 1 63 | TJPF_RGBX = 2 64 | TJPF_BGRX = 3 65 | TJPF_XBGR = 4 66 | TJPF_XRGB = 5 67 | TJPF_GRAY = 6 68 | TJPF_RGBA = 7 69 | TJPF_BGRA = 8 70 | TJPF_ABGR = 9 71 | TJPF_ARGB = 10 72 | TJPF_CMYK = 11 73 | 74 | # chrominance subsampling options 75 | # see details in https://github.com/libjpeg-turbo/libjpeg-turbo/blob/master/turbojpeg.h 76 | TJSAMP_444 = 0 77 | TJSAMP_422 = 1 78 | TJSAMP_420 = 2 79 | TJSAMP_GRAY = 3 80 | TJSAMP_440 = 4 81 | TJSAMP_411 = 5 82 | 83 | # miscellaneous flags 84 | # see details in https://github.com/libjpeg-turbo/libjpeg-turbo/blob/master/turbojpeg.h 85 | # note: TJFLAG_NOREALLOC cannot be supported due to reallocation is needed by PyTurboJPEG. 
86 | TJFLAG_BOTTOMUP = 2 87 | TJFLAG_FASTUPSAMPLE = 256 88 | TJFLAG_FASTDCT = 2048 89 | TJFLAG_ACCURATEDCT = 4096 90 | TJFLAG_STOPONWARNING = 8192 91 | TJFLAG_PROGRESSIVE = 16384 92 | 93 | 94 | class TurboJPEG(object): 95 | """A Python wrapper of libjpeg-turbo for decoding and encoding JPEG image.""" 96 | 97 | def __init__(self, lib_path=None): 98 | turbo_jpeg = cdll.LoadLibrary( 99 | self.__find_turbojpeg() if lib_path is None else lib_path) 100 | self.__init_decompress = turbo_jpeg.tjInitDecompress 101 | self.__init_decompress.restype = c_void_p 102 | self.__init_compress = turbo_jpeg.tjInitCompress 103 | self.__init_compress.restype = c_void_p 104 | self.__destroy = turbo_jpeg.tjDestroy 105 | self.__destroy.argtypes = [c_void_p] 106 | self.__destroy.restype = c_int 107 | self.__decompress_header = turbo_jpeg.tjDecompressHeader3 108 | self.__decompress_header.argtypes = [ 109 | c_void_p, POINTER(c_ubyte), c_ulong, POINTER(c_int), 110 | POINTER(c_int), POINTER(c_int), POINTER(c_int)] 111 | self.__decompress_header.restype = c_int 112 | self.__decompress = turbo_jpeg.tjDecompress2 113 | self.__decompress.argtypes = [ 114 | c_void_p, POINTER(c_ubyte), c_ulong, POINTER(c_ubyte), 115 | c_int, c_int, c_int, c_int, c_int] 116 | self.__decompress.restype = c_int 117 | self.__compress = turbo_jpeg.tjCompress2 118 | self.__compress.argtypes = [ 119 | c_void_p, POINTER(c_ubyte), c_int, c_int, c_int, c_int, 120 | POINTER(c_void_p), POINTER(c_ulong), c_int, c_int, c_int] 121 | self.__compress.restype = c_int 122 | self.__free = turbo_jpeg.tjFree 123 | self.__free.argtypes = [c_void_p] 124 | self.__free.restype = None 125 | self.__get_error_str = turbo_jpeg.tjGetErrorStr 126 | self.__get_error_str.restype = c_char_p 127 | # tjGetErrorStr2 is only available in newer libjpeg-turbo 128 | self.__get_error_str2 = getattr(turbo_jpeg, 'tjGetErrorStr2', None) 129 | if self.__get_error_str2 is not None: 130 | self.__get_error_str2.argtypes = [c_void_p] 131 | self.__get_error_str2.restype = c_char_p 132 | # tjGetErrorCode is only available in newer libjpeg-turbo 133 | self.__get_error_code = getattr(turbo_jpeg, 'tjGetErrorCode', None) 134 | if self.__get_error_code is not None: 135 | self.__get_error_code.argtypes = [c_void_p] 136 | self.__get_error_code.restype = c_int 137 | self.__scaling_factors = [] 138 | 139 | class ScalingFactor(Structure): 140 | _fields_ = ('num', c_int), ('denom', c_int) 141 | 142 | get_scaling_factors = turbo_jpeg.tjGetScalingFactors 143 | get_scaling_factors.argtypes = [POINTER(c_int)] 144 | get_scaling_factors.restype = POINTER(ScalingFactor) 145 | num_scaling_factors = c_int() 146 | scaling_factors = get_scaling_factors(byref(num_scaling_factors)) 147 | for i in range(num_scaling_factors.value): 148 | self.__scaling_factors.append( 149 | (scaling_factors[i].num, scaling_factors[i].denom)) 150 | 151 | def decode_header(self, jpeg_buf): 152 | """decodes JPEG header and returns image properties as a tuple. 153 | e.g. 
(width, height, jpeg_subsample, jpeg_colorspace) 154 | """ 155 | handle = self.__init_decompress() 156 | try: 157 | width = c_int() 158 | height = c_int() 159 | jpeg_subsample = c_int() 160 | jpeg_colorspace = c_int() 161 | jpeg_array = np.frombuffer(jpeg_buf, dtype=np.uint8) 162 | src_addr = self.__getaddr(jpeg_array) 163 | status = self.__decompress_header( 164 | handle, src_addr, jpeg_array.size, byref(width), byref(height), 165 | byref(jpeg_subsample), byref(jpeg_colorspace)) 166 | if status != 0: 167 | self.__report_error(handle) 168 | return (width.value, height.value, jpeg_subsample.value, jpeg_colorspace.value) 169 | finally: 170 | self.__destroy(handle) 171 | 172 | def imread(self, filename): 173 | return self.decode(open(filename, 'rb').read()) 174 | 175 | def decode(self, jpeg_buf, pixel_format=TJPF_BGR, scaling_factor=None, flags=0): 176 | """decodes JPEG memory buffer to numpy array.""" 177 | handle = self.__init_decompress() 178 | try: 179 | if scaling_factor is not None and \ 180 | scaling_factor not in self.__scaling_factors: 181 | raise ValueError('supported scaling factors are ' + 182 | str(self.__scaling_factors)) 183 | pixel_size = [3, 3, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4] 184 | width = c_int() 185 | height = c_int() 186 | jpeg_subsample = c_int() 187 | jpeg_colorspace = c_int() 188 | jpeg_array = np.frombuffer(jpeg_buf, dtype=np.uint8) 189 | src_addr = self.__getaddr(jpeg_array) 190 | status = self.__decompress_header( 191 | handle, src_addr, jpeg_array.size, byref(width), byref(height), 192 | byref(jpeg_subsample), byref(jpeg_colorspace)) 193 | if status != 0: 194 | self.__report_error(handle) 195 | scaled_width = width.value 196 | scaled_height = height.value 197 | if scaling_factor is not None: 198 | def get_scaled_value(dim, num, denom): 199 | return (dim * num + denom - 1) // denom 200 | 201 | scaled_width = get_scaled_value( 202 | scaled_width, scaling_factor[0], scaling_factor[1]) 203 | scaled_height = get_scaled_value( 204 | scaled_height, scaling_factor[0], scaling_factor[1]) 205 | img_array = np.empty( 206 | [scaled_height, scaled_width, pixel_size[pixel_format]], 207 | dtype=np.uint8) 208 | dest_addr = self.__getaddr(img_array) 209 | status = self.__decompress( 210 | handle, src_addr, jpeg_array.size, dest_addr, scaled_width, 211 | 0, scaled_height, pixel_format, flags) 212 | if status != 0: 213 | self.__report_error(handle) 214 | return img_array 215 | finally: 216 | self.__destroy(handle) 217 | 218 | def encode(self, img_array, quality=85, pixel_format=TJPF_BGR, jpeg_subsample=TJSAMP_422, flags=0): 219 | """encodes numpy array to JPEG memory buffer.""" 220 | handle = self.__init_compress() 221 | try: 222 | jpeg_buf = c_void_p() 223 | jpeg_size = c_ulong() 224 | height, width, _ = img_array.shape 225 | src_addr = self.__getaddr(img_array) 226 | status = self.__compress( 227 | handle, src_addr, width, img_array.strides[0], height, pixel_format, 228 | byref(jpeg_buf), byref(jpeg_size), jpeg_subsample, quality, flags) 229 | if status != 0: 230 | self.__report_error(handle) 231 | dest_buf = create_string_buffer(jpeg_size.value) 232 | memmove(dest_buf, jpeg_buf.value, jpeg_size.value) 233 | self.__free(jpeg_buf) 234 | return dest_buf.raw 235 | finally: 236 | self.__destroy(handle) 237 | 238 | def __report_error(self, handle): 239 | """reports error while error occurred""" 240 | if self.__get_error_code is not None: 241 | # using new error handling logic if possible 242 | if self.__get_error_code(handle) == TJERR_WARNING: 243 | 
warnings.warn(self.__get_error_string(handle)) 244 | return 245 | # fatal error occurred 246 | raise IOError(self.__get_error_string(handle)) 247 | 248 | def __get_error_string(self, handle): 249 | """returns error string""" 250 | if self.__get_error_str2 is not None: 251 | # using new interface if possible 252 | return self.__get_error_str2(handle).decode() 253 | # fallback to old interface 254 | return self.__get_error_str().decode() 255 | 256 | def __find_turbojpeg(self): 257 | """returns default turbojpeg library path if possible""" 258 | lib_path = find_library('turbojpeg') 259 | if lib_path is not None: 260 | return lib_path 261 | for lib_path in DEFAULT_LIB_PATHS[platform.system()]: 262 | if os.path.exists(lib_path): 263 | return lib_path 264 | if platform.system() == 'Linux' and 'LD_LIBRARY_PATH' in os.environ: 265 | ld_library_path = os.environ['LD_LIBRARY_PATH'] 266 | for path in ld_library_path.split(':'): 267 | lib_path = os.path.join(path, 'libturbojpeg.so.0') 268 | if os.path.exists(lib_path): 269 | return lib_path 270 | raise RuntimeError( 271 | 'Unable to locate turbojpeg library automatically. ' 272 | 'You may specify the turbojpeg library path manually.\n' 273 | 'e.g. jpeg = TurboJPEG(lib_path)') 274 | 275 | def __getaddr(self, nda): 276 | """returns the memory address for a given ndarray""" 277 | return cast(nda.__array_interface__['data'][0], POINTER(c_ubyte)) 278 | 279 | 280 | if __name__ == '__main__': 281 | jpeg = TurboJPEG() 282 | in_file = open('input.jpg', 'rb') 283 | img_array = jpeg.decode(in_file.read()) 284 | in_file.close() 285 | out_file = open('output.jpg', 'wb') 286 | out_file.write(jpeg.encode(img_array)) 287 | out_file.close() 288 | import cv2 289 | 290 | cv2.imshow('image', img_array) 291 | cv2.waitKey(0) 292 | -------------------------------------------------------------------------------- /utils/turbo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ainrichman/Peppa-Facial-Landmark-PyTorch/238063317fd31c4c21c5c43692e6a5d769970370/utils/turbo/__init__.py -------------------------------------------------------------------------------- /utils/turbo/libturbojpeg.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ainrichman/Peppa-Facial-Landmark-PyTorch/238063317fd31c4c21c5c43692e6a5d769970370/utils/turbo/libturbojpeg.so -------------------------------------------------------------------------------- /utils/turbo/turbo.py: -------------------------------------------------------------------------------- 1 | from utils.turbo.TurboJPEG import TurboJPEG 2 | 3 | reader = TurboJPEG() 4 | -------------------------------------------------------------------------------- /utils/turbo/turbojpeg.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ainrichman/Peppa-Facial-Landmark-PyTorch/238063317fd31c4c21c5c43692e6a5d769970370/utils/turbo/turbojpeg.dll -------------------------------------------------------------------------------- /utils/visual_augmentation.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import random 4 | 5 | 6 | def pixel_jitter(src, p=0.5, max_=5.): 7 | src = src.astype(np.float32) 8 | pattern = (np.random.rand(src.shape[0], src.shape[1], src.shape[2]) - 0.5) * 2 * max_ 9 | img = src + pattern 10 | img[img < 0] = 0 11 | img[img > 255] = 255 12 | img = 
img.astype(np.uint8) 13 | return img 14 | 15 | 16 | def gray(src): 17 | g_img = cv2.cvtColor(src, cv2.COLOR_RGB2GRAY) 18 | src[:, :, 0] = g_img 19 | src[:, :, 1] = g_img 20 | src[:, :, 2] = g_img 21 | return src 22 | 23 | 24 | def swap_change(src): 25 | a = [0, 1, 2] 26 | k = random.sample(a, 3) 27 | res = src.copy() 28 | res[:, :, 0] = src[:, :, k[0]] 29 | res[:, :, 1] = src[:, :, k[1]] 30 | res[:, :, 2] = src[:, :, k[2]] 31 | return res 32 | 33 | 34 | def Img_dropout(src, max_pattern_ratio=0.05): 35 | pattern = np.ones_like(src) 36 | width_ratio = random.uniform(0, max_pattern_ratio) 37 | height_ratio = random.uniform(0, max_pattern_ratio) 38 | width = src.shape[1] 39 | height = src.shape[0] 40 | block_width = width * width_ratio 41 | block_height = height * height_ratio 42 | width_start = int(random.uniform(0, width - block_width)) 43 | width_end = int(width_start + block_width) 44 | height_start = int(random.uniform(0, height - block_height)) 45 | height_end = int(height_start + block_height) 46 | pattern[height_start:height_end, width_start:width_end, :] = 0 47 | img = src * pattern 48 | return img 49 | 50 | 51 | def blur_heatmap(src, ksize=(3, 3)): 52 | for i in range(src.shape[2]): 53 | src[:, :, i] = cv2.GaussianBlur(src[:, :, i], ksize, 0) 54 | amin, amax = src[:, :, i].min(), src[:, :, i].max() # 求最大最小值 55 | if amax > 0: 56 | src[:, :, i] = (src[:, :, i] - amin) / (amax - amin) # (矩阵元素-最小值)/(最大值-最小值) 57 | return src 58 | 59 | 60 | def blur(src, ksize=(3, 3)): 61 | for i in range(src.shape[2]): 62 | src[:, :, i] = cv2.GaussianBlur(src[:, :, i], ksize, 1.5) 63 | return src 64 | 65 | 66 | def adjust_contrast(image, factor): 67 | mean = image.mean(axis=0).mean(axis=0) 68 | return _clip((image - mean) * factor + mean) 69 | 70 | 71 | def adjust_brightness(image, delta): 72 | return _clip(image + delta * 255) 73 | 74 | 75 | def adjust_hue(image, delta): 76 | image[..., 0] = np.mod(image[..., 0] + delta * 180, 180) 77 | return image 78 | 79 | 80 | def adjust_saturation(image, factor): 81 | image[..., 1] = np.clip(image[..., 1] * factor, 0, 255) 82 | return image 83 | 84 | 85 | def _clip(image): 86 | return np.clip(image, 0, 255).astype(np.uint8) 87 | 88 | 89 | def _uniform(val_range): 90 | return np.random.uniform(val_range[0], val_range[1]) 91 | 92 | 93 | class ColorDistort(): 94 | 95 | def __init__(self, contrast_range=(0.8, 1.2), brightness_range=(-.2, .2), hue_range=(-0.1, 0.1), 96 | saturation_range=(0.8, 1.2)): 97 | self.contrast_range = contrast_range 98 | self.brightness_range = brightness_range 99 | self.hue_range = hue_range 100 | self.saturation_range = saturation_range 101 | 102 | def __call__(self, image): 103 | if self.contrast_range is not None: 104 | contrast_factor = _uniform(self.contrast_range) 105 | image = adjust_contrast(image, contrast_factor) 106 | if self.brightness_range is not None: 107 | brightness_delta = _uniform(self.brightness_range) 108 | image = adjust_brightness(image, brightness_delta) 109 | if self.hue_range is not None or self.saturation_range is not None: 110 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) 111 | if self.hue_range is not None: 112 | hue_delta = _uniform(self.hue_range) 113 | image = adjust_hue(image, hue_delta) 114 | if self.saturation_range is not None: 115 | saturation_factor = _uniform(self.saturation_range) 116 | image = adjust_saturation(image, saturation_factor) 117 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) 118 | return image 119 | 120 | 121 | class DsfdVisualAug(): 122 | pass 123 | 
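For reference, a short illustrative snippet (not part of the repository) showing how the photometric augmentations defined above can be chained on a single BGR image; the image path is only an example and any OpenCV-readable image works.

import cv2
from utils.visual_augmentation import ColorDistort, Img_dropout, pixel_jitter

color_aug = ColorDistort()  # random contrast / brightness / hue / saturation
img = cv2.imread("images/ab.png")  # example path; use any BGR image
img = color_aug(img)
img = pixel_jitter(img, max_=5.0)  # low-amplitude per-pixel noise
img = Img_dropout(img, max_pattern_ratio=0.05)  # zero out a small random rectangle
cv2.imwrite("augmented_sample.png", img)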
-------------------------------------------------------------------------------- /utils/wing_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | from torch.nn.modules.loss import _Loss 4 | 5 | 6 | class WingLoss(_Loss): 7 | def __init__(self, width=10, curvature=2.0, reduction="mean"): 8 | super(WingLoss, self).__init__(reduction=reduction) 9 | self.width = width 10 | self.curvature = curvature 11 | 12 | def forward(self, prediction, target): 13 | return self.wing_loss(prediction, target, self.width, self.curvature, self.reduction) 14 | 15 | def wing_loss(self, prediction, target, width=10, curvature=2.0, reduction="mean"): 16 | diff_abs = (target - prediction).abs() 17 | loss = diff_abs.clone() 18 | idx_smaller = diff_abs < width 19 | idx_bigger = diff_abs >= width 20 | # loss[idx_smaller] = width * torch.log(1 + diff_abs[idx_smaller] / curvature) 21 | loss_smaller = width * torch.log(1 + diff_abs[idx_smaller] / curvature) 22 | C = width - width * math.log(1 + width / curvature) 23 | # loss[idx_bigger] = loss[idx_bigger] - C 24 | loss_bigger = loss[idx_bigger] - C 25 | loss = torch.cat((loss_smaller, loss_bigger), 0) 26 | if reduction == "sum": 27 | loss = loss.sum() 28 | if reduction == "mean": 29 | loss = loss.mean() 30 | return loss 31 | -------------------------------------------------------------------------------- /weights/empty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ainrichman/Peppa-Facial-Landmark-PyTorch/238063317fd31c4c21c5c43692e6a5d769970370/weights/empty --------------------------------------------------------------------------------
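Closing illustrative sketch (not repository code): exercising the WingLoss from utils/wing_loss.py with dummy tensors shaped like the 68-point (x, y) landmark vectors that train.py feeds it. Shapes and values here are assumptions for demonstration only.

import torch
from utils.wing_loss import WingLoss

criterion = WingLoss(width=10, curvature=2.0, reduction="mean")
pred = torch.rand(8, 136, requires_grad=True)  # dummy predictions: batch of 68 (x, y) landmarks
target = torch.rand(8, 136)                    # dummy ground-truth landmarks
loss = criterion(pred, target)
loss.backward()  # gradients flow back to the predictions
print(loss.item())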