├── .gitignore ├── LICENSE ├── README.md ├── README_zh.md ├── __init__.py ├── anno_store ├── __init__.py ├── info └── wider_origin_anno.txt ├── dface ├── __init__.py ├── config.py ├── core │ ├── __init__.py │ ├── detect.py │ ├── image_reader.py │ ├── image_tools.py │ ├── imagedb.py │ ├── models.py │ ├── nms.py │ ├── resnet_inception_v2.py │ ├── roc.py │ ├── utils.py │ └── vision.py ├── prepare_data │ ├── __init__.py │ ├── assemble.py │ ├── assemble_onet_imglist.py │ ├── assemble_pnet_imglist.py │ ├── assemble_rnet_imglist.py │ ├── gen_Onet_train_data.py │ ├── gen_Pnet_train_data.py │ ├── gen_Rnet_train_data.py │ ├── gen_landmark_12.py │ ├── gen_landmark_24.py │ ├── gen_landmark_48.py │ ├── gen_landmark_net_48.py │ └── widerface_annotation_gen │ │ ├── __init__.py │ │ ├── transform.py │ │ ├── wider_face_train.mat │ │ └── wider_loader.py └── train_net │ ├── __init__.py │ ├── train.py │ ├── train_o_net.py │ ├── train_p_net.py │ └── train_r_net.py ├── environment-win64.yml ├── environment.yml ├── environment_osx.yaml ├── log ├── __init__.py └── info ├── model_store ├── __init__.py ├── info ├── onet_epoch.pt ├── pnet_epoch.pt └── rnet_epoch.pt ├── test.jpg └── test_image.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | *.pyc 3 | log/*.log 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017- Jin Kuaikuai(314127900@qq.com) 2 | All rights reserved. 3 | Apache License 4 | Version 2.0, January 2004 5 | http://www.apache.org/licenses/ 6 | 7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 8 | 9 | 1. Definitions. 10 | 11 | "License" shall mean the terms and conditions for use, reproduction, 12 | and distribution as defined by Sections 1 through 9 of this document. 13 | 14 | "Licensor" shall mean the copyright owner or entity authorized by 15 | the copyright owner that is granting the License. 16 | 17 | "Legal Entity" shall mean the union of the acting entity and all 18 | other entities that control, are controlled by, or are under common 19 | control with that entity. For the purposes of this definition, 20 | "control" means (i) the power, direct or indirect, to cause the 21 | direction or management of such entity, whether by contract or 22 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 23 | outstanding shares, or (iii) beneficial ownership of such entity. 24 | 25 | "You" (or "Your") shall mean an individual or Legal Entity 26 | exercising permissions granted by this License. 27 | 28 | "Source" form shall mean the preferred form for making modifications, 29 | including but not limited to software source code, documentation 30 | source, and configuration files. 31 | 32 | "Object" form shall mean any form resulting from mechanical 33 | transformation or translation of a Source form, including but 34 | not limited to compiled object code, generated documentation, 35 | and conversions to other media types. 36 | 37 | "Work" shall mean the work of authorship, whether in Source or 38 | Object form, made available under the License, as indicated by a 39 | copyright notice that is included in or attached to the work 40 | (an example is provided in the Appendix below). 
41 | 42 | "Derivative Works" shall mean any work, whether in Source or Object 43 | form, that is based on (or derived from) the Work and for which the 44 | editorial revisions, annotations, elaborations, or other modifications 45 | represent, as a whole, an original work of authorship. For the purposes 46 | of this License, Derivative Works shall not include works that remain 47 | separable from, or merely link (or bind by name) to the interfaces of, 48 | the Work and Derivative Works thereof. 49 | 50 | "Contribution" shall mean any work of authorship, including 51 | the original version of the Work and any modifications or additions 52 | to that Work or Derivative Works thereof, that is intentionally 53 | submitted to Licensor for inclusion in the Work by the copyright owner 54 | or by an individual or Legal Entity authorized to submit on behalf of 55 | the copyright owner. For the purposes of this definition, "submitted" 56 | means any form of electronic, verbal, or written communication sent 57 | to the Licensor or its representatives, including but not limited to 58 | communication on electronic mailing lists, source code control systems, 59 | and issue tracking systems that are managed by, or on behalf of, the 60 | Licensor for the purpose of discussing and improving the Work, but 61 | excluding communication that is conspicuously marked or otherwise 62 | designated in writing by the copyright owner as "Not a Contribution." 63 | 64 | "Contributor" shall mean Licensor and any individual or Legal Entity 65 | on behalf of whom a Contribution has been received by Licensor and 66 | subsequently incorporated within the Work. 67 | 68 | 2. Grant of Copyright License. Subject to the terms and conditions of 69 | this License, each Contributor hereby grants to You a perpetual, 70 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 71 | copyright license to reproduce, prepare Derivative Works of, 72 | publicly display, publicly perform, sublicense, and distribute the 73 | Work and such Derivative Works in Source or Object form. 74 | 75 | 3. Grant of Patent License. Subject to the terms and conditions of 76 | this License, each Contributor hereby grants to You a perpetual, 77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 78 | (except as stated in this section) patent license to make, have made, 79 | use, offer to sell, sell, import, and otherwise transfer the Work, 80 | where such license applies only to those patent claims licensable 81 | by such Contributor that are necessarily infringed by their 82 | Contribution(s) alone or by combination of their Contribution(s) 83 | with the Work to which such Contribution(s) was submitted. If You 84 | institute patent litigation against any entity (including a 85 | cross-claim or counterclaim in a lawsuit) alleging that the Work 86 | or a Contribution incorporated within the Work constitutes direct 87 | or contributory patent infringement, then any patent licenses 88 | granted to You under this License for that Work shall terminate 89 | as of the date such litigation is filed. 90 | 91 | 4. Redistribution. 
You may reproduce and distribute copies of the 92 | Work or Derivative Works thereof in any medium, with or without 93 | modifications, and in Source or Object form, provided that You 94 | meet the following conditions: 95 | 96 | (a) You must give any other recipients of the Work or 97 | Derivative Works a copy of this License; and 98 | 99 | (b) You must cause any modified files to carry prominent notices 100 | stating that You changed the files; and 101 | 102 | (c) You must retain, in the Source form of any Derivative Works 103 | that You distribute, all copyright, patent, trademark, and 104 | attribution notices from the Source form of the Work, 105 | excluding those notices that do not pertain to any part of 106 | the Derivative Works; and 107 | 108 | (d) If the Work includes a "NOTICE" text file as part of its 109 | distribution, then any Derivative Works that You distribute must 110 | include a readable copy of the attribution notices contained 111 | within such NOTICE file, excluding those notices that do not 112 | pertain to any part of the Derivative Works, in at least one 113 | of the following places: within a NOTICE text file distributed 114 | as part of the Derivative Works; within the Source form or 115 | documentation, if provided along with the Derivative Works; or, 116 | within a display generated by the Derivative Works, if and 117 | wherever such third-party notices normally appear. The contents 118 | of the NOTICE file are for informational purposes only and 119 | do not modify the License. You may add Your own attribution 120 | notices within Derivative Works that You distribute, alongside 121 | or as an addendum to the NOTICE text from the Work, provided 122 | that such additional attribution notices cannot be construed 123 | as modifying the License. 124 | 125 | You may add Your own copyright statement to Your modifications and 126 | may provide additional or different license terms and conditions 127 | for use, reproduction, or distribution of Your modifications, or 128 | for any such Derivative Works as a whole, provided Your use, 129 | reproduction, and distribution of the Work otherwise complies with 130 | the conditions stated in this License. 131 | 132 | 5. Submission of Contributions. Unless You explicitly state otherwise, 133 | any Contribution intentionally submitted for inclusion in the Work 134 | by You to the Licensor shall be under the terms and conditions of 135 | this License, without any additional terms or conditions. 136 | Notwithstanding the above, nothing herein shall supersede or modify 137 | the terms of any separate license agreement you may have executed 138 | with Licensor regarding such Contributions. 139 | 140 | 6. Trademarks. This License does not grant permission to use the trade 141 | names, trademarks, service marks, or product names of the Licensor, 142 | except as required for reasonable and customary use in describing the 143 | origin of the Work and reproducing the content of the NOTICE file. 144 | 145 | 7. Disclaimer of Warranty. Unless required by applicable law or 146 | agreed to in writing, Licensor provides the Work (and each 147 | Contributor provides its Contributions) on an "AS IS" BASIS, 148 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 149 | implied, including, without limitation, any warranties or conditions 150 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 151 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 152 | appropriateness of using or redistributing the Work and assume any 153 | risks associated with Your exercise of permissions under this License. 154 | 155 | 8. Limitation of Liability. In no event and under no legal theory, 156 | whether in tort (including negligence), contract, or otherwise, 157 | unless required by applicable law (such as deliberate and grossly 158 | negligent acts) or agreed to in writing, shall any Contributor be 159 | liable to You for damages, including any direct, indirect, special, 160 | incidental, or consequential damages of any character arising as a 161 | result of this License or out of the use or inability to use the 162 | Work (including but not limited to damages for loss of goodwill, 163 | work stoppage, computer failure or malfunction, or any and all 164 | other commercial damages or losses), even if such Contributor 165 | has been advised of the possibility of such damages. 166 | 167 | 9. Accepting Warranty or Additional Liability. While redistributing 168 | the Work or Derivative Works thereof, You may choose to offer, 169 | and charge a fee for, acceptance of support, warranty, indemnity, 170 | or other liability obligations and/or rights consistent with this 171 | License. However, in accepting such obligations, You may act only 172 | on Your own behalf and on Your sole responsibility, not on behalf 173 | of any other Contributor, and only if You agree to indemnify, 174 | defend, and hold each Contributor harmless for any liability 175 | incurred by, or claims asserted against, such Contributor by reason 176 | of your accepting any such warranty or additional liability. 177 | 178 | END OF TERMS AND CONDITIONS 179 | 180 | APPENDIX: How to apply the Apache License to your work. 181 | 182 | To apply the Apache License to your work, attach the following 183 | boilerplate notice, with the fields enclosed by brackets "{}" 184 | replaced with your own identifying information. (Don't include 185 | the brackets!) The text should be enclosed in the appropriate 186 | comment syntax for the file format. We also recommend that a 187 | file or class name and description of purpose be included on the 188 | same "printed page" as the copyright notice for easier 189 | identification within third-party archives. 190 | 191 | Copyright Jin Kuaikuai 192 | 193 | Licensed under the Apache License, Version 2.0 (the "License"); 194 | you may not use this file except in compliance with the License. 195 | You may obtain a copy of the License at 196 | 197 | http://www.apache.org/licenses/LICENSE-2.0 198 | 199 | Unless required by applicable law or agreed to in writing, software 200 | distributed under the License is distributed on an "AS IS" BASIS, 201 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 202 | See the License for the specific language governing permissions and 203 | limitations under the License. 204 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 |
4 | 5 | ----------------- 6 | # Dface • [![License](http://dftech.oss-cn-hangzhou.aliyuncs.com/opendface/img/apache_2.svg)](https://opensource.org/licenses/Apache-2.0) 7 | 8 | 9 | | **`Linux CPU`** | **`Linux GPU`** | **`Mac OS CPU`** | **`Windows CPU`** | 10 | |-----------------|---------------------|------------------|-------------------| 11 | | [![Build Status](http://dftech.oss-cn-hangzhou.aliyuncs.com/opendface/img/build_pass.svg)](http://dftech.oss-cn-hangzhou.aliyuncs.com/opendface/img/build_pass.svg) | [![Build Status](http://dftech.oss-cn-hangzhou.aliyuncs.com/opendface/img/build_pass.svg)](http://dftech.oss-cn-hangzhou.aliyuncs.com/opendface/img/build_pass.svg) | [![Build Status](http://dftech.oss-cn-hangzhou.aliyuncs.com/opendface/img/build_pass.svg)](http://dftech.oss-cn-hangzhou.aliyuncs.com/opendface/img/build_pass.svg) | [![Build Status](http://dftech.oss-cn-hangzhou.aliyuncs.com/opendface/img/build_pass.svg)](http://dftech.oss-cn-hangzhou.aliyuncs.com/opendface/img/build_pass.svg) | 12 | 13 | 14 | **Free and open-source face detection, based on MTCNN.** 15 | 16 | [Official Website](https://dface.tech) 17 | 18 | **We also provide a full face recognition SDK, which includes tracking, detection, face recognition, face anti-spoofing, and more. See [dface.tech](https://dface.tech) for details.** 19 | ![DFACE SDK](https://media.giphy.com/media/v1.Y2lkPTc5MGI3NjExYXI5aGt5NDFxamk2cTdkZWlpMml1bDhqNnAyc2lyOXV3bnF2Y3RnYyZlcD12MV9pbnRlcm5hbF9naWZfYnlfaWQmY3Q9Zw/MkHTo4WQ5VuPzegTFV/giphy-downsized.gif) 20 | 21 | 22 | **DFace** is open-source software for face detection and recognition. All features are implemented with **[pytorch](https://github.com/pytorch/pytorch)** (Facebook's deep learning framework). PyTorch uses reverse-mode auto-differentiation, which lets developers change how a network behaves arbitrarily, with zero lag or overhead. 23 | DFace inherits these characteristics, which keep it dynamic and make the code easy to review. 24 | 25 | DFace supports GPU acceleration with NVIDIA CUDA. We highly recommend the Linux GPU version; it is very fast and runs in real time. 26 | 27 | Our inspiration comes from several research papers on this topic, as well as current and past work such as [Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Networks](https://arxiv.org/abs/1604.02878) and, on face recognition, [FaceNet: A Unified Embedding for Face Recognition and Clustering](https://arxiv.org/abs/1503.03832). 28 | 29 | **MTCNN Structure** 30 | 31 | ![Pnet](http://dftech.oss-cn-hangzhou.aliyuncs.com/opendface/img/pnet.jpg) 32 | ![Rnet](http://dftech.oss-cn-hangzhou.aliyuncs.com/opendface/img/rnet.jpg) 33 | ![Onet](http://dftech.oss-cn-hangzhou.aliyuncs.com/opendface/img/onet.jpg) 34 | 35 | **If you want to contribute to DFace, please review the CONTRIBUTING.md in the project. We use [Slack](https://dfaceio.slack.com/) for tracking requests and bugs. You can also follow QQ group 681403076 or my WeChat jinkuaikuai005.** 36 | 37 | 38 | ## TODO (contribute to DFace) 39 | - Implement face comparison based on center loss or triplet loss; the recommended model is ResNet Inception v2. Refer to this [Paper](https://arxiv.org/abs/1503.03832) and [FaceNet](https://github.com/davidsandberg/facenet) 40 | - Face anti-spoofing, distinguishing real faces by lighting and texture. The LBP algorithm with an SVM is recommended. 41 | - 3D mask anti-spoofing. 42 | - Mobile port with caffe2 and C++. 43 | - TensorRT migration.
44 | - Docker support, GPU version. 45 | 46 | ## Installation 47 | 48 | DFace has two major modules, detection and recognition. For both, we provide tutorials on how to train and run the models. 49 | First set up pytorch and cv2. We suggest Anaconda for creating an isolated, self-contained Python environment. **If you want to train on GPU, please install NVIDIA CUDA and cuDNN.** 50 | 51 | ### Requirements 52 | * cuda 8.0 53 | * anaconda 54 | * pytorch 55 | * torchvision 56 | * cv2 57 | * matplotlib 58 | 59 | 60 | ```shell 61 | git clone https://github.com/kuaikuaikim/dface.git 62 | ``` 63 | 64 | 65 | We also provide an Anaconda environment dependency list called environment.yml (on Windows use environment-win64.yml, on Mac environment_osx.yaml) in the root path. 66 | You can create your DFace environment very easily. 67 | ```shell 68 | cd DFace 69 | 70 | conda env create -f path/to/environment.yml 71 | ``` 72 | 73 | Add DFace to your local Python path: 74 | 75 | ```shell 76 | export PYTHONPATH=$PYTHONPATH:{your local DFace root path} 77 | ``` 78 | 79 | 80 | ### Face Detection and Recognition 81 | 82 | If you are interested in training an MTCNN model, follow the steps below. 83 | 84 | #### Train MTCNN Model 85 | MTCNN consists of three networks, called **PNet**, **RNet**, and **ONet**, so training proceeds in three stages. Each stage depends on the previously trained network, which generates the training data fed to the current network, driving the loss down from stage to stage. 86 | Please download the face training **datasets** before training. We use **[WIDER FACE](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/)** and **[CelebA](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html)**: WIDER FACE is used to train face classification and face bounding-box regression, while CelebA is used for face landmarks. The original WIDER FACE annotation file is in MATLAB format; you must transform it to text. I have put the transformed annotation text file at [anno_store/wider_origin_anno.txt](https://github.com/kuaikuaikim/DFace/blob/master/anno_store/wider_origin_anno.txt). This file corresponds to the --anno_file parameter used below.
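For orientation, the assembled annotation lists produced by the steps below are consumed through `dface/core/imagedb.py` and `dface/core/image_reader.py`. The following is a minimal sketch of that loading path; the annotation path and batch size are assumptions based on the defaults in `dface/config.py`, and the actual training scripts in `dface/train_net/` wire this up for you.

```python
from dface.core.imagedb import ImageDB
from dface.core.image_reader import TrainImageReader

# Assumed path: ANNO_STORE_DIR plus PNET_TRAIN_IMGLIST_FILENAME from dface/config.py
imagedb = ImageDB("./anno_store/imglist_anno_12.txt", prefix_path="", mode="train")
gt_imdb = imagedb.load_imdb()  # list of dicts: image path, label, bbox_target, landmark_target

# Iterate shuffled mini-batches of PNet training samples (batch size from config.TRAIN_BATCH_SIZE)
train_data = TrainImageReader(gt_imdb, im_size=12, batch_size=512, shuffle=True)
for images, (labels, bbox_targets, landmark_targets) in train_data:
    pass  # the PNet forward/backward pass would go here
```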
87 | 88 | 89 | * Create the DFace training data temporary folder; this folder corresponds to the --dface_traindata_store parameter used below 90 | 91 | ```shell 92 | mkdir {your dface traindata folder} 93 | ``` 94 | 95 | 96 | * Generate PNet training data and annotation file 97 | 98 | ```shell 99 | python dface/prepare_data/gen_Pnet_train_data.py --prefix_path {annotation file image prefix path, just your local wider face images folder} --dface_traindata_store {dface train data temporary folder you made before } --anno_file {wider face original combined annotation file, default anno_store/wider_origin_anno.txt} 100 | ``` 101 | * Assemble the annotation file and shuffle it 102 | 103 | ```shell 104 | python dface/prepare_data/assemble_pnet_imglist.py 105 | ``` 106 | * Train the PNet model 107 | 108 | ```shell 109 | python dface/train_net/train_p_net.py 110 | ``` 111 | * Generate RNet training data and annotation file 112 | 113 | ```shell 114 | python dface/prepare_data/gen_Rnet_train_data.py --prefix_path {annotation file image prefix path, just your local wider face images folder} --dface_traindata_store {dface train data temporary folder you made before } --anno_file {wider face original combined annotation file, default anno_store/wider_origin_anno.txt} --pmodel_file {your PNet model file trained before} 115 | ``` 116 | * Assemble the annotation file and shuffle it 117 | 118 | ```shell 119 | python dface/prepare_data/assemble_rnet_imglist.py 120 | ``` 121 | * Train the RNet model 122 | 123 | ```shell 124 | python dface/train_net/train_r_net.py 125 | ``` 126 | * Generate ONet training data and annotation file 127 | 128 | ```shell 129 | python dface/prepare_data/gen_Onet_train_data.py --prefix_path {annotation file image prefix path, just your local wider face images folder} --dface_traindata_store {dface train data temporary folder you made before } --anno_file {wider face original combined annotation file, default anno_store/wider_origin_anno.txt} --pmodel_file {your PNet model file trained before} --rmodel_file {your RNet model file trained before} 130 | ``` 131 | * Generate ONet training landmark data 132 | 133 | ```shell 134 | python dface/prepare_data/gen_landmark_48.py 135 | ``` 136 | * Assemble the annotation file and shuffle it 137 | 138 | ```shell 139 | python dface/prepare_data/assemble_onet_imglist.py 140 | ``` 141 | * Train the ONet model 142 | 143 | ```shell 144 | python dface/train_net/train_o_net.py 145 | ``` 146 | 147 | #### Test face detection 148 | **If you don't want to train, I have put onet_epoch.pt, pnet_epoch.pt, and rnet_epoch.pt in the model_store folder. Just try test_image.py:** 149 | 150 | ```shell 151 | python test_image.py 152 | ``` 153 | 154 | 155 | ## Demo 156 | 157 | ![mtcnn](http://dftech.oss-cn-hangzhou.aliyuncs.com/opendface/img/dface_demoall.PNG) 158 | 159 | 160 | ### QQ Group (QQ交流群) 161 | ![](http://dftech.oss-cn-hangzhou.aliyuncs.com/opendface/img/dfaceqqsm.png) 162 | 163 | 164 | #### 681403076 165 | 166 | #### My WeChat (微信) 167 | ##### cobbestne 168 | 169 | 170 | ## License 171 | 172 | [Apache License 2.0](LICENSE) 173 | 174 | 175 | ## Reference 176 | 177 | * [OpenFace](https://github.com/cmusatyalab/openface) 178 | -------------------------------------------------------------------------------- /README_zh.md: -------------------------------------------------------------------------------- 1 |
2 | 3 |
4 | 5 | ----------------- 6 | # DFace (Deeplearning Face)• [![License](http://pic.dface.io/apache2.svg)](https://opensource.org/licenses/Apache-2.0) 7 | 8 | 9 | | **`Linux CPU`** | **`Linux GPU`** | **`Mac OS CPU`** | **`Windows CPU`** | 10 | |-----------------|---------------------|------------------|-------------------| 11 | | [![Build Status](http://pic.dface.io/pass.svg)](http://pic.dface.io/pass.svg) | [![Build Status](http://pic.dface.io/pass.svg)](http://pic.dface.io/pass.svg) | [![Build Status](http://pic.dface.io/pass.svg)](http://pic.dface.io/pass.svg) | [![Build Status](http://pic.dface.io/pass.svg)](http://pic.dface.io/pass.svg) | 12 | 13 | 14 | **基于多任务卷积网络(MTCNN)和Center-Loss的多人实时人脸检测和人脸识别系统。** 15 | 16 | 17 | [Slack 聊天组](https://dfaceio.slack.com/) 18 | 19 | 20 | 21 | **DFace** 是个开源的深度学习人脸检测和人脸识别系统。所有功能都采用 **[pytorch](https://github.com/pytorch/pytorch)** 框架开发。pytorch是一个由facebook开发的深度学习框架,它包含了一些比较有趣的高级特性,例如自动求导,动态构图等。DFace天然的继承了这些优点,使得它的训练过程可以更加简单方便,并且实现的代码可以更加清晰易懂。 22 | DFace可以利用CUDA来支持GPU加速模式。我们建议尝试linux GPU这种模式,它几乎可以实现实时的效果。 23 | 所有的灵感都来源于学术界最近的一些研究成果,例如 [Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Networks](https://arxiv.org/abs/1604.02878) 和 [FaceNet: A Unified Embedding for Face Recognition and Clustering](https://arxiv.org/abs/1503.03832) 24 | 25 | 26 | **MTCNN 结构**   27 | 28 | ![mtcnn](http://affluent.oss-cn-hangzhou.aliyuncs.com/html/images/mtcnn_st.png) 29 | 30 | 31 | ** 如果你对DFace感兴趣并且想参与到这个项目中, 以下TODO是一些需要实现的功能,我定期会更新,它会实时展示一些需要开发的清单。提交你的fork request,我会用issues来跟踪和反馈所有的问题。也可以加DFace的官方Q群 681403076 也可以加本人微信 jinkuaikuai005 ** 32 | 33 | ### TODO(需要开发的功能) 34 | - 基于center loss 或者triplet loss原理开发人脸对比功能,模型采用ResNet inception v2. 该功能能够比较两张人脸图片的相似性。具体可以参考 [Paper](https://arxiv.org/abs/1503.03832)和[FaceNet](https://github.com/davidsandberg/facenet) 35 | - 反欺诈功能,根据光线,质地等人脸特性来防止照片攻击,视频攻击,回放攻击等。具体可参考LBP算法和SVM训练模型。 36 | - 3D人脸反欺诈。 37 | - mobile移植,根据ONNX标准把pytorch训练好的模型迁移到caffe2,一些numpy算法改用c++实现。 38 | - Tensor RT移植,高并发。 39 | - Docker支持,gpu版 40 | 41 | ## 安装 42 | DFace主要有两大模块,人脸检测和人脸识别。我会提供所有模型训练和运行的详细步骤。你首先需要构建一个pytorch和cv2的python环境,我推荐使用Anaconda来设置一个独立的虚拟环境。目前作者倾向于Linux Ubuntu安装环境。感谢山东一位网友提供windows DFace安装体验,windos安装教程具体 43 | 可参考他的[博客](http://www.alearner.top/index.php/2017/12/23/dface-pytorch-win64-gpu) 44 | 45 | 46 | ### 依赖 47 | * cuda 8.0 48 | * anaconda 49 | * pytorch 50 | * torchvision 51 | * cv2 52 | * matplotlib 53 | 54 | ```shell 55 | git clone https://github.com/kuaikuaikim/DFace.git 56 | ``` 57 | 58 | 在这里我提供了一个anaconda的环境依赖文件environment.yml (windows请用environment-win64.yml),它能方便你构建自己的虚拟环境。 59 | 60 | ```shell 61 | cd dface 62 | 63 | conda env create -f environment.yml 64 | ``` 65 | 66 | 添加python搜索模块路径 67 | 68 | ```shell 69 | export PYTHONPATH=$PYTHONPATH:{your local DFace root path} 70 | ``` 71 | 72 | 73 | 74 | ### 人脸识别和检测 75 | 76 | 如果你对mtcnn模型感兴趣,以下过程可能会帮助到你。 77 | 78 | #### 训练mtcnn模型 79 | 80 | MTCNN主要有三个网络,叫做**PNet**, **RNet** 和 **ONet**。因此我们的训练过程也需要分三步先后进行。为了更好的实现效果,当前被训练的网络都将依赖于上一个训练好的网络来生成数据。所有的人脸数据集都来自 **[WIDER FACE](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/)** 和 **[CelebA](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html)**。WIDER FACE仅提供了大量的人脸边框定位数据,而CelebA包含了人脸关键点定位数据。以下训练除了 生成ONet的人脸关键点训练数据和标注文件 该步骤使用CelebA数据集,其他一律使用WIDER FACE。如果使用wider face的 wider_face_train.mat 注解文件需要转换成txt格式的,我这里用h5py写了个 [转换脚本](https://github.com/kuaikuaikim/DFace/blob/master/dface/prepare_data/widerface_annotation_gen/transform.py). 
这里我提供一个已经转换好的wider face注解文件 [anno_store/wider_origin_anno.txt](https://github.com/kuaikuaikim/DFace/blob/master/anno_store/info/wider_origin_anno.txt), 以下训练过程参数名--anno_file默认就是使用该转换好的注解文件。 81 | 82 | 83 | * 创建 dface 训练数据临时目录,对应于以下所有的参数名 --dface_traindata_store 84 | ```shell 85 | mkdir {your dface traindata folder} 86 | ``` 87 | 88 | 89 | * 生成PNet训练数据和标注文件 90 | 91 | ```shell 92 | python dface/prepare_data/gen_Pnet_train_data.py --prefix_path {注解文件中图片的目录前缀,就是wider face图片所在目录} --dface_traindata_store {之前创建的dface训练数据临时目录} --anno_file {wider face 注解文件,可以不填,默认使用anno_store/wider_origin_anno.txt} 93 | ``` 94 | * 乱序合并标注文件 95 | 96 | ```shell 97 | python dface/prepare_data/assemble_pnet_imglist.py 98 | ``` 99 | 100 | * 训练PNet模型 101 | 102 | 103 | ```shell 104 | python dface/train_net/train_p_net.py 105 | ``` 106 | * 生成RNet训练数据和标注文件 107 | 108 | ```shell 109 | python dface/prepare_data/gen_Rnet_train_data.py --prefix_path {注解文件中图片的目录前缀,就是wider face图片所在目录} --dface_traindata_store {之前创建的dface训练数据临时目录} --anno_file {wider face 注解文件,可以不填,默认使用anno_store/wider_origin_anno.txt} --pmodel_file {之前训练的Pnet模型文件} 110 | ``` 111 | * 乱序合并标注文件 112 | 113 | ```shell 114 | python dface/prepare_data/assemble_rnet_imglist.py 115 | ``` 116 | 117 | * 训练RNet模型 118 | 119 | ```shell 120 | python dface/train_net/train_r_net.py 121 | ``` 122 | 123 | * 生成ONet训练数据和标注文件 124 | 125 | ```shell 126 | python dface/prepare_data/gen_Onet_train_data.py --prefix_path {注解文件中图片的目录前缀,就是wider face图片所在目录} --dface_traindata_store {之前创建的dface训练数据临时目录} --anno_file {wider face 注解文件,可以不填,默认使用anno_store/wider_origin_anno.txt} --pmodel_file {之前训练的Pnet模型文件} --rmodel_file {之前训练的Rnet模型文件} 127 | ``` 128 | 129 | * 生成ONet的人脸五官关键点训练数据和标注文件 130 | 131 | ```shell 132 | python dface/prepare_data/gen_landmark_48.py 133 | ``` 134 | 135 | * 乱序合并标注文件(包括人脸五官关键点) 136 | 137 | ```shell 138 | python dface/prepare_data/assemble_onet_imglist.py 139 | ``` 140 | 141 | * 训练ONet模型 142 | 143 | ```shell 144 | python dface/train_net/train_o_net.py 145 | ``` 146 | 147 | #### 测试人脸检测 148 | ```shell 149 | python test_image.py 150 | ``` 151 | 152 | ### 人脸对比 153 | 154 | @TODO 根据center loss实现人脸识别 155 | 156 | ## 测试效果 157 | ![mtcnn](http://affluent.oss-cn-hangzhou.aliyuncs.com/html/images/dface_demoall.PNG) 158 | 159 | 160 | ### QQ交流群(模型获取请加群) 161 | 162 | #### 681403076 163 | 164 | ![](http://affluent.oss-cn-hangzhou.aliyuncs.com/html/images/dfaceqqsm.png) 165 | 166 | #### 本人微信 167 | 168 | ##### jinkuaikuai005 169 | 170 | ![](http://affluent.oss-cn-hangzhou.aliyuncs.com/html/images/perqr.jpg) 171 | 172 | 173 | 174 | ## License 175 | 176 | [Apache License 2.0](LICENSE) 177 | 178 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/__init__.py -------------------------------------------------------------------------------- /anno_store/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/anno_store/__init__.py -------------------------------------------------------------------------------- /anno_store/info: -------------------------------------------------------------------------------- 1 | This directory store the annotation files of train data -------------------------------------------------------------------------------- /dface/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/dface/__init__.py -------------------------------------------------------------------------------- /dface/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | MODEL_STORE_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))+"/model_store" 5 | 6 | 7 | ANNO_STORE_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))+"/anno_store" 8 | 9 | 10 | LOG_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))+"/log" 11 | 12 | 13 | USE_CUDA = True 14 | 15 | 16 | TRAIN_BATCH_SIZE = 512 17 | 18 | TRAIN_LR = 0.01 19 | 20 | END_EPOCH = 10 21 | 22 | 23 | PNET_POSTIVE_ANNO_FILENAME = "pos_12.txt" 24 | PNET_NEGATIVE_ANNO_FILENAME = "neg_12.txt" 25 | PNET_PART_ANNO_FILENAME = "part_12.txt" 26 | PNET_LANDMARK_ANNO_FILENAME = "landmark_12.txt" 27 | 28 | 29 | RNET_POSTIVE_ANNO_FILENAME = "pos_24.txt" 30 | RNET_NEGATIVE_ANNO_FILENAME = "neg_24.txt" 31 | RNET_PART_ANNO_FILENAME = "part_24.txt" 32 | RNET_LANDMARK_ANNO_FILENAME = "landmark_24.txt" 33 | 34 | 35 | ONET_POSTIVE_ANNO_FILENAME = "pos_48.txt" 36 | ONET_NEGATIVE_ANNO_FILENAME = "neg_48.txt" 37 | ONET_PART_ANNO_FILENAME = "part_48.txt" 38 | ONET_LANDMARK_ANNO_FILENAME = "landmark_48.txt" 39 | 40 | PNET_TRAIN_IMGLIST_FILENAME = "imglist_anno_12.txt" 41 | RNET_TRAIN_IMGLIST_FILENAME = "imglist_anno_24.txt" 42 | ONET_TRAIN_IMGLIST_FILENAME = "imglist_anno_48.txt" -------------------------------------------------------------------------------- /dface/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/dface/core/__init__.py -------------------------------------------------------------------------------- /dface/core/detect.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import time 3 | import numpy as np 4 | import torch 5 | from torch.autograd.variable import Variable 6 | from dface.core.models import PNet,RNet,ONet 7 | import dface.core.utils as utils 8 | import dface.core.image_tools as image_tools 9 | 10 | 11 | def create_mtcnn_net(p_model_path=None, r_model_path=None, o_model_path=None, use_cuda=True): 12 | 13 | pnet, rnet, onet = None, None, None 14 | 15 | if p_model_path is not None: 16 | pnet = PNet(use_cuda=use_cuda) 17 | if(use_cuda): 18 | pnet.load_state_dict(torch.load(p_model_path)) 19 | pnet.cuda() 20 | else: 21 | # forcing all GPU tensors to be in CPU while loading 22 | pnet.load_state_dict(torch.load(p_model_path, map_location=lambda storage, loc: storage)) 23 | pnet.eval() 24 | 25 | if r_model_path is not None: 26 | rnet = RNet(use_cuda=use_cuda) 27 | if (use_cuda): 28 | rnet.load_state_dict(torch.load(r_model_path)) 29 | rnet.cuda() 30 | else: 31 | rnet.load_state_dict(torch.load(r_model_path, map_location=lambda storage, loc: storage)) 32 | rnet.eval() 33 | 34 | if o_model_path is not None: 35 | onet = ONet(use_cuda=use_cuda) 36 | if (use_cuda): 37 | onet.load_state_dict(torch.load(o_model_path)) 38 | onet.cuda() 39 | else: 40 | onet.load_state_dict(torch.load(o_model_path, map_location=lambda storage, loc: storage)) 41 | onet.eval() 42 | 43 | return pnet,rnet,onet 44 | 45 | 46 | 47 | 48 | class MtcnnDetector(object): 49 | """ 50 | P,R,O net face detection and landmarks align 51 
| """ 52 | def __init__(self, 53 | pnet = None, 54 | rnet = None, 55 | onet = None, 56 | min_face_size=12, 57 | stride=2, 58 | threshold=[0.6, 0.7, 0.7], 59 | scale_factor=0.709, 60 | ): 61 | 62 | self.pnet_detector = pnet 63 | self.rnet_detector = rnet 64 | self.onet_detector = onet 65 | self.min_face_size = min_face_size 66 | self.stride=stride 67 | self.thresh = threshold 68 | self.scale_factor = scale_factor 69 | 70 | 71 | def unique_image_format(self,im): 72 | if not isinstance(im,np.ndarray): 73 | if im.mode == 'I': 74 | im = np.array(im, np.int32, copy=False) 75 | elif im.mode == 'I;16': 76 | im = np.array(im, np.int16, copy=False) 77 | else: 78 | im = np.asarray(im) 79 | return im 80 | 81 | def square_bbox(self, bbox): 82 | """ 83 | convert bbox to square 84 | Parameters: 85 | ---------- 86 | bbox: numpy array , shape n x m 87 | input bbox 88 | Returns: 89 | ------- 90 | square bbox 91 | """ 92 | square_bbox = bbox.copy() 93 | 94 | h = bbox[:, 3] - bbox[:, 1] + 1 95 | w = bbox[:, 2] - bbox[:, 0] + 1 96 | l = np.maximum(h,w) 97 | square_bbox[:, 0] = bbox[:, 0] + w*0.5 - l*0.5 98 | square_bbox[:, 1] = bbox[:, 1] + h*0.5 - l*0.5 99 | 100 | square_bbox[:, 2] = square_bbox[:, 0] + l - 1 101 | square_bbox[:, 3] = square_bbox[:, 1] + l - 1 102 | return square_bbox 103 | 104 | 105 | def generate_bounding_box(self, map, reg, scale, threshold): 106 | """ 107 | generate bbox from feature map 108 | Parameters: 109 | ---------- 110 | map: numpy array , n x m x 1 111 | detect score for each position 112 | reg: numpy array , n x m x 4 113 | bbox 114 | scale: float number 115 | scale of this detection 116 | threshold: float number 117 | detect threshold 118 | Returns: 119 | ------- 120 | bbox array 121 | """ 122 | stride = 2 123 | cellsize = 12 124 | 125 | t_index = np.where(map > threshold) 126 | 127 | # find nothing 128 | if t_index[0].size == 0: 129 | return np.array([]) 130 | 131 | dx1, dy1, dx2, dy2 = [reg[0, t_index[0], t_index[1], i] for i in range(4)] 132 | reg = np.array([dx1, dy1, dx2, dy2]) 133 | 134 | # lefteye_dx, lefteye_dy, righteye_dx, righteye_dy, nose_dx, nose_dy, \ 135 | # leftmouth_dx, leftmouth_dy, rightmouth_dx, rightmouth_dy = [landmarks[0, t_index[0], t_index[1], i] for i in range(10)] 136 | # 137 | # landmarks = np.array([lefteye_dx, lefteye_dy, righteye_dx, righteye_dy, nose_dx, nose_dy, leftmouth_dx, leftmouth_dy, rightmouth_dx, rightmouth_dy]) 138 | 139 | 140 | 141 | score = map[t_index[0], t_index[1], 0] 142 | boundingbox = np.vstack([np.round((stride * t_index[1]) / scale), 143 | np.round((stride * t_index[0]) / scale), 144 | np.round((stride * t_index[1] + cellsize) / scale), 145 | np.round((stride * t_index[0] + cellsize) / scale), 146 | score, 147 | reg, 148 | # landmarks 149 | ]) 150 | 151 | return boundingbox.T 152 | 153 | 154 | def resize_image(self, img, scale): 155 | """ 156 | resize image and transform dimention to [batchsize, channel, height, width] 157 | Parameters: 158 | ---------- 159 | img: numpy array , height x width x channel 160 | input image, channels in BGR order here 161 | scale: float number 162 | scale factor of resize operation 163 | Returns: 164 | ------- 165 | transformed image tensor , 1 x channel x height x width 166 | """ 167 | height, width, channels = img.shape 168 | new_height = int(height * scale) # resized new height 169 | new_width = int(width * scale) # resized new width 170 | new_dim = (new_width, new_height) 171 | img_resized = cv2.resize(img, new_dim, interpolation=cv2.INTER_LINEAR) # resized image 172 | return img_resized 173 | 
174 | 175 | def pad(self, bboxes, w, h): 176 | """ 177 | pad the the boxes 178 | Parameters: 179 | ---------- 180 | bboxes: numpy array, n x 5 181 | input bboxes 182 | w: float number 183 | width of the input image 184 | h: float number 185 | height of the input image 186 | Returns : 187 | ------ 188 | dy, dx : numpy array, n x 1 189 | start point of the bbox in target image 190 | edy, edx : numpy array, n x 1 191 | end point of the bbox in target image 192 | y, x : numpy array, n x 1 193 | start point of the bbox in original image 194 | ex, ex : numpy array, n x 1 195 | end point of the bbox in original image 196 | tmph, tmpw: numpy array, n x 1 197 | height and width of the bbox 198 | """ 199 | 200 | tmpw = (bboxes[:, 2] - bboxes[:, 0] + 1).astype(np.int32) 201 | tmph = (bboxes[:, 3] - bboxes[:, 1] + 1).astype(np.int32) 202 | numbox = bboxes.shape[0] 203 | 204 | dx = np.zeros((numbox, )) 205 | dy = np.zeros((numbox, )) 206 | edx, edy = tmpw.copy()-1, tmph.copy()-1 207 | 208 | x, y, ex, ey = bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 3] 209 | 210 | tmp_index = np.where(ex > w-1) 211 | edx[tmp_index] = tmpw[tmp_index] + w - 2 - ex[tmp_index] 212 | ex[tmp_index] = w - 1 213 | 214 | tmp_index = np.where(ey > h-1) 215 | edy[tmp_index] = tmph[tmp_index] + h - 2 - ey[tmp_index] 216 | ey[tmp_index] = h - 1 217 | 218 | tmp_index = np.where(x < 0) 219 | dx[tmp_index] = 0 - x[tmp_index] 220 | x[tmp_index] = 0 221 | 222 | tmp_index = np.where(y < 0) 223 | dy[tmp_index] = 0 - y[tmp_index] 224 | y[tmp_index] = 0 225 | 226 | return_list = [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] 227 | return_list = [item.astype(np.int32) for item in return_list] 228 | 229 | return return_list 230 | 231 | 232 | def detect_pnet(self, im): 233 | """Get face candidates through pnet 234 | 235 | Parameters: 236 | ---------- 237 | im: numpy array 238 | input image array 239 | 240 | Returns: 241 | ------- 242 | boxes: numpy array 243 | detected boxes before calibration 244 | boxes_align: numpy array 245 | boxes after calibration 246 | """ 247 | 248 | # im = self.unique_image_format(im) 249 | 250 | h, w, c = im.shape 251 | net_size = 12 252 | 253 | current_scale = float(net_size) / self.min_face_size # find initial scale 254 | im_resized = self.resize_image(im, current_scale) 255 | current_height, current_width, _ = im_resized.shape 256 | 257 | # fcn 258 | all_boxes = list() 259 | while min(current_height, current_width) > net_size: 260 | feed_imgs = [] 261 | image_tensor = image_tools.convert_image_to_tensor(im_resized) 262 | feed_imgs.append(image_tensor) 263 | feed_imgs = torch.stack(feed_imgs) 264 | feed_imgs = Variable(feed_imgs) 265 | 266 | 267 | if self.pnet_detector.use_cuda: 268 | feed_imgs = feed_imgs.cuda() 269 | 270 | cls_map, reg = self.pnet_detector(feed_imgs) 271 | 272 | cls_map_np = image_tools.convert_chwTensor_to_hwcNumpy(cls_map.cpu()) 273 | reg_np = image_tools.convert_chwTensor_to_hwcNumpy(reg.cpu()) 274 | # landmark_np = image_tools.convert_chwTensor_to_hwcNumpy(landmark.cpu()) 275 | 276 | 277 | boxes = self.generate_bounding_box(cls_map_np[ 0, :, :], reg_np, current_scale, self.thresh[0]) 278 | 279 | current_scale *= self.scale_factor 280 | im_resized = self.resize_image(im, current_scale) 281 | current_height, current_width, _ = im_resized.shape 282 | 283 | if boxes.size == 0: 284 | continue 285 | keep = utils.nms(boxes[:, :5], 0.5, 'Union') 286 | boxes = boxes[keep] 287 | all_boxes.append(boxes) 288 | 289 | if len(all_boxes) == 0: 290 | return None, None 291 | 292 | all_boxes = 
np.vstack(all_boxes) 293 | 294 | # merge the detection from first stage 295 | keep = utils.nms(all_boxes[:, 0:5], 0.7, 'Union') 296 | all_boxes = all_boxes[keep] 297 | # boxes = all_boxes[:, :5] 298 | 299 | bw = all_boxes[:, 2] - all_boxes[:, 0] + 1 300 | bh = all_boxes[:, 3] - all_boxes[:, 1] + 1 301 | 302 | # landmark_keep = all_boxes[:, 9:].reshape((5,2)) 303 | 304 | 305 | boxes = np.vstack([all_boxes[:,0], 306 | all_boxes[:,1], 307 | all_boxes[:,2], 308 | all_boxes[:,3], 309 | all_boxes[:,4], 310 | # all_boxes[:, 0] + all_boxes[:, 9] * bw, 311 | # all_boxes[:, 1] + all_boxes[:,10] * bh, 312 | # all_boxes[:, 0] + all_boxes[:, 11] * bw, 313 | # all_boxes[:, 1] + all_boxes[:, 12] * bh, 314 | # all_boxes[:, 0] + all_boxes[:, 13] * bw, 315 | # all_boxes[:, 1] + all_boxes[:, 14] * bh, 316 | # all_boxes[:, 0] + all_boxes[:, 15] * bw, 317 | # all_boxes[:, 1] + all_boxes[:, 16] * bh, 318 | # all_boxes[:, 0] + all_boxes[:, 17] * bw, 319 | # all_boxes[:, 1] + all_boxes[:, 18] * bh 320 | ]) 321 | 322 | boxes = boxes.T 323 | 324 | align_topx = all_boxes[:, 0] + all_boxes[:, 5] * bw 325 | align_topy = all_boxes[:, 1] + all_boxes[:, 6] * bh 326 | align_bottomx = all_boxes[:, 2] + all_boxes[:, 7] * bw 327 | align_bottomy = all_boxes[:, 3] + all_boxes[:, 8] * bh 328 | 329 | # refine the boxes 330 | boxes_align = np.vstack([ align_topx, 331 | align_topy, 332 | align_bottomx, 333 | align_bottomy, 334 | all_boxes[:, 4], 335 | # align_topx + all_boxes[:,9] * bw, 336 | # align_topy + all_boxes[:,10] * bh, 337 | # align_topx + all_boxes[:,11] * bw, 338 | # align_topy + all_boxes[:,12] * bh, 339 | # align_topx + all_boxes[:,13] * bw, 340 | # align_topy + all_boxes[:,14] * bh, 341 | # align_topx + all_boxes[:,15] * bw, 342 | # align_topy + all_boxes[:,16] * bh, 343 | # align_topx + all_boxes[:,17] * bw, 344 | # align_topy + all_boxes[:,18] * bh, 345 | ]) 346 | boxes_align = boxes_align.T 347 | 348 | return boxes, boxes_align 349 | 350 | def detect_rnet(self, im, dets): 351 | """Get face candidates using rnet 352 | 353 | Parameters: 354 | ---------- 355 | im: numpy array 356 | input image array 357 | dets: numpy array 358 | detection results of pnet 359 | 360 | Returns: 361 | ------- 362 | boxes: numpy array 363 | detected boxes before calibration 364 | boxes_align: numpy array 365 | boxes after calibration 366 | """ 367 | h, w, c = im.shape 368 | 369 | if dets is None: 370 | return None,None 371 | 372 | dets = self.square_bbox(dets) 373 | dets[:, 0:4] = np.round(dets[:, 0:4]) 374 | 375 | [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h) 376 | num_boxes = dets.shape[0] 377 | 378 | ''' 379 | # helper for setting RNet batch size 380 | batch_size = self.rnet_detector.batch_size 381 | ratio = float(num_boxes) / batch_size 382 | if ratio > 3 or ratio < 0.3: 383 | print "You may need to reset RNet batch size if this info appears frequently, \ 384 | face candidates:%d, current batch_size:%d"%(num_boxes, batch_size) 385 | ''' 386 | 387 | # cropped_ims_tensors = np.zeros((num_boxes, 3, 24, 24), dtype=np.float32) 388 | cropped_ims_tensors = [] 389 | for i in range(num_boxes): 390 | tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8) 391 | tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, :] = im[y[i]:ey[i]+1, x[i]:ex[i]+1, :] 392 | crop_im = cv2.resize(tmp, (24, 24)) 393 | crop_im_tensor = image_tools.convert_image_to_tensor(crop_im) 394 | # cropped_ims_tensors[i, :, :, :] = crop_im_tensor 395 | cropped_ims_tensors.append(crop_im_tensor) 396 | feed_imgs = Variable(torch.stack(cropped_ims_tensors)) 397 | 398 | if 
self.rnet_detector.use_cuda: 399 | feed_imgs = feed_imgs.cuda() 400 | 401 | cls_map, reg = self.rnet_detector(feed_imgs) 402 | 403 | cls_map = cls_map.cpu().data.numpy() 404 | reg = reg.cpu().data.numpy() 405 | # landmark = landmark.cpu().data.numpy() 406 | 407 | 408 | keep_inds = np.where(cls_map > self.thresh[1])[0] 409 | 410 | if len(keep_inds) > 0: 411 | boxes = dets[keep_inds] 412 | cls = cls_map[keep_inds] 413 | reg = reg[keep_inds] 414 | # landmark = landmark[keep_inds] 415 | else: 416 | return None, None 417 | 418 | keep = utils.nms(boxes, 0.7) 419 | 420 | if len(keep) == 0: 421 | return None, None 422 | 423 | keep_cls = cls[keep] 424 | keep_boxes = boxes[keep] 425 | keep_reg = reg[keep] 426 | # keep_landmark = landmark[keep] 427 | 428 | 429 | bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1 430 | bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1 431 | 432 | 433 | boxes = np.vstack([ keep_boxes[:,0], 434 | keep_boxes[:,1], 435 | keep_boxes[:,2], 436 | keep_boxes[:,3], 437 | keep_cls[:,0], 438 | # keep_boxes[:,0] + keep_landmark[:, 0] * bw, 439 | # keep_boxes[:,1] + keep_landmark[:, 1] * bh, 440 | # keep_boxes[:,0] + keep_landmark[:, 2] * bw, 441 | # keep_boxes[:,1] + keep_landmark[:, 3] * bh, 442 | # keep_boxes[:,0] + keep_landmark[:, 4] * bw, 443 | # keep_boxes[:,1] + keep_landmark[:, 5] * bh, 444 | # keep_boxes[:,0] + keep_landmark[:, 6] * bw, 445 | # keep_boxes[:,1] + keep_landmark[:, 7] * bh, 446 | # keep_boxes[:,0] + keep_landmark[:, 8] * bw, 447 | # keep_boxes[:,1] + keep_landmark[:, 9] * bh, 448 | ]) 449 | 450 | align_topx = keep_boxes[:,0] + keep_reg[:,0] * bw 451 | align_topy = keep_boxes[:,1] + keep_reg[:,1] * bh 452 | align_bottomx = keep_boxes[:,2] + keep_reg[:,2] * bw 453 | align_bottomy = keep_boxes[:,3] + keep_reg[:,3] * bh 454 | 455 | boxes_align = np.vstack([align_topx, 456 | align_topy, 457 | align_bottomx, 458 | align_bottomy, 459 | keep_cls[:, 0], 460 | # align_topx + keep_landmark[:, 0] * bw, 461 | # align_topy + keep_landmark[:, 1] * bh, 462 | # align_topx + keep_landmark[:, 2] * bw, 463 | # align_topy + keep_landmark[:, 3] * bh, 464 | # align_topx + keep_landmark[:, 4] * bw, 465 | # align_topy + keep_landmark[:, 5] * bh, 466 | # align_topx + keep_landmark[:, 6] * bw, 467 | # align_topy + keep_landmark[:, 7] * bh, 468 | # align_topx + keep_landmark[:, 8] * bw, 469 | # align_topy + keep_landmark[:, 9] * bh, 470 | ]) 471 | 472 | boxes = boxes.T 473 | boxes_align = boxes_align.T 474 | 475 | return boxes, boxes_align 476 | 477 | def detect_onet(self, im, dets): 478 | """Get face candidates using onet 479 | 480 | Parameters: 481 | ---------- 482 | im: numpy array 483 | input image array 484 | dets: numpy array 485 | detection results of rnet 486 | 487 | Returns: 488 | ------- 489 | boxes_align: numpy array 490 | boxes after calibration 491 | landmarks_align: numpy array 492 | landmarks after calibration 493 | 494 | """ 495 | h, w, c = im.shape 496 | 497 | if dets is None: 498 | return None, None 499 | 500 | dets = self.square_bbox(dets) 501 | dets[:, 0:4] = np.round(dets[:, 0:4]) 502 | 503 | [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h) 504 | num_boxes = dets.shape[0] 505 | 506 | 507 | # cropped_ims_tensors = np.zeros((num_boxes, 3, 24, 24), dtype=np.float32) 508 | cropped_ims_tensors = [] 509 | for i in range(num_boxes): 510 | tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8) 511 | tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :] 512 | crop_im = cv2.resize(tmp, (48, 48)) 513 | crop_im_tensor = 
image_tools.convert_image_to_tensor(crop_im) 514 | # cropped_ims_tensors[i, :, :, :] = crop_im_tensor 515 | cropped_ims_tensors.append(crop_im_tensor) 516 | feed_imgs = Variable(torch.stack(cropped_ims_tensors)) 517 | 518 | if self.rnet_detector.use_cuda: 519 | feed_imgs = feed_imgs.cuda() 520 | 521 | cls_map, reg, landmark = self.onet_detector(feed_imgs) 522 | 523 | cls_map = cls_map.cpu().data.numpy() 524 | reg = reg.cpu().data.numpy() 525 | landmark = landmark.cpu().data.numpy() 526 | 527 | keep_inds = np.where(cls_map > self.thresh[2])[0] 528 | 529 | if len(keep_inds) > 0: 530 | boxes = dets[keep_inds] 531 | cls = cls_map[keep_inds] 532 | reg = reg[keep_inds] 533 | landmark = landmark[keep_inds] 534 | else: 535 | return None, None 536 | 537 | keep = utils.nms(boxes, 0.7, mode="Minimum") 538 | 539 | if len(keep) == 0: 540 | return None, None 541 | 542 | keep_cls = cls[keep] 543 | keep_boxes = boxes[keep] 544 | keep_reg = reg[keep] 545 | keep_landmark = landmark[keep] 546 | 547 | bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1 548 | bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1 549 | 550 | 551 | align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw 552 | align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh 553 | align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw 554 | align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh 555 | 556 | align_landmark_topx = keep_boxes[:, 0] 557 | align_landmark_topy = keep_boxes[:, 1] 558 | 559 | 560 | 561 | 562 | boxes_align = np.vstack([align_topx, 563 | align_topy, 564 | align_bottomx, 565 | align_bottomy, 566 | keep_cls[:, 0], 567 | # align_topx + keep_landmark[:, 0] * bw, 568 | # align_topy + keep_landmark[:, 1] * bh, 569 | # align_topx + keep_landmark[:, 2] * bw, 570 | # align_topy + keep_landmark[:, 3] * bh, 571 | # align_topx + keep_landmark[:, 4] * bw, 572 | # align_topy + keep_landmark[:, 5] * bh, 573 | # align_topx + keep_landmark[:, 6] * bw, 574 | # align_topy + keep_landmark[:, 7] * bh, 575 | # align_topx + keep_landmark[:, 8] * bw, 576 | # align_topy + keep_landmark[:, 9] * bh, 577 | ]) 578 | 579 | boxes_align = boxes_align.T 580 | 581 | landmark = np.vstack([ 582 | align_landmark_topx + keep_landmark[:, 0] * bw, 583 | align_landmark_topy + keep_landmark[:, 1] * bh, 584 | align_landmark_topx + keep_landmark[:, 2] * bw, 585 | align_landmark_topy + keep_landmark[:, 3] * bh, 586 | align_landmark_topx + keep_landmark[:, 4] * bw, 587 | align_landmark_topy + keep_landmark[:, 5] * bh, 588 | align_landmark_topx + keep_landmark[:, 6] * bw, 589 | align_landmark_topy + keep_landmark[:, 7] * bh, 590 | align_landmark_topx + keep_landmark[:, 8] * bw, 591 | align_landmark_topy + keep_landmark[:, 9] * bh, 592 | ]) 593 | 594 | landmark_align = landmark.T 595 | 596 | return boxes_align, landmark_align 597 | 598 | 599 | def detect_face(self,img): 600 | """Detect face over image 601 | """ 602 | boxes_align = np.array([]) 603 | landmark_align =np.array([]) 604 | 605 | t = time.time() 606 | 607 | # pnet 608 | if self.pnet_detector: 609 | boxes, boxes_align = self.detect_pnet(img) 610 | if boxes_align is None: 611 | return np.array([]), np.array([]) 612 | 613 | t1 = time.time() - t 614 | t = time.time() 615 | 616 | # rnet 617 | if self.rnet_detector: 618 | boxes, boxes_align = self.detect_rnet(img, boxes_align) 619 | if boxes_align is None: 620 | return np.array([]), np.array([]) 621 | 622 | t2 = time.time() - t 623 | t = time.time() 624 | 625 | # onet 626 | if self.onet_detector: 627 | boxes_align, landmark_align = self.detect_onet(img, boxes_align) 628 | if 
boxes_align is None: 629 | return np.array([]), np.array([]) 630 | 631 | t3 = time.time() - t 632 | t = time.time() 633 | print("time cost " + '{:.3f}'.format(t1+t2+t3) + ' pnet {:.3f} rnet {:.3f} onet {:.3f}'.format(t1, t2, t3)) 634 | 635 | return boxes_align, landmark_align 636 | 637 | 638 | 639 | 640 | -------------------------------------------------------------------------------- /dface/core/image_reader.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | 6 | class TrainImageReader: 7 | def __init__(self, imdb, im_size, batch_size=128, shuffle=False): 8 | 9 | self.imdb = imdb 10 | self.batch_size = batch_size 11 | self.im_size = im_size 12 | self.shuffle = shuffle 13 | 14 | self.cur = 0 15 | self.size = len(imdb) 16 | self.index = np.arange(self.size) 17 | self.num_classes = 2 18 | 19 | self.batch = None 20 | self.data = None 21 | self.label = None 22 | 23 | self.label_names= ['label', 'bbox_target', 'landmark_target'] 24 | self.reset() 25 | self.get_batch() 26 | 27 | def reset(self): 28 | self.cur = 0 29 | if self.shuffle: 30 | np.random.shuffle(self.index) 31 | 32 | def iter_next(self): 33 | return self.cur + self.batch_size <= self.size 34 | 35 | def __iter__(self): 36 | return self 37 | 38 | def __next__(self): 39 | return self.next() 40 | 41 | def next(self): 42 | if self.iter_next(): 43 | self.get_batch() 44 | self.cur += self.batch_size 45 | return self.data,self.label 46 | else: 47 | raise StopIteration 48 | 49 | def getindex(self): 50 | return self.cur / self.batch_size 51 | 52 | def getpad(self): 53 | if self.cur + self.batch_size > self.size: 54 | return self.cur + self.batch_size - self.size 55 | else: 56 | return 0 57 | 58 | def get_batch(self): 59 | cur_from = self.cur 60 | cur_to = min(cur_from + self.batch_size, self.size) 61 | imdb = [self.imdb[self.index[i]] for i in range(cur_from, cur_to)] 62 | data, label = get_minibatch(imdb) 63 | self.data = data['data'] 64 | self.label = [label[name] for name in self.label_names] 65 | 66 | 67 | 68 | class TestImageLoader: 69 | def __init__(self, imdb, batch_size=1, shuffle=False): 70 | self.imdb = imdb 71 | self.batch_size = batch_size 72 | self.shuffle = shuffle 73 | self.size = len(imdb) 74 | self.index = np.arange(self.size) 75 | 76 | self.cur = 0 77 | self.data = None 78 | self.label = None 79 | 80 | self.reset() 81 | self.get_batch() 82 | 83 | def reset(self): 84 | self.cur = 0 85 | if self.shuffle: 86 | np.random.shuffle(self.index) 87 | 88 | def iter_next(self): 89 | return self.cur + self.batch_size <= self.size 90 | 91 | def __iter__(self): 92 | return self 93 | 94 | def __next__(self): 95 | return self.next() 96 | 97 | def next(self): 98 | if self.iter_next(): 99 | self.get_batch() 100 | self.cur += self.batch_size 101 | return self.data 102 | else: 103 | raise StopIteration 104 | 105 | def getindex(self): 106 | return self.cur / self.batch_size 107 | 108 | def getpad(self): 109 | if self.cur + self.batch_size > self.size: 110 | return self.cur + self.batch_size - self.size 111 | else: 112 | return 0 113 | 114 | def get_batch(self): 115 | cur_from = self.cur 116 | cur_to = min(cur_from + self.batch_size, self.size) 117 | imdb = [self.imdb[self.index[i]] for i in range(cur_from, cur_to)] 118 | data= get_testbatch(imdb) 119 | self.data=data['data'] 120 | 121 | 122 | 123 | 124 | def get_minibatch(imdb): 125 | 126 | # im_size: 12, 24 or 48 127 | num_images = len(imdb) 128 | processed_ims = list() 129 | cls_label = list() 130 | 
bbox_reg_target = list() 131 | landmark_reg_target = list() 132 | 133 | for i in range(num_images): 134 | im = cv2.imread(imdb[i]['image']) 135 | #im = Image.open(imdb[i]['image']) 136 | 137 | if imdb[i]['flipped']: 138 | im = im[:, ::-1, :] 139 | #im = im.transpose(Image.FLIP_LEFT_RIGHT) 140 | 141 | cls = imdb[i]['label'] 142 | bbox_target = imdb[i]['bbox_target'] 143 | landmark = imdb[i]['landmark_target'] 144 | 145 | processed_ims.append(im) 146 | cls_label.append(cls) 147 | bbox_reg_target.append(bbox_target) 148 | landmark_reg_target.append(landmark) 149 | 150 | im_array = np.asarray(processed_ims) 151 | 152 | label_array = np.array(cls_label) 153 | 154 | bbox_target_array = np.vstack(bbox_reg_target) 155 | 156 | landmark_target_array = np.vstack(landmark_reg_target) 157 | 158 | data = {'data': im_array} 159 | label = {'label': label_array, 160 | 'bbox_target': bbox_target_array, 161 | 'landmark_target': landmark_target_array 162 | } 163 | 164 | return data, label 165 | 166 | 167 | def get_testbatch(imdb): 168 | assert len(imdb) == 1, "Single batch only" 169 | im = cv2.imread(imdb[0]['image']) 170 | data = {'data': im} 171 | return data -------------------------------------------------------------------------------- /dface/core/image_tools.py: -------------------------------------------------------------------------------- 1 | import torchvision.transforms as transforms 2 | import torch 3 | from torch.autograd.variable import Variable 4 | import numpy as np 5 | 6 | transform = transforms.ToTensor() 7 | 8 | def convert_image_to_tensor(image): 9 | """convert an image to pytorch tensor 10 | 11 | Parameters: 12 | ---------- 13 | image: numpy array , h * w * c 14 | 15 | Returns: 16 | ------- 17 | image_tensor: pytorch.FloatTensor, c * h * w 18 | """ 19 | image = image.astype(np.float) 20 | return transform(image) 21 | # return transform(image) 22 | 23 | 24 | def convert_chwTensor_to_hwcNumpy(tensor): 25 | """convert a group images pytorch tensor(count * c * h * w) to numpy array images(count * h * w * c) 26 | Parameters: 27 | ---------- 28 | tensor: numpy array , count * c * h * w 29 | 30 | Returns: 31 | ------- 32 | numpy array images: count * h * w * c 33 | """ 34 | 35 | if isinstance(tensor, Variable): 36 | return np.transpose(tensor.data.numpy(), (0,2,3,1)) 37 | elif isinstance(tensor, torch.FloatTensor): 38 | return np.transpose(tensor.numpy(), (0,2,3,1)) 39 | else: 40 | raise Exception("covert b*c*h*w tensor to b*h*w*c numpy error.This tensor must have 4 dimension.") -------------------------------------------------------------------------------- /dface/core/imagedb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | class ImageDB(object): 5 | def __init__(self, image_annotation_file, prefix_path='', mode='train'): 6 | self.prefix_path = prefix_path 7 | self.image_annotation_file = image_annotation_file 8 | self.classes = ['__background__', 'face'] 9 | self.num_classes = 2 10 | self.image_set_index = self.load_image_set_index() 11 | self.num_images = len(self.image_set_index) 12 | self.mode = mode 13 | 14 | 15 | def load_image_set_index(self): 16 | """Get image index 17 | 18 | Parameters: 19 | ---------- 20 | Returns: 21 | ------- 22 | image_set_index: str 23 | relative path of image 24 | """ 25 | assert os.path.exists(self.image_annotation_file), 'Path does not exist: {}'.format(self.image_annotation_file) 26 | with open(self.image_annotation_file, 'r') as f: 27 | image_set_index = [x.strip().split(' ')[0] 
for x in f.readlines()] 28 | return image_set_index 29 | 30 | 31 | def load_imdb(self): 32 | """Get and save ground truth image database 33 | 34 | Parameters: 35 | ---------- 36 | Returns: 37 | ------- 38 | gt_imdb: dict 39 | image database with annotations 40 | """ 41 | #cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl') 42 | #if os.path.exists(cache_file): 43 | # with open(cache_file, 'rb') as f: 44 | # imdb = cPickle.load(f) 45 | # print '{} gt imdb loaded from {}'.format(self.name, cache_file) 46 | # return imdb 47 | gt_imdb = self.load_annotations() 48 | #with open(cache_file, 'wb') as f: 49 | # cPickle.dump(gt_imdb, f, cPickle.HIGHEST_PROTOCOL) 50 | return gt_imdb 51 | 52 | 53 | def real_image_path(self, index): 54 | """Given image index, return full path 55 | 56 | Parameters: 57 | ---------- 58 | index: str 59 | relative path of image 60 | Returns: 61 | ------- 62 | image_file: str 63 | full path of image 64 | """ 65 | 66 | index = index.replace("\\", "/") 67 | 68 | if not os.path.exists(index): 69 | image_file = os.path.join(self.prefix_path, index) 70 | else: 71 | image_file=index 72 | if not image_file.endswith('.jpg'): 73 | image_file = image_file + '.jpg' 74 | assert os.path.exists(image_file), 'Path does not exist: {}'.format(image_file) 75 | return image_file 76 | 77 | 78 | def load_annotations(self,annotion_type=1): 79 | """Load annotations 80 | 81 | Parameters: 82 | ---------- 83 | annotion_type: int 84 | 0:dsadsa 85 | 1:dsadsa 86 | Returns: 87 | ------- 88 | imdb: dict 89 | image database with annotations 90 | """ 91 | 92 | assert os.path.exists(self.image_annotation_file), 'annotations not found at {}'.format(self.image_annotation_file) 93 | with open(self.image_annotation_file, 'r') as f: 94 | annotations = f.readlines() 95 | 96 | 97 | imdb = [] 98 | for i in range(self.num_images): 99 | annotation = annotations[i].strip().split(' ') 100 | index = annotation[0] 101 | im_path = self.real_image_path(index) 102 | imdb_ = dict() 103 | imdb_['image'] = im_path 104 | 105 | if self.mode == 'test': 106 | # gt_boxes = map(float, annotation[1:]) 107 | # boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4) 108 | # imdb_['gt_boxes'] = boxes 109 | pass 110 | else: 111 | label = annotation[1] 112 | imdb_['label'] = int(label) 113 | imdb_['flipped'] = False 114 | imdb_['bbox_target'] = np.zeros((4,)) 115 | imdb_['landmark_target'] = np.zeros((10,)) 116 | if len(annotation[2:])==4: 117 | bbox_target = annotation[2:6] 118 | imdb_['bbox_target'] = np.array(bbox_target).astype(float) 119 | if len(annotation[2:])==14: 120 | bbox_target = annotation[2:6] 121 | imdb_['bbox_target'] = np.array(bbox_target).astype(float) 122 | landmark = annotation[6:] 123 | imdb_['landmark_target'] = np.array(landmark).astype(float) 124 | imdb.append(imdb_) 125 | return imdb 126 | 127 | 128 | def append_flipped_images(self, imdb): 129 | """append flipped images to imdb 130 | 131 | Parameters: 132 | ---------- 133 | imdb: imdb 134 | image database 135 | Returns: 136 | ------- 137 | imdb: dict 138 | image database with flipped image annotations added 139 | """ 140 | print('append flipped images to imdb', len(imdb)) 141 | for i in range(len(imdb)): 142 | imdb_ = imdb[i] 143 | m_bbox = imdb_['bbox_target'].copy() 144 | m_bbox[0], m_bbox[2] = -m_bbox[2], -m_bbox[0] 145 | 146 | landmark_ = imdb_['landmark_target'].copy() 147 | landmark_ = landmark_.reshape((5, 2)) 148 | landmark_ = np.asarray([(1 - x, y) for (x, y) in landmark_]) 149 | landmark_[[0, 1]] = landmark_[[1, 0]] 150 | landmark_[[3, 
4]] = landmark_[[4, 3]] 151 | 152 | item = {'image': imdb_['image'], 153 | 'label': imdb_['label'], 154 | 'bbox_target': m_bbox, 155 | 'landmark_target': landmark_.reshape((10)), 156 | 'flipped': True} 157 | 158 | imdb.append(item) 159 | self.image_set_index *= 2 160 | return imdb 161 | 162 | 163 | -------------------------------------------------------------------------------- /dface/core/models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def weights_init(m): 7 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): 8 | nn.init.xavier_uniform(m.weight.data) 9 | nn.init.constant(m.bias, 0.1) 10 | 11 | 12 | 13 | class LossFn: 14 | def __init__(self, cls_factor=1, box_factor=1, landmark_factor=1): 15 | # loss function 16 | self.cls_factor = cls_factor 17 | self.box_factor = box_factor 18 | self.land_factor = landmark_factor 19 | self.loss_cls = nn.BCELoss() 20 | self.loss_box = nn.MSELoss() 21 | self.loss_landmark = nn.MSELoss() 22 | 23 | 24 | def cls_loss(self,gt_label,pred_label): 25 | pred_label = torch.squeeze(pred_label) 26 | gt_label = torch.squeeze(gt_label) 27 | # get the mask element which >= 0, only 0 and 1 can effect the detection loss 28 | mask = torch.ge(gt_label,0) 29 | valid_gt_label = torch.masked_select(gt_label,mask) 30 | valid_pred_label = torch.masked_select(pred_label,mask) 31 | return self.loss_cls(valid_pred_label,valid_gt_label)*self.cls_factor 32 | 33 | 34 | def box_loss(self,gt_label,gt_offset,pred_offset): 35 | pred_offset = torch.squeeze(pred_offset) 36 | gt_offset = torch.squeeze(gt_offset) 37 | gt_label = torch.squeeze(gt_label) 38 | 39 | #get the mask element which != 0 40 | unmask = torch.eq(gt_label,0) 41 | mask = torch.eq(unmask,0) 42 | #convert mask to dim index 43 | chose_index = torch.nonzero(mask.data) 44 | chose_index = torch.squeeze(chose_index) 45 | #only valid element can effect the loss 46 | valid_gt_offset = gt_offset[chose_index,:] 47 | valid_pred_offset = pred_offset[chose_index,:] 48 | return self.loss_box(valid_pred_offset,valid_gt_offset)*self.box_factor 49 | 50 | 51 | def landmark_loss(self,gt_label,gt_landmark,pred_landmark): 52 | pred_landmark = torch.squeeze(pred_landmark) 53 | gt_landmark = torch.squeeze(gt_landmark) 54 | gt_label = torch.squeeze(gt_label) 55 | mask = torch.eq(gt_label,-2) 56 | 57 | chose_index = torch.nonzero(mask.data) 58 | chose_index = torch.squeeze(chose_index) 59 | 60 | valid_gt_landmark = gt_landmark[chose_index, :] 61 | valid_pred_landmark = pred_landmark[chose_index, :] 62 | return self.loss_landmark(valid_pred_landmark,valid_gt_landmark)*self.land_factor 63 | 64 | 65 | 66 | 67 | 68 | class PNet(nn.Module): 69 | ''' PNet ''' 70 | 71 | def __init__(self, is_train=False, use_cuda=True): 72 | super(PNet, self).__init__() 73 | self.is_train = is_train 74 | self.use_cuda = use_cuda 75 | 76 | # backend 77 | self.pre_layer = nn.Sequential( 78 | nn.Conv2d(3, 10, kernel_size=3, stride=1), # conv1 79 | nn.PReLU(), # PReLU1 80 | nn.MaxPool2d(kernel_size=2, stride=2), # pool1 81 | nn.Conv2d(10, 16, kernel_size=3, stride=1), # conv2 82 | nn.PReLU(), # PReLU2 83 | nn.Conv2d(16, 32, kernel_size=3, stride=1), # conv3 84 | nn.PReLU() # PReLU3 85 | ) 86 | # detection 87 | self.conv4_1 = nn.Conv2d(32, 1, kernel_size=1, stride=1) 88 | # bounding box regresion 89 | self.conv4_2 = nn.Conv2d(32, 4, kernel_size=1, stride=1) 90 | # landmark localization 91 | self.conv4_3 = nn.Conv2d(32, 10, kernel_size=1, 
stride=1) 92 | 93 | # weight initiation with xavier 94 | self.apply(weights_init) 95 | 96 | def forward(self, x): 97 | x = self.pre_layer(x) 98 | label = F.sigmoid(self.conv4_1(x)) 99 | offset = self.conv4_2(x) 100 | # landmark = self.conv4_3(x) 101 | 102 | if self.is_train is True: 103 | # label_loss = LossUtil.label_loss(self.gt_label,torch.squeeze(label)) 104 | # bbox_loss = LossUtil.bbox_loss(self.gt_bbox,torch.squeeze(offset)) 105 | return label,offset 106 | #landmark = self.conv4_3(x) 107 | return label, offset 108 | 109 | 110 | 111 | 112 | 113 | class RNet(nn.Module): 114 | ''' RNet ''' 115 | 116 | def __init__(self,is_train=False, use_cuda=True): 117 | super(RNet, self).__init__() 118 | self.is_train = is_train 119 | self.use_cuda = use_cuda 120 | # backend 121 | self.pre_layer = nn.Sequential( 122 | nn.Conv2d(3, 28, kernel_size=3, stride=1), # conv1 123 | nn.PReLU(), # prelu1 124 | nn.MaxPool2d(kernel_size=3, stride=2), # pool1 125 | nn.Conv2d(28, 48, kernel_size=3, stride=1), # conv2 126 | nn.PReLU(), # prelu2 127 | nn.MaxPool2d(kernel_size=3, stride=2), # pool2 128 | nn.Conv2d(48, 64, kernel_size=2, stride=1), # conv3 129 | nn.PReLU() # prelu3 130 | 131 | ) 132 | self.conv4 = nn.Linear(64*2*2, 128) # conv4 133 | self.prelu4 = nn.PReLU() # prelu4 134 | # detection 135 | self.conv5_1 = nn.Linear(128, 1) 136 | # bounding box regression 137 | self.conv5_2 = nn.Linear(128, 4) 138 | # lanbmark localization 139 | self.conv5_3 = nn.Linear(128, 10) 140 | # weight initiation weih xavier 141 | self.apply(weights_init) 142 | 143 | def forward(self, x): 144 | # backend 145 | x = self.pre_layer(x) 146 | x = x.view(x.size(0), -1) 147 | x = self.conv4(x) 148 | x = self.prelu4(x) 149 | # detection 150 | det = torch.sigmoid(self.conv5_1(x)) 151 | box = self.conv5_2(x) 152 | # landmark = self.conv5_3(x) 153 | 154 | if self.is_train is True: 155 | return det, box 156 | #landmard = self.conv5_3(x) 157 | return det, box 158 | 159 | 160 | 161 | 162 | class ONet(nn.Module): 163 | ''' RNet ''' 164 | 165 | def __init__(self,is_train=False, use_cuda=True): 166 | super(ONet, self).__init__() 167 | self.is_train = is_train 168 | self.use_cuda = use_cuda 169 | # backend 170 | self.pre_layer = nn.Sequential( 171 | nn.Conv2d(3, 32, kernel_size=3, stride=1), # conv1 172 | nn.PReLU(), # prelu1 173 | nn.MaxPool2d(kernel_size=3, stride=2), # pool1 174 | nn.Conv2d(32, 64, kernel_size=3, stride=1), # conv2 175 | nn.PReLU(), # prelu2 176 | nn.MaxPool2d(kernel_size=3, stride=2), # pool2 177 | nn.Conv2d(64, 64, kernel_size=3, stride=1), # conv3 178 | nn.PReLU(), # prelu3 179 | nn.MaxPool2d(kernel_size=2,stride=2), # pool3 180 | nn.Conv2d(64,128,kernel_size=2,stride=1), # conv4 181 | nn.PReLU() # prelu4 182 | ) 183 | self.conv5 = nn.Linear(128*2*2, 256) # conv5 184 | self.prelu5 = nn.PReLU() # prelu5 185 | # detection 186 | self.conv6_1 = nn.Linear(256, 1) 187 | # bounding box regression 188 | self.conv6_2 = nn.Linear(256, 4) 189 | # lanbmark localization 190 | self.conv6_3 = nn.Linear(256, 10) 191 | # weight initiation weih xavier 192 | self.apply(weights_init) 193 | 194 | def forward(self, x): 195 | # backend 196 | x = self.pre_layer(x) 197 | x = x.view(x.size(0), -1) 198 | x = self.conv5(x) 199 | x = self.prelu5(x) 200 | # detection 201 | det = torch.sigmoid(self.conv6_1(x)) 202 | box = self.conv6_2(x) 203 | landmark = self.conv6_3(x) 204 | if self.is_train is True: 205 | return det, box, landmark 206 | #landmard = self.conv5_3(x) 207 | return det, box, landmark 208 | 209 | 210 | 211 | 212 | 213 | # Residual Block 
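# Note (added remark, not part of the original source): as written, ResidualBlock
# applies two 3x3 convolutions with no padding and repeats `stride` on both, so the
# main path and the shortcut generally end up with different spatial sizes and the
# `out += residual` addition in forward() will raise a size-mismatch error at runtime.
# The standard basic block (e.g. torchvision's BasicBlock) avoids this by using
# padding=1 on both convolutions and striding only in conv1. This module is not
# referenced by the P/R/O-Net classes above, so it does not affect the MTCNN pipeline.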
214 | class ResidualBlock(nn.Module): 215 | def __init__(self, in_channels, out_channels, stride=1, downsample=None): 216 | super(ResidualBlock, self).__init__() 217 | self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride) 218 | self.bn1 = nn.BatchNorm2d(out_channels) 219 | self.relu = nn.ReLU(inplace=True) 220 | self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride) 221 | self.bn2 = nn.BatchNorm2d(out_channels) 222 | self.downsample = downsample 223 | 224 | def forward(self, x): 225 | residual = x 226 | out = self.conv1(x) 227 | out = self.bn1(out) 228 | out = self.relu(out) 229 | out = self.conv2(out) 230 | out = self.bn2(out) 231 | if self.downsample: 232 | residual = self.downsample(x) 233 | out += residual 234 | out = self.relu(out) 235 | return out 236 | 237 | 238 | 239 | # ResNet Module 240 | class ResNet(nn.Module): 241 | def __init__(self, block, num_classes=10): 242 | super(ResNet, self).__init__() 243 | self.in_channels = 16 244 | self.conv = nn.Conv2d(3, 16,kernel_size=3) 245 | self.bn = nn.BatchNorm2d(16) 246 | self.relu = nn.ReLU(inplace=True) 247 | self.layer1 = self.make_layer(block, 16, 3) 248 | self.layer2 = self.make_layer(block, 32, 3, 2) 249 | self.layer3 = self.make_layer(block, 64, 3, 2) 250 | self.avg_pool = nn.AvgPool2d(8) 251 | self.fc = nn.Linear(64, num_classes) 252 | 253 | def make_layer(self, block, out_channels, blocks, stride=1): 254 | downsample = None 255 | if (stride != 1) or (self.in_channels != out_channels): 256 | downsample = nn.Sequential( 257 | nn.Conv2d(self.in_channels, out_channels, kernel_size=3, stride=stride), 258 | nn.BatchNorm2d(out_channels)) 259 | layers = [] 260 | layers.append(block(self.in_channels, out_channels, stride, downsample)) 261 | self.in_channels = out_channels 262 | for i in range(1, blocks): 263 | layers.append(block(out_channels, out_channels)) 264 | return nn.Sequential(*layers) 265 | 266 | def forward(self, x): 267 | out = self.conv(x) 268 | out = self.bn(out) 269 | out = self.relu(out) 270 | out = self.layer1(out) 271 | out = self.layer2(out) 272 | out = self.layer3(out) 273 | out = self.avg_pool(out) 274 | out = out.view(out.size(0), -1) 275 | out = self.fc(out) 276 | return out -------------------------------------------------------------------------------- /dface/core/nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def torch_nms(dets, thresh, mode="Union"): 5 | """ 6 | greedily select boxes with high confidence 7 | keep boxes overlap <= thresh 8 | rule out overlap > thresh 9 | :param dets: [[x1, y1, x2, y2 score]] 10 | :param thresh: retain overlap <= thresh 11 | :return: indexes to keep 12 | """ 13 | x1 = dets[:, 0] 14 | y1 = dets[:, 1] 15 | x2 = dets[:, 2] 16 | y2 = dets[:, 3] 17 | scores = dets[:, 4] 18 | 19 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 20 | order = scores.argsort()[::-1] 21 | 22 | keep = [] 23 | while order.size > 0: 24 | i = order[0] 25 | keep.append(i) 26 | xx1 = np.maximum(x1[i], x1[order[1:]]) 27 | yy1 = np.maximum(y1[i], y1[order[1:]]) 28 | xx2 = np.minimum(x2[i], x2[order[1:]]) 29 | yy2 = np.minimum(y2[i], y2[order[1:]]) 30 | 31 | w = np.maximum(0.0, xx2 - xx1 + 1) 32 | h = np.maximum(0.0, yy2 - yy1 + 1) 33 | inter = w * h 34 | if mode == "Union": 35 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 36 | elif mode == "Minimum": 37 | ovr = inter / np.minimum(areas[i], areas[order[1:]]) 38 | 39 | inds = np.where(ovr <= thresh)[0] 40 | order = order[inds + 1] 41 | 42 | 
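    # 'keep' now holds the indices (into dets) of the retained boxes, in
    # descending-score order; all other boxes were suppressed as overlapping.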
return keep 43 | -------------------------------------------------------------------------------- /dface/core/resnet_inception_v2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.utils.model_zoo as model_zoo 4 | import os 5 | import sys 6 | 7 | 8 | class BasicConv2d(nn.Module): 9 | def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0): 10 | super(BasicConv2d, self).__init__() 11 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, 12 | bias=False) # verify bias false 13 | self.bn = nn.BatchNorm2d(out_planes, eps=0.001, momentum=0, affine=True) 14 | self.relu = nn.ReLU(inplace=False) 15 | 16 | def forward(self, x): 17 | x = self.conv(x) 18 | x = self.bn(x) 19 | x = self.relu(x) 20 | return x 21 | 22 | 23 | class Mixed_5b(nn.Module): 24 | def __init__(self): 25 | super(Mixed_5b, self).__init__() 26 | 27 | self.branch0 = BasicConv2d(192, 96, kernel_size=1, stride=1) 28 | 29 | self.branch1 = nn.Sequential( 30 | BasicConv2d(192, 48, kernel_size=1, stride=1), 31 | BasicConv2d(48, 64, kernel_size=5, stride=1, padding=2) 32 | ) 33 | 34 | self.branch2 = nn.Sequential( 35 | BasicConv2d(192, 64, kernel_size=1, stride=1), 36 | BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1), 37 | BasicConv2d(96, 96, kernel_size=3, stride=1, padding=1) 38 | ) 39 | 40 | self.branch3 = nn.Sequential( 41 | nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False), 42 | BasicConv2d(192, 64, kernel_size=1, stride=1) 43 | ) 44 | 45 | def forward(self, x): 46 | x0 = self.branch0(x) 47 | x1 = self.branch1(x) 48 | x2 = self.branch2(x) 49 | x3 = self.branch3(x) 50 | out = torch.cat((x0, x1, x2, x3), 1) 51 | return out 52 | 53 | 54 | class Block35(nn.Module): 55 | def __init__(self, scale=1.0): 56 | super(Block35, self).__init__() 57 | 58 | self.scale = scale 59 | 60 | self.branch0 = BasicConv2d(320, 32, kernel_size=1, stride=1) 61 | 62 | self.branch1 = nn.Sequential( 63 | BasicConv2d(320, 32, kernel_size=1, stride=1), 64 | BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1) 65 | ) 66 | 67 | self.branch2 = nn.Sequential( 68 | BasicConv2d(320, 32, kernel_size=1, stride=1), 69 | BasicConv2d(32, 48, kernel_size=3, stride=1, padding=1), 70 | BasicConv2d(48, 64, kernel_size=3, stride=1, padding=1) 71 | ) 72 | 73 | self.conv2d = nn.Conv2d(128, 320, kernel_size=1, stride=1) 74 | self.relu = nn.ReLU(inplace=False) 75 | 76 | def forward(self, x): 77 | x0 = self.branch0(x) 78 | x1 = self.branch1(x) 79 | x2 = self.branch2(x) 80 | out = torch.cat((x0, x1, x2), 1) 81 | out = self.conv2d(out) 82 | out = out * self.scale + x 83 | out = self.relu(out) 84 | return out 85 | 86 | 87 | class Mixed_6a(nn.Module): 88 | def __init__(self): 89 | super(Mixed_6a, self).__init__() 90 | 91 | self.branch0 = BasicConv2d(320, 384, kernel_size=3, stride=2) 92 | 93 | self.branch1 = nn.Sequential( 94 | BasicConv2d(320, 256, kernel_size=1, stride=1), 95 | BasicConv2d(256, 256, kernel_size=3, stride=1, padding=1), 96 | BasicConv2d(256, 384, kernel_size=3, stride=2) 97 | ) 98 | 99 | self.branch2 = nn.MaxPool2d(3, stride=2) 100 | 101 | def forward(self, x): 102 | x0 = self.branch0(x) 103 | x1 = self.branch1(x) 104 | x2 = self.branch2(x) 105 | out = torch.cat((x0, x1, x2), 1) 106 | return out 107 | 108 | 109 | class Block17(nn.Module): 110 | def __init__(self, scale=1.0): 111 | super(Block17, self).__init__() 112 | 113 | self.scale = scale 114 | 115 | self.branch0 = BasicConv2d(1088, 192, 
kernel_size=1, stride=1) 116 | 117 | self.branch1 = nn.Sequential( 118 | BasicConv2d(1088, 128, kernel_size=1, stride=1), 119 | BasicConv2d(128, 160, kernel_size=(1, 7), stride=1, padding=(0, 3)), 120 | BasicConv2d(160, 192, kernel_size=(7, 1), stride=1, padding=(3, 0)) 121 | ) 122 | 123 | self.conv2d = nn.Conv2d(384, 1088, kernel_size=1, stride=1) 124 | self.relu = nn.ReLU(inplace=False) 125 | 126 | def forward(self, x): 127 | x0 = self.branch0(x) 128 | x1 = self.branch1(x) 129 | out = torch.cat((x0, x1), 1) 130 | out = self.conv2d(out) 131 | out = out * self.scale + x 132 | out = self.relu(out) 133 | return out 134 | 135 | 136 | class Mixed_7a(nn.Module): 137 | def __init__(self): 138 | super(Mixed_7a, self).__init__() 139 | 140 | self.branch0 = nn.Sequential( 141 | BasicConv2d(1088, 256, kernel_size=1, stride=1), 142 | BasicConv2d(256, 384, kernel_size=3, stride=2) 143 | ) 144 | 145 | self.branch1 = nn.Sequential( 146 | BasicConv2d(1088, 256, kernel_size=1, stride=1), 147 | BasicConv2d(256, 288, kernel_size=3, stride=2) 148 | ) 149 | 150 | self.branch2 = nn.Sequential( 151 | BasicConv2d(1088, 256, kernel_size=1, stride=1), 152 | BasicConv2d(256, 288, kernel_size=3, stride=1, padding=1), 153 | BasicConv2d(288, 320, kernel_size=3, stride=2) 154 | ) 155 | 156 | self.branch3 = nn.MaxPool2d(3, stride=2) 157 | 158 | def forward(self, x): 159 | x0 = self.branch0(x) 160 | x1 = self.branch1(x) 161 | x2 = self.branch2(x) 162 | x3 = self.branch3(x) 163 | out = torch.cat((x0, x1, x2, x3), 1) 164 | return out 165 | 166 | 167 | class Block8(nn.Module): 168 | def __init__(self, scale=1.0, noReLU=False): 169 | super(Block8, self).__init__() 170 | 171 | self.scale = scale 172 | self.noReLU = noReLU 173 | 174 | self.branch0 = BasicConv2d(2080, 192, kernel_size=1, stride=1) 175 | 176 | self.branch1 = nn.Sequential( 177 | BasicConv2d(2080, 192, kernel_size=1, stride=1), 178 | BasicConv2d(192, 224, kernel_size=(1, 3), stride=1, padding=(0, 1)), 179 | BasicConv2d(224, 256, kernel_size=(3, 1), stride=1, padding=(1, 0)) 180 | ) 181 | 182 | self.conv2d = nn.Conv2d(448, 2080, kernel_size=1, stride=1) 183 | if not self.noReLU: 184 | self.relu = nn.ReLU(inplace=False) 185 | 186 | def forward(self, x): 187 | x0 = self.branch0(x) 188 | x1 = self.branch1(x) 189 | out = torch.cat((x0, x1), 1) 190 | out = self.conv2d(out) 191 | out = out * self.scale + x 192 | if not self.noReLU: 193 | out = self.relu(out) 194 | return out 195 | 196 | 197 | class InceptionResnetV2(nn.Module): 198 | def __init__(self, num_classes=1001): 199 | super(InceptionResnetV2, self).__init__() 200 | self.conv2d_1a = BasicConv2d(3, 32, kernel_size=3, stride=2) 201 | self.conv2d_2a = BasicConv2d(32, 32, kernel_size=3, stride=1) 202 | self.conv2d_2b = BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1) 203 | self.maxpool_3a = nn.MaxPool2d(3, stride=2) 204 | self.conv2d_3b = BasicConv2d(64, 80, kernel_size=1, stride=1) 205 | self.conv2d_4a = BasicConv2d(80, 192, kernel_size=3, stride=1) 206 | self.maxpool_5a = nn.MaxPool2d(3, stride=2) 207 | self.mixed_5b = Mixed_5b() 208 | self.repeat = nn.Sequential( 209 | Block35(scale=0.17), 210 | Block35(scale=0.17), 211 | Block35(scale=0.17), 212 | Block35(scale=0.17), 213 | Block35(scale=0.17), 214 | Block35(scale=0.17), 215 | Block35(scale=0.17), 216 | Block35(scale=0.17), 217 | Block35(scale=0.17), 218 | Block35(scale=0.17) 219 | ) 220 | self.mixed_6a = Mixed_6a() 221 | self.repeat_1 = nn.Sequential( 222 | Block17(scale=0.10), 223 | Block17(scale=0.10), 224 | Block17(scale=0.10), 225 | 
Block17(scale=0.10), 226 | Block17(scale=0.10), 227 | Block17(scale=0.10), 228 | Block17(scale=0.10), 229 | Block17(scale=0.10), 230 | Block17(scale=0.10), 231 | Block17(scale=0.10), 232 | Block17(scale=0.10), 233 | Block17(scale=0.10), 234 | Block17(scale=0.10), 235 | Block17(scale=0.10), 236 | Block17(scale=0.10), 237 | Block17(scale=0.10), 238 | Block17(scale=0.10), 239 | Block17(scale=0.10), 240 | Block17(scale=0.10), 241 | Block17(scale=0.10) 242 | ) 243 | self.mixed_7a = Mixed_7a() 244 | self.repeat_2 = nn.Sequential( 245 | Block8(scale=0.20), 246 | Block8(scale=0.20), 247 | Block8(scale=0.20), 248 | Block8(scale=0.20), 249 | Block8(scale=0.20), 250 | Block8(scale=0.20), 251 | Block8(scale=0.20), 252 | Block8(scale=0.20), 253 | Block8(scale=0.20) 254 | ) 255 | self.block8 = Block8(noReLU=True) 256 | self.conv2d_7b = BasicConv2d(2080, 1536, kernel_size=1, stride=1) 257 | self.avgpool_1a = nn.AvgPool2d(8, count_include_pad=False) 258 | self.classif = nn.Linear(1536, num_classes) 259 | 260 | def forward(self, x): 261 | x = self.conv2d_1a(x) 262 | x = self.conv2d_2a(x) 263 | x = self.conv2d_2b(x) 264 | x = self.maxpool_3a(x) 265 | x = self.conv2d_3b(x) 266 | x = self.conv2d_4a(x) 267 | x = self.maxpool_5a(x) 268 | x = self.mixed_5b(x) 269 | x = self.repeat(x) 270 | x = self.mixed_6a(x) 271 | x = self.repeat_1(x) 272 | x = self.mixed_7a(x) 273 | x = self.repeat_2(x) 274 | x = self.block8(x) 275 | x = self.conv2d_7b(x) 276 | x = self.avgpool_1a(x) 277 | x = x.view(x.size(0), -1) 278 | x = self.classif(x) 279 | return x -------------------------------------------------------------------------------- /dface/core/roc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | -------------------------------------------------------------------------------- /dface/core/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def IoU(box, boxes): 4 | """Compute IoU between detect box and gt boxes 5 | 6 | Parameters: 7 | ---------- 8 | box: numpy array , shape (5, ): x1, y1, x2, y2, score 9 | input box 10 | boxes: numpy array, shape (n, 4): x1, y1, x2, y2 11 | input ground truth boxes 12 | 13 | Returns: 14 | ------- 15 | ovr: numpy.array, shape (n, ) 16 | IoU 17 | """ 18 | box_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1) 19 | area = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1) 20 | xx1 = np.maximum(box[0], boxes[:, 0]) 21 | yy1 = np.maximum(box[1], boxes[:, 1]) 22 | xx2 = np.minimum(box[2], boxes[:, 2]) 23 | yy2 = np.minimum(box[3], boxes[:, 3]) 24 | 25 | # compute the width and height of the bounding box 26 | w = np.maximum(0, xx2 - xx1 + 1) 27 | h = np.maximum(0, yy2 - yy1 + 1) 28 | 29 | inter = w * h 30 | ovr = np.true_divide(inter,(box_area + area - inter)) 31 | #ovr = inter / (box_area + area - inter) 32 | return ovr 33 | 34 | 35 | def convert_to_square(bbox): 36 | """Convert bbox to square 37 | 38 | Parameters: 39 | ---------- 40 | bbox: numpy array , shape n x 5 41 | input bbox 42 | 43 | Returns: 44 | ------- 45 | square bbox 46 | """ 47 | square_bbox = bbox.copy() 48 | 49 | h = bbox[:, 3] - bbox[:, 1] + 1 50 | w = bbox[:, 2] - bbox[:, 0] + 1 51 | max_side = np.maximum(h,w) 52 | square_bbox[:, 0] = bbox[:, 0] + w*0.5 - max_side*0.5 53 | square_bbox[:, 1] = bbox[:, 1] + h*0.5 - max_side*0.5 54 | square_bbox[:, 2] = square_bbox[:, 0] + max_side - 1 55 | square_bbox[:, 3] = square_bbox[:, 1] + max_side - 1 56 | return square_bbox 57 | 58 | 59 | 
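# A minimal sanity-check sketch for the two helpers above (hypothetical values,
# not part of the original pipeline):
#
#     box  = np.array([0, 0, 9, 9, 0.9])                 # 10x10 detection + score
#     gts  = np.array([[0, 0, 9, 9], [20, 20, 29, 29]])  # two ground-truth boxes
#     IoU(box, gts)                                       # -> array([1., 0.])
#
#     bbox = np.array([[0., 0., 9., 19., 0.9]])           # 10px wide, 20px tall
#     convert_to_square(bbox)                              # -> [[-5., 0., 14., 19., 0.9]]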
def nms(dets, thresh, mode="Union"): 60 | """ 61 | greedily select boxes with high confidence 62 | keep boxes overlap <= thresh 63 | rule out overlap > thresh 64 | :param dets: [[x1, y1, x2, y2 score]] 65 | :param thresh: retain overlap <= thresh 66 | :return: indexes to keep 67 | """ 68 | x1 = dets[:, 0] 69 | y1 = dets[:, 1] 70 | x2 = dets[:, 2] 71 | y2 = dets[:, 3] 72 | scores = dets[:, 4] 73 | 74 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 75 | order = scores.argsort()[::-1] 76 | 77 | keep = [] 78 | while order.size > 0: 79 | i = order[0] 80 | keep.append(i) 81 | xx1 = np.maximum(x1[i], x1[order[1:]]) 82 | yy1 = np.maximum(y1[i], y1[order[1:]]) 83 | xx2 = np.minimum(x2[i], x2[order[1:]]) 84 | yy2 = np.minimum(y2[i], y2[order[1:]]) 85 | 86 | w = np.maximum(0.0, xx2 - xx1 + 1) 87 | h = np.maximum(0.0, yy2 - yy1 + 1) 88 | inter = w * h 89 | if mode == "Union": 90 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 91 | elif mode == "Minimum": 92 | ovr = inter / np.minimum(areas[i], areas[order[1:]]) 93 | 94 | inds = np.where(ovr <= thresh)[0] 95 | order = order[inds + 1] 96 | 97 | return keep 98 | 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /dface/core/vision.py: -------------------------------------------------------------------------------- 1 | from matplotlib.patches import Circle 2 | 3 | 4 | def vis_two(im_array, dets1, dets2, thresh=0.9): 5 | """Visualize detection results before and after calibration 6 | 7 | Parameters: 8 | ---------- 9 | im_array: numpy.ndarray, shape(1, c, h, w) 10 | test image in rgb 11 | dets1: numpy.ndarray([[x1 y1 x2 y2 score]]) 12 | detection results before calibration 13 | dets2: numpy.ndarray([[x1 y1 x2 y2 score]]) 14 | detection results after calibration 15 | thresh: float 16 | boxes with scores > thresh will be drawn in red otherwise yellow 17 | 18 | Returns: 19 | ------- 20 | """ 21 | import matplotlib.pyplot as plt 22 | import random 23 | 24 | figure = plt.figure() 25 | plt.subplot(121) 26 | plt.imshow(im_array) 27 | color = 'yellow' 28 | 29 | for i in range(dets1.shape[0]): 30 | bbox = dets1[i, :4] 31 | landmarks = dets1[i, 5:] 32 | score = dets1[i, 4] 33 | if score > thresh: 34 | rect = plt.Rectangle((bbox[0], bbox[1]), 35 | bbox[2] - bbox[0], 36 | bbox[3] - bbox[1], fill=False, 37 | edgecolor='red', linewidth=0.7) 38 | plt.gca().add_patch(rect) 39 | landmarks = landmarks.reshape((5,2)) 40 | for j in range(5): 41 | plt.scatter(landmarks[j,0],landmarks[j,1],c='yellow',linewidths=0.1, marker='x', s=5) 42 | 43 | 44 | # plt.gca().text(bbox[0], bbox[1] - 2, 45 | # '{:.3f}'.format(score), 46 | # bbox=dict(facecolor='blue', alpha=0.5), fontsize=12, color='white') 47 | # else: 48 | # rect = plt.Rectangle((bbox[0], bbox[1]), 49 | # bbox[2] - bbox[0], 50 | # bbox[3] - bbox[1], fill=False, 51 | # edgecolor=color, linewidth=0.5) 52 | # plt.gca().add_patch(rect) 53 | 54 | plt.subplot(122) 55 | plt.imshow(im_array) 56 | color = 'yellow' 57 | 58 | for i in range(dets2.shape[0]): 59 | bbox = dets2[i, :4] 60 | landmarks = dets1[i, 5:] 61 | score = dets2[i, 4] 62 | if score > thresh: 63 | rect = plt.Rectangle((bbox[0], bbox[1]), 64 | bbox[2] - bbox[0], 65 | bbox[3] - bbox[1], fill=False, 66 | edgecolor='red', linewidth=0.7) 67 | plt.gca().add_patch(rect) 68 | 69 | landmarks = landmarks.reshape((5, 2)) 70 | for j in range(5): 71 | plt.scatter(landmarks[j, 0], landmarks[j, 1], c='yellow',linewidths=0.1, marker='x', s=5) 72 | 73 | # plt.gca().text(bbox[0], bbox[1] - 2, 74 | # '{:.3f}'.format(score), 75 | # 
bbox=dict(facecolor='blue', alpha=0.5), fontsize=12, color='white') 76 | # else: 77 | # rect = plt.Rectangle((bbox[0], bbox[1]), 78 | # bbox[2] - bbox[0], 79 | # bbox[3] - bbox[1], fill=False, 80 | # edgecolor=color, linewidth=0.5) 81 | # plt.gca().add_patch(rect) 82 | plt.show() 83 | 84 | 85 | def vis_face(im_array, dets, landmarks=None): 86 | """Visualize detection results before and after calibration 87 | 88 | Parameters: 89 | ---------- 90 | im_array: numpy.ndarray, shape(1, c, h, w) 91 | test image in rgb 92 | dets1: numpy.ndarray([[x1 y1 x2 y2 score]]) 93 | detection results before calibration 94 | dets2: numpy.ndarray([[x1 y1 x2 y2 score]]) 95 | detection results after calibration 96 | thresh: float 97 | boxes with scores > thresh will be drawn in red otherwise yellow 98 | 99 | Returns: 100 | ------- 101 | """ 102 | import matplotlib.pyplot as plt 103 | import random 104 | import pylab 105 | 106 | figure = pylab.figure() 107 | # plt.subplot(121) 108 | pylab.imshow(im_array) 109 | figure.suptitle('DFace Detector', fontsize=20) 110 | 111 | 112 | 113 | for i in range(dets.shape[0]): 114 | bbox = dets[i, :4] 115 | 116 | rect = pylab.Rectangle((bbox[0], bbox[1]), 117 | bbox[2] - bbox[0], 118 | bbox[3] - bbox[1], fill=False, 119 | edgecolor='yellow', linewidth=0.9) 120 | pylab.gca().add_patch(rect) 121 | 122 | if landmarks is not None: 123 | for i in range(landmarks.shape[0]): 124 | landmarks_one = landmarks[i, :] 125 | landmarks_one = landmarks_one.reshape((5, 2)) 126 | for j in range(5): 127 | # pylab.scatter(landmarks_one[j, 0], landmarks_one[j, 1], c='yellow', linewidths=0.1, marker='x', s=5) 128 | 129 | cir1 = Circle(xy=(landmarks_one[j, 0], landmarks_one[j, 1]), radius=2, alpha=0.4, color="red") 130 | pylab.gca().add_patch(cir1) 131 | # plt.gca().text(bbox[0], bbox[1] - 2, 132 | # '{:.3f}'.format(score), 133 | # bbox=dict(facecolor='blue', alpha=0.5), fontsize=12, color='white') 134 | # else: 135 | # rect = plt.Rectangle((bbox[0], bbox[1]), 136 | # bbox[2] - bbox[0], 137 | # bbox[3] - bbox[1], fill=False, 138 | # edgecolor=color, linewidth=0.5) 139 | # plt.gca().add_patch(rect) 140 | 141 | pylab.show() -------------------------------------------------------------------------------- /dface/prepare_data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/dface/prepare_data/__init__.py -------------------------------------------------------------------------------- /dface/prepare_data/assemble.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import numpy.random as npr 4 | import numpy as np 5 | 6 | def assemble_data(output_file, anno_file_list=[]): 7 | #assemble the annotations to one file 8 | size = 12 9 | 10 | if len(anno_file_list)==0: 11 | return 0 12 | 13 | if os.path.exists(output_file): 14 | os.remove(output_file) 15 | 16 | for anno_file in anno_file_list: 17 | with open(anno_file, 'r') as f: 18 | anno_lines = f.readlines() 19 | 20 | base_num = 250000 21 | 22 | if len(anno_lines) > base_num * 3: 23 | idx_keep = npr.choice(len(anno_lines), size=base_num * 3, replace=True) 24 | elif len(anno_lines) > 100000: 25 | idx_keep = npr.choice(len(anno_lines), size=len(anno_lines), replace=True) 26 | else: 27 | idx_keep = np.arange(len(anno_lines)) 28 | np.random.shuffle(idx_keep) 29 | chose_count = 0 30 | with open(output_file, 'a+') as f: 31 | for idx in idx_keep: 32 | f.write(anno_lines[idx]) 33 | 
chose_count+=1 34 | 35 | return chose_count -------------------------------------------------------------------------------- /dface/prepare_data/assemble_onet_imglist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import dface.config as config 3 | import dface.prepare_data.assemble as assemble 4 | 5 | 6 | if __name__ == '__main__': 7 | 8 | anno_list = [] 9 | 10 | net_landmark_file = os.path.join(config.ANNO_STORE_DIR,config.ONET_LANDMARK_ANNO_FILENAME) 11 | net_postive_file = os.path.join(config.ANNO_STORE_DIR,config.ONET_POSTIVE_ANNO_FILENAME) 12 | net_part_file = os.path.join(config.ANNO_STORE_DIR,config.ONET_PART_ANNO_FILENAME) 13 | net_neg_file = os.path.join(config.ANNO_STORE_DIR,config.ONET_NEGATIVE_ANNO_FILENAME) 14 | 15 | anno_list.append(net_postive_file) 16 | anno_list.append(net_part_file) 17 | anno_list.append(net_neg_file) 18 | anno_list.append(net_landmark_file) 19 | 20 | imglist_filename = config.ONET_TRAIN_IMGLIST_FILENAME 21 | anno_dir = config.ANNO_STORE_DIR 22 | imglist_file = os.path.join(anno_dir, imglist_filename) 23 | 24 | chose_count = assemble.assemble_data(imglist_file ,anno_list) 25 | print("PNet train annotation result file path:%s" % imglist_file) 26 | -------------------------------------------------------------------------------- /dface/prepare_data/assemble_pnet_imglist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import dface.config as config 3 | import dface.prepare_data.assemble as assemble 4 | 5 | 6 | if __name__ == '__main__': 7 | 8 | anno_list = [] 9 | 10 | # pnet_landmark_file = os.path.join(config.ANNO_STORE_DIR,config.PNET_LANDMARK_ANNO_FILENAME) 11 | pnet_postive_file = os.path.join(config.ANNO_STORE_DIR,config.PNET_POSTIVE_ANNO_FILENAME) 12 | pnet_part_file = os.path.join(config.ANNO_STORE_DIR,config.PNET_PART_ANNO_FILENAME) 13 | pnet_neg_file = os.path.join(config.ANNO_STORE_DIR,config.PNET_NEGATIVE_ANNO_FILENAME) 14 | 15 | anno_list.append(pnet_postive_file) 16 | anno_list.append(pnet_part_file) 17 | anno_list.append(pnet_neg_file) 18 | # anno_list.append(pnet_landmark_file) 19 | 20 | imglist_filename = config.PNET_TRAIN_IMGLIST_FILENAME 21 | anno_dir = config.ANNO_STORE_DIR 22 | imglist_file = os.path.join(anno_dir, imglist_filename) 23 | 24 | chose_count = assemble.assemble_data(imglist_file ,anno_list) 25 | print("PNet train annotation result file path:%s" % imglist_file) 26 | -------------------------------------------------------------------------------- /dface/prepare_data/assemble_rnet_imglist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import dface.config as config 3 | import dface.prepare_data.assemble as assemble 4 | 5 | 6 | if __name__ == '__main__': 7 | 8 | anno_list = [] 9 | 10 | # pnet_landmark_file = os.path.join(config.ANNO_STORE_DIR,config.RNET_LANDMARK_ANNO_FILENAME) 11 | pnet_postive_file = os.path.join(config.ANNO_STORE_DIR,config.RNET_POSTIVE_ANNO_FILENAME) 12 | pnet_part_file = os.path.join(config.ANNO_STORE_DIR,config.RNET_PART_ANNO_FILENAME) 13 | pnet_neg_file = os.path.join(config.ANNO_STORE_DIR,config.RNET_NEGATIVE_ANNO_FILENAME) 14 | 15 | anno_list.append(pnet_postive_file) 16 | anno_list.append(pnet_part_file) 17 | anno_list.append(pnet_neg_file) 18 | # anno_list.append(pnet_landmark_file) 19 | 20 | imglist_filename = config.RNET_TRAIN_IMGLIST_FILENAME 21 | anno_dir = config.ANNO_STORE_DIR 22 | imglist_file = os.path.join(anno_dir, 
imglist_filename) 23 | 24 | chose_count = assemble.assemble_data(imglist_file ,anno_list) 25 | print("PNet train annotation result file path:%s" % imglist_file) 26 | -------------------------------------------------------------------------------- /dface/prepare_data/gen_Onet_train_data.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import cv2 4 | import numpy as np 5 | from dface.core.detect import MtcnnDetector,create_mtcnn_net 6 | from dface.core.imagedb import ImageDB 7 | from dface.core.image_reader import TestImageLoader 8 | import time 9 | import os 10 | import cPickle 11 | from dface.core.utils import convert_to_square,IoU 12 | import dface.config as config 13 | import dface.core.vision as vision 14 | 15 | def gen_onet_data(data_dir, anno_file, pnet_model_file, rnet_model_file, prefix_path='', use_cuda=True, vis=False): 16 | 17 | 18 | pnet, rnet, _ = create_mtcnn_net(p_model_path=pnet_model_file, r_model_path=rnet_model_file, use_cuda=use_cuda) 19 | mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, min_face_size=12) 20 | 21 | imagedb = ImageDB(anno_file,mode="test",prefix_path=prefix_path) 22 | imdb = imagedb.load_imdb() 23 | image_reader = TestImageLoader(imdb,1,False) 24 | 25 | all_boxes = list() 26 | batch_idx = 0 27 | 28 | for databatch in image_reader: 29 | if batch_idx % 100 == 0: 30 | print("%d images done" % batch_idx) 31 | im = databatch 32 | 33 | t = time.time() 34 | 35 | p_boxes, p_boxes_align = mtcnn_detector.detect_pnet(im=im) 36 | 37 | boxes, boxes_align = mtcnn_detector.detect_rnet(im=im, dets=p_boxes_align) 38 | 39 | if boxes_align is None: 40 | all_boxes.append(np.array([])) 41 | batch_idx += 1 42 | continue 43 | if vis: 44 | rgb_im = cv2.cvtColor(np.asarray(im), cv2.COLOR_BGR2RGB) 45 | vision.vis_two(rgb_im, boxes, boxes_align) 46 | 47 | t1 = time.time() - t 48 | t = time.time() 49 | all_boxes.append(boxes_align) 50 | batch_idx += 1 51 | 52 | save_path = config.MODEL_STORE_DIR 53 | 54 | if not os.path.exists(save_path): 55 | os.mkdir(save_path) 56 | 57 | save_file = os.path.join(save_path, "detections_%d.pkl" % int(time.time())) 58 | with open(save_file, 'wb') as f: 59 | cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) 60 | 61 | 62 | gen_onet_sample_data(data_dir,anno_file,save_file,prefix_path) 63 | 64 | 65 | 66 | 67 | 68 | 69 | def gen_onet_sample_data(data_dir,anno_file,det_boxs_file,prefix): 70 | 71 | neg_save_dir = os.path.join(data_dir, "48/negative") 72 | pos_save_dir = os.path.join(data_dir, "48/positive") 73 | part_save_dir = os.path.join(data_dir, "48/part") 74 | 75 | for dir_path in [neg_save_dir, pos_save_dir, part_save_dir]: 76 | if not os.path.exists(dir_path): 77 | os.makedirs(dir_path) 78 | 79 | 80 | # load ground truth from annotation file 81 | # format of each line: image/path [x1,y1,x2,y2] for each gt_box in this image 82 | 83 | with open(anno_file, 'r') as f: 84 | annotations = f.readlines() 85 | 86 | image_size = 48 87 | net = "onet" 88 | 89 | im_idx_list = list() 90 | gt_boxes_list = list() 91 | num_of_images = len(annotations) 92 | print("processing %d images in total" % num_of_images) 93 | 94 | for annotation in annotations: 95 | annotation = annotation.strip().split(' ') 96 | im_idx = os.path.join(prefix,annotation[0]) 97 | 98 | boxes = map(float, annotation[1:]) 99 | boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4) 100 | im_idx_list.append(im_idx) 101 | gt_boxes_list.append(boxes) 102 | 103 | 104 | save_path = config.ANNO_STORE_DIR 105 | if not 
os.path.exists(save_path): 106 | os.makedirs(save_path) 107 | 108 | f1 = open(os.path.join(save_path, 'pos_%d.txt' % image_size), 'w') 109 | f2 = open(os.path.join(save_path, 'neg_%d.txt' % image_size), 'w') 110 | f3 = open(os.path.join(save_path, 'part_%d.txt' % image_size), 'w') 111 | 112 | det_handle = open(det_boxs_file, 'r') 113 | 114 | det_boxes = cPickle.load(det_handle) 115 | print(len(det_boxes), num_of_images) 116 | assert len(det_boxes) == num_of_images, "incorrect detections or ground truths" 117 | 118 | # index of neg, pos and part face, used as their image names 119 | n_idx = 0 120 | p_idx = 0 121 | d_idx = 0 122 | image_done = 0 123 | for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list): 124 | if image_done % 100 == 0: 125 | print("%d images done" % image_done) 126 | image_done += 1 127 | 128 | if dets.shape[0] == 0: 129 | continue 130 | img = cv2.imread(im_idx) 131 | dets = convert_to_square(dets) 132 | dets[:, 0:4] = np.round(dets[:, 0:4]) 133 | 134 | for box in dets: 135 | x_left, y_top, x_right, y_bottom = box[0:4].astype(int) 136 | width = x_right - x_left + 1 137 | height = y_bottom - y_top + 1 138 | 139 | # ignore box that is too small or beyond image border 140 | if width < 20 or x_left < 0 or y_top < 0 or x_right > img.shape[1] - 1 or y_bottom > img.shape[0] - 1: 141 | continue 142 | 143 | # compute intersection over union(IoU) between current box and all gt boxes 144 | Iou = IoU(box, gts) 145 | cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :] 146 | resized_im = cv2.resize(cropped_im, (image_size, image_size), 147 | interpolation=cv2.INTER_LINEAR) 148 | 149 | # save negative images and write label 150 | if np.max(Iou) < 0.3: 151 | # Iou with all gts must below 0.3 152 | save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx) 153 | f2.write(save_file + ' 0\n') 154 | cv2.imwrite(save_file, resized_im) 155 | n_idx += 1 156 | else: 157 | # find gt_box with the highest iou 158 | idx = np.argmax(Iou) 159 | assigned_gt = gts[idx] 160 | x1, y1, x2, y2 = assigned_gt 161 | 162 | # compute bbox reg label 163 | offset_x1 = (x1 - x_left) / float(width) 164 | offset_y1 = (y1 - y_top) / float(height) 165 | offset_x2 = (x2 - x_right) / float(width) 166 | offset_y2 = (y2 - y_bottom) / float(height) 167 | 168 | # save positive and part-face images and write labels 169 | if np.max(Iou) >= 0.65: 170 | save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx) 171 | f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' % ( 172 | offset_x1, offset_y1, offset_x2, offset_y2)) 173 | cv2.imwrite(save_file, resized_im) 174 | p_idx += 1 175 | 176 | elif np.max(Iou) >= 0.4: 177 | save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx) 178 | f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' % ( 179 | offset_x1, offset_y1, offset_x2, offset_y2)) 180 | cv2.imwrite(save_file, resized_im) 181 | d_idx += 1 182 | f1.close() 183 | f2.close() 184 | f3.close() 185 | 186 | 187 | 188 | def model_store_path(): 189 | return os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))+"/model_store" 190 | 191 | 192 | 193 | def parse_args(): 194 | parser = argparse.ArgumentParser(description='Test mtcnn', 195 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 196 | 197 | parser.add_argument('--dface_traindata_store', dest='traindata_store', help='dface train data temporary folder,include 12,24,48/postive,negative,part,landmark', 198 | default='../data/wider/', type=str) 199 | parser.add_argument('--anno_file', dest='annotation_file', help='wider face original 
annotation file', 200 | default=os.path.join(config.ANNO_STORE_DIR,"wider_origin_anno.txt"), type=str) 201 | parser.add_argument('--pmodel_file', dest='pnet_model_file', help='PNet model file path', 202 | default='/idata/workspace/dface/model_store/pnet_epoch.pt', type=str) 203 | parser.add_argument('--rmodel_file', dest='rnet_model_file', help='RNet model file path', 204 | default='/idata/workspace/dface/model_store/rnet_epoch.pt', type=str) 205 | parser.add_argument('--gpu', dest='use_cuda', help='with gpu', 206 | default=config.USE_CUDA, type=bool) 207 | parser.add_argument('--prefix_path', dest='prefix_path', help='annotation file image prefix root path', 208 | default='', type=str) 209 | 210 | args = parser.parse_args() 211 | return args 212 | 213 | 214 | 215 | if __name__ == '__main__': 216 | args = parse_args() 217 | gen_onet_data(args.traindata_store, args.annotation_file, args.pnet_model_file, args.rnet_model_file, args.prefix_path, args.use_cuda) 218 | 219 | 220 | 221 | -------------------------------------------------------------------------------- /dface/prepare_data/gen_Pnet_train_data.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import cv2 4 | import os 5 | import numpy.random as npr 6 | from dface.core.utils import IoU 7 | import dface.config as config 8 | 9 | def gen_pnet_data(data_dir,anno_file,prefix): 10 | 11 | neg_save_dir = os.path.join(data_dir,"12/negative") 12 | pos_save_dir = os.path.join(data_dir,"12/positive") 13 | part_save_dir = os.path.join(data_dir,"12/part") 14 | 15 | for dir_path in [neg_save_dir,pos_save_dir,part_save_dir]: 16 | if not os.path.exists(dir_path): 17 | os.makedirs(dir_path) 18 | 19 | save_dir = os.path.join(data_dir,"pnet") 20 | if not os.path.exists(save_dir): 21 | os.mkdir(save_dir) 22 | 23 | post_save_file = os.path.join(config.ANNO_STORE_DIR,config.PNET_POSTIVE_ANNO_FILENAME) 24 | neg_save_file = os.path.join(config.ANNO_STORE_DIR,config.PNET_NEGATIVE_ANNO_FILENAME) 25 | part_save_file = os.path.join(config.ANNO_STORE_DIR,config.PNET_PART_ANNO_FILENAME) 26 | 27 | f1 = open(post_save_file, 'w') 28 | f2 = open(neg_save_file, 'w') 29 | f3 = open(part_save_file, 'w') 30 | 31 | with open(anno_file, 'r') as f: 32 | annotations = f.readlines() 33 | 34 | num = len(annotations) 35 | print("%d pics in total" % num) 36 | p_idx = 0 37 | n_idx = 0 38 | d_idx = 0 39 | idx = 0 40 | box_idx = 0 41 | for annotation in annotations: 42 | annotation = annotation.strip().split(' ') 43 | im_path = os.path.join(prefix,annotation[0]) 44 | bbox = list(map(float, annotation[1:])) 45 | boxes = np.array(bbox, dtype=np.int32).reshape(-1, 4) 46 | img = cv2.imread(im_path) 47 | idx += 1 48 | if idx % 100 == 0: 49 | print(idx, "images done") 50 | 51 | height, width, channel = img.shape 52 | 53 | neg_num = 0 54 | while neg_num < 50: 55 | size = npr.randint(12, min(width, height) / 2) 56 | nx = npr.randint(0, width - size) 57 | ny = npr.randint(0, height - size) 58 | crop_box = np.array([nx, ny, nx + size, ny + size]) 59 | 60 | Iou = IoU(crop_box, boxes) 61 | 62 | cropped_im = img[ny : ny + size, nx : nx + size, :] 63 | resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR) 64 | 65 | if np.max(Iou) < 0.3: 66 | # Iou with all gts must below 0.3 67 | save_file = os.path.join(neg_save_dir, "%s.jpg"%n_idx) 68 | f2.write(save_file + ' 0\n') 69 | cv2.imwrite(save_file, resized_im) 70 | n_idx += 1 71 | neg_num += 1 72 | 73 | 74 | for box in boxes: 75 | # box (x_left, 
y_top, x_right, y_bottom) 76 | x1, y1, x2, y2 = box 77 | w = x2 - x1 + 1 78 | h = y2 - y1 + 1 79 | 80 | # ignore small faces 81 | # in case the ground truth boxes of small faces are not accurate 82 | if max(w, h) < 40 or x1 < 0 or y1 < 0: 83 | continue 84 | 85 | # generate negative examples that have overlap with gt 86 | for i in range(5): 87 | size = npr.randint(12, min(width, height) / 2) 88 | # delta_x and delta_y are offsets of (x1, y1) 89 | delta_x = npr.randint(max(-size, -x1), w) 90 | delta_y = npr.randint(max(-size, -y1), h) 91 | nx1 = max(0, x1 + delta_x) 92 | ny1 = max(0, y1 + delta_y) 93 | 94 | 95 | 96 | if nx1 + size > width or ny1 + size > height: 97 | continue 98 | crop_box = np.array([nx1, ny1, nx1 + size, ny1 + size]) 99 | Iou = IoU(crop_box, boxes) 100 | 101 | cropped_im = img[ny1 : ny1 + size, nx1 : nx1 + size, :] 102 | resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR) 103 | 104 | if np.max(Iou) < 0.3: 105 | # Iou with all gts must below 0.3 106 | save_file = os.path.join(neg_save_dir, "%s.jpg"%n_idx) 107 | f2.write(save_file + ' 0\n') 108 | cv2.imwrite(save_file, resized_im) 109 | n_idx += 1 110 | 111 | # generate positive examples and part faces 112 | for i in range(20): 113 | size = npr.randint(int(min(w, h) * 0.8), np.ceil(1.25 * max(w, h))) 114 | 115 | # delta here is the offset of box center 116 | delta_x = npr.randint(-w * 0.2, w * 0.2) 117 | delta_y = npr.randint(-h * 0.2, h * 0.2) 118 | 119 | nx1 = max(x1 + w / 2 + delta_x - size / 2, 0) 120 | ny1 = max(y1 + h / 2 + delta_y - size / 2, 0) 121 | nx2 = nx1 + size 122 | ny2 = ny1 + size 123 | 124 | if nx2 > width or ny2 > height: 125 | continue 126 | crop_box = np.array([nx1, ny1, nx2, ny2]) 127 | 128 | offset_x1 = (x1 - nx1) / float(size) 129 | offset_y1 = (y1 - ny1) / float(size) 130 | offset_x2 = (x2 - nx2) / float(size) 131 | offset_y2 = (y2 - ny2) / float(size) 132 | 133 | cropped_im = img[ny1 : ny2, nx1 : nx2, :] 134 | resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR) 135 | 136 | box_ = box.reshape(1, -1) 137 | if IoU(crop_box, box_) >= 0.65: 138 | save_file = os.path.join(pos_save_dir, "%s.jpg"%p_idx) 139 | f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n'%(offset_x1, offset_y1, offset_x2, offset_y2)) 140 | cv2.imwrite(save_file, resized_im) 141 | p_idx += 1 142 | elif IoU(crop_box, box_) >= 0.4: 143 | save_file = os.path.join(part_save_dir, "%s.jpg"%d_idx) 144 | f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n'%(offset_x1, offset_y1, offset_x2, offset_y2)) 145 | cv2.imwrite(save_file, resized_im) 146 | d_idx += 1 147 | box_idx += 1 148 | print("%s images done, pos: %s part: %s neg: %s"%(idx, p_idx, d_idx, n_idx)) 149 | 150 | f1.close() 151 | f2.close() 152 | f3.close() 153 | 154 | 155 | 156 | def parse_args(): 157 | parser = argparse.ArgumentParser(description='Test mtcnn', 158 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 159 | 160 | parser.add_argument('--dface_traindata_store', dest='traindata_store', help='dface train data temporary folder,include 12,24,48/postive,negative,part,landmark', 161 | default='../data/wider/', type=str) 162 | parser.add_argument('--anno_file', dest='annotation_file', help='wider face original annotation file', 163 | default=os.path.join(config.ANNO_STORE_DIR,"wider_origin_anno.txt"), type=str) 164 | parser.add_argument('--prefix_path', dest='prefix_path', help='annotation file image prefix root path', 165 | default='', type=str) 166 | 167 | 168 | 169 | 170 | args = parser.parse_args() 171 | return args 172 | 173 | 
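# Example invocation (a sketch -- the paths below are placeholders and need to
# point at the unpacked WIDER FACE training images and the generated
# wider_origin_anno.txt on your machine):
#
#     python dface/prepare_data/gen_Pnet_train_data.py \
#         --dface_traindata_store ../data/wider/ \
#         --anno_file anno_store/wider_origin_anno.txt \
#         --prefix_path /path/to/WIDER_train/images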
if __name__ == '__main__': 174 | args = parse_args() 175 | gen_pnet_data(args.traindata_store,args.annotation_file,args.prefix_path) 176 | -------------------------------------------------------------------------------- /dface/prepare_data/gen_Rnet_train_data.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | import cv2 5 | import numpy as np 6 | from dface.core.detect import MtcnnDetector,create_mtcnn_net 7 | from dface.core.imagedb import ImageDB 8 | from dface.core.image_reader import TestImageLoader 9 | import time 10 | import os 11 | import cPickle 12 | from dface.core.utils import convert_to_square,IoU 13 | import dface.config as config 14 | import dface.core.vision as vision 15 | 16 | def gen_rnet_data(data_dir, anno_file, pnet_model_file, prefix_path='', use_cuda=True, vis=False): 17 | 18 | 19 | pnet, _, _ = create_mtcnn_net(p_model_path=pnet_model_file, use_cuda=use_cuda) 20 | mtcnn_detector = MtcnnDetector(pnet=pnet,min_face_size=12) 21 | 22 | imagedb = ImageDB(anno_file,mode="test",prefix_path=prefix_path) 23 | imdb = imagedb.load_imdb() 24 | image_reader = TestImageLoader(imdb,1,False) 25 | 26 | all_boxes = list() 27 | batch_idx = 0 28 | 29 | for databatch in image_reader: 30 | if batch_idx % 100 == 0: 31 | print ("%d images done" % batch_idx) 32 | im = databatch 33 | 34 | t = time.time() 35 | 36 | boxes, boxes_align = mtcnn_detector.detect_pnet(im=im) 37 | if boxes_align is None: 38 | all_boxes.append(np.array([])) 39 | batch_idx += 1 40 | continue 41 | if vis: 42 | rgb_im = cv2.cvtColor(np.asarray(im), cv2.COLOR_BGR2RGB) 43 | vision.vis_two(rgb_im, boxes, boxes_align) 44 | 45 | t1 = time.time() - t 46 | t = time.time() 47 | all_boxes.append(boxes_align) 48 | batch_idx += 1 49 | 50 | # save_path = model_store_path() 51 | save_path = config.MODEL_STORE_DIR 52 | 53 | if not os.path.exists(save_path): 54 | os.mkdir(save_path) 55 | 56 | save_file = os.path.join(save_path, "detections_%d.pkl" % int(time.time())) 57 | with open(save_file, 'wb') as f: 58 | cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) 59 | 60 | 61 | gen_rnet_sample_data(data_dir,anno_file,save_file,prefix_path) 62 | 63 | 64 | 65 | def gen_rnet_sample_data(data_dir,anno_file,det_boxs_file,prefix_path): 66 | 67 | neg_save_dir = os.path.join(data_dir, "24/negative") 68 | pos_save_dir = os.path.join(data_dir, "24/positive") 69 | part_save_dir = os.path.join(data_dir, "24/part") 70 | 71 | for dir_path in [neg_save_dir, pos_save_dir, part_save_dir]: 72 | if not os.path.exists(dir_path): 73 | os.makedirs(dir_path) 74 | 75 | 76 | # load ground truth from annotation file 77 | # format of each line: image/path [x1,y1,x2,y2] for each gt_box in this image 78 | 79 | with open(anno_file, 'r') as f: 80 | annotations = f.readlines() 81 | 82 | image_size = 24 83 | net = "rnet" 84 | 85 | im_idx_list = list() 86 | gt_boxes_list = list() 87 | num_of_images = len(annotations) 88 | print ("processing %d images in total" % num_of_images) 89 | 90 | for annotation in annotations: 91 | annotation = annotation.strip().split(' ') 92 | im_idx = os.path.join(prefix_path,annotation[0]) 93 | 94 | boxes = map(float, annotation[1:]) 95 | boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4) 96 | im_idx_list.append(im_idx) 97 | gt_boxes_list.append(boxes) 98 | 99 | 100 | save_path = config.ANNO_STORE_DIR 101 | if not os.path.exists(save_path): 102 | os.makedirs(save_path) 103 | 104 | f1 = open(os.path.join(save_path, 'pos_%d.txt' % image_size), 'w') 105 | f2 = 
open(os.path.join(save_path, 'neg_%d.txt' % image_size), 'w') 106 | f3 = open(os.path.join(save_path, 'part_%d.txt' % image_size), 'w') 107 | 108 | det_handle = open(det_boxs_file, 'r') 109 | 110 | det_boxes = cPickle.load(det_handle) 111 | print(len(det_boxes), num_of_images) 112 | assert len(det_boxes) == num_of_images, "incorrect detections or ground truths" 113 | 114 | # index of neg, pos and part face, used as their image names 115 | n_idx = 0 116 | p_idx = 0 117 | d_idx = 0 118 | image_done = 0 119 | for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list): 120 | if image_done % 100 == 0: 121 | print("%d images done" % image_done) 122 | image_done += 1 123 | 124 | if dets.shape[0] == 0: 125 | continue 126 | img = cv2.imread(im_idx) 127 | dets = convert_to_square(dets) 128 | dets[:, 0:4] = np.round(dets[:, 0:4]) 129 | 130 | for box in dets: 131 | x_left, y_top, x_right, y_bottom = box[0:4].astype(int) 132 | width = x_right - x_left + 1 133 | height = y_bottom - y_top + 1 134 | 135 | # ignore box that is too small or beyond image border 136 | if width < 20 or x_left < 0 or y_top < 0 or x_right > img.shape[1] - 1 or y_bottom > img.shape[0] - 1: 137 | continue 138 | 139 | # compute intersection over union(IoU) between current box and all gt boxes 140 | Iou = IoU(box, gts) 141 | cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :] 142 | resized_im = cv2.resize(cropped_im, (image_size, image_size), 143 | interpolation=cv2.INTER_LINEAR) 144 | 145 | # save negative images and write label 146 | if np.max(Iou) < 0.3: 147 | # Iou with all gts must below 0.3 148 | save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx) 149 | f2.write(save_file + ' 0\n') 150 | cv2.imwrite(save_file, resized_im) 151 | n_idx += 1 152 | else: 153 | # find gt_box with the highest iou 154 | idx = np.argmax(Iou) 155 | assigned_gt = gts[idx] 156 | x1, y1, x2, y2 = assigned_gt 157 | 158 | # compute bbox reg label 159 | offset_x1 = (x1 - x_left) / float(width) 160 | offset_y1 = (y1 - y_top) / float(height) 161 | offset_x2 = (x2 - x_right) / float(width) 162 | offset_y2 = (y2 - y_bottom) / float(height) 163 | 164 | # save positive and part-face images and write labels 165 | if np.max(Iou) >= 0.65: 166 | save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx) 167 | f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' % ( 168 | offset_x1, offset_y1, offset_x2, offset_y2)) 169 | cv2.imwrite(save_file, resized_im) 170 | p_idx += 1 171 | 172 | elif np.max(Iou) >= 0.4: 173 | save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx) 174 | f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' % ( 175 | offset_x1, offset_y1, offset_x2, offset_y2)) 176 | cv2.imwrite(save_file, resized_im) 177 | d_idx += 1 178 | f1.close() 179 | f2.close() 180 | f3.close() 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | def model_store_path(): 189 | return os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))+"/model_store" 190 | 191 | 192 | 193 | def parse_args(): 194 | parser = argparse.ArgumentParser(description='Test mtcnn', 195 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 196 | 197 | parser.add_argument('--dface_traindata_store', dest='traindata_store', help='dface train data temporary folder,include 12,24,48/postive,negative,part,landmark', 198 | default='../data/wider/', type=str) 199 | parser.add_argument('--anno_file', dest='annotation_file', help='wider face original annotation file', 200 | default=os.path.join(config.ANNO_STORE_DIR,"wider_origin_anno.txt"), type=str) 201 | 
parser.add_argument('--pmodel_file', dest='pnet_model_file', help='PNet model file path', 202 | default='/idata/workspace/dface/model_store/pnet_epoch.pt', type=str) 203 | parser.add_argument('--gpu', dest='use_cuda', help='with gpu', 204 | default=config.USE_CUDA, type=bool) 205 | parser.add_argument('--prefix_path', dest='prefix_path', help='annotation file image prefix root path', 206 | default='', type=str) 207 | 208 | 209 | args = parser.parse_args() 210 | return args 211 | 212 | 213 | 214 | if __name__ == '__main__': 215 | args = parse_args() 216 | gen_rnet_data(args.traindata_store, args.annotation_file, args.pnet_model_file, args.prefix_path, args.use_cuda) 217 | 218 | 219 | 220 | -------------------------------------------------------------------------------- /dface/prepare_data/gen_landmark_12.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import os 3 | import cv2 4 | import numpy as np 5 | import sys 6 | import numpy.random as npr 7 | import argparse 8 | import dface.config as config 9 | import dface.core.utils as utils 10 | 11 | 12 | def gen_data(anno_file, data_dir, prefix): 13 | 14 | 15 | size = 12 16 | image_id = 0 17 | 18 | landmark_imgs_save_dir = os.path.join(data_dir,"12/landmark") 19 | if not os.path.exists(landmark_imgs_save_dir): 20 | os.makedirs(landmark_imgs_save_dir) 21 | 22 | anno_dir = config.ANNO_STORE_DIR 23 | if not os.path.exists(anno_dir): 24 | os.makedirs(anno_dir) 25 | 26 | landmark_anno_filename = config.PNET_LANDMARK_ANNO_FILENAME 27 | save_landmark_anno = os.path.join(anno_dir,landmark_anno_filename) 28 | 29 | f = open(save_landmark_anno, 'w') 30 | # dstdir = "train_landmark_few" 31 | 32 | 33 | with open(anno_file, 'r') as f2: 34 | annotations = f2.readlines() 35 | 36 | num = len(annotations) 37 | print("%d pics in total" % num) 38 | 39 | l_idx =0 40 | idx = 0 41 | # image_path bbox landmark(5*2) 42 | for annotation in annotations: 43 | # print imgPath 44 | 45 | annotation = annotation.strip().split(' ') 46 | 47 | assert len(annotation)==15,"each line should have 15 element" 48 | 49 | im_path = os.path.join(prefix,annotation[0].replace("\\", "/")) 50 | 51 | gt_box = map(float, annotation[1:5]) 52 | gt_box = [gt_box[0], gt_box[2], gt_box[1], gt_box[3]] 53 | 54 | 55 | gt_box = np.array(gt_box, dtype=np.int32) 56 | 57 | 58 | 59 | landmark = bbox = map(float, annotation[5:]) 60 | landmark = np.array(landmark, dtype=np.float) 61 | 62 | img = cv2.imread(im_path) 63 | assert (img is not None) 64 | 65 | height, width, channel = img.shape 66 | # crop_face = img[gt_box[1]:gt_box[3]+1, gt_box[0]:gt_box[2]+1] 67 | # crop_face = cv2.resize(crop_face,(size,size)) 68 | 69 | idx = idx + 1 70 | if idx % 100 == 0: 71 | print("%d images done, landmark images: %d"%(idx,l_idx)) 72 | 73 | x1, y1, x2, y2 = gt_box 74 | 75 | # gt's width 76 | w = x2 - x1 + 1 77 | # gt's height 78 | h = y2 - y1 + 1 79 | if max(w, h) < 40 or x1 < 0 or y1 < 0: 80 | continue 81 | # random shift 82 | for i in range(10): 83 | bbox_size = npr.randint(int(min(w, h) * 0.8), np.ceil(1.25 * max(w, h))) 84 | delta_x = npr.randint(-w * 0.2, w * 0.2) 85 | delta_y = npr.randint(-h * 0.2, h * 0.2) 86 | nx1 = max(x1 + w / 2 - bbox_size / 2 + delta_x, 0) 87 | ny1 = max(y1 + h / 2 - bbox_size / 2 + delta_y, 0) 88 | 89 | nx2 = nx1 + bbox_size 90 | ny2 = ny1 + bbox_size 91 | if nx2 > width or ny2 > height: 92 | continue 93 | crop_box = np.array([nx1, ny1, nx2, ny2]) 94 | cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :] 95 | resized_im = 
cv2.resize(cropped_im, (size, size),interpolation=cv2.INTER_LINEAR) 96 | 97 | offset_x1 = (x1 - nx1) / float(bbox_size) 98 | offset_y1 = (y1 - ny1) / float(bbox_size) 99 | offset_x2 = (x2 - nx2) / float(bbox_size) 100 | offset_y2 = (y2 - ny2) / float(bbox_size) 101 | 102 | offset_left_eye_x = (landmark[0] - nx1) / float(bbox_size) 103 | offset_left_eye_y = (landmark[1] - ny1) / float(bbox_size) 104 | 105 | offset_right_eye_x = (landmark[2] - nx1) / float(bbox_size) 106 | offset_right_eye_y = (landmark[3] - ny1) / float(bbox_size) 107 | 108 | offset_nose_x = (landmark[4] - nx1) / float(bbox_size) 109 | offset_nose_y = (landmark[5] - ny1) / float(bbox_size) 110 | 111 | offset_left_mouth_x = (landmark[6] - nx1) / float(bbox_size) 112 | offset_left_mouth_y = (landmark[7] - ny1) / float(bbox_size) 113 | 114 | offset_right_mouth_x = (landmark[8] - nx1) / float(bbox_size) 115 | offset_right_mouth_y = (landmark[9] - ny1) / float(bbox_size) 116 | 117 | 118 | # cal iou 119 | iou = utils.IoU(crop_box.astype(np.float), np.expand_dims(gt_box.astype(np.float), 0)) 120 | if iou > 0.65: 121 | save_file = os.path.join(landmark_imgs_save_dir, "%s.jpg" % l_idx) 122 | cv2.imwrite(save_file, resized_im) 123 | 124 | f.write(save_file + ' -2 %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f \n' % \ 125 | (offset_x1, offset_y1, offset_x2, offset_y2, \ 126 | offset_left_eye_x,offset_left_eye_y,offset_right_eye_x,offset_right_eye_y,offset_nose_x,offset_nose_y,offset_left_mouth_x,offset_left_mouth_y,offset_right_mouth_x,offset_right_mouth_y)) 127 | 128 | l_idx += 1 129 | 130 | 131 | f.close() 132 | 133 | 134 | 135 | 136 | def parse_args(): 137 | parser = argparse.ArgumentParser(description='Test mtcnn', 138 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 139 | 140 | parser.add_argument('--dface_traindata_store', dest='traindata_store', help='dface train data temporary folder,include 12,24,48/postive,negative,part,landmark', 141 | default='../data/wider/', type=str) 142 | parser.add_argument('--anno_file', dest='annotation_file', help='celeba dataset original annotation file', 143 | default='../data/wider/anno.txt', type=str) 144 | parser.add_argument('--prefix_path', dest='prefix_path', help='annotation file image prefix root path', 145 | default='../data/', type=str) 146 | 147 | 148 | args = parser.parse_args() 149 | return args 150 | 151 | if __name__ == '__main__': 152 | args = parse_args() 153 | 154 | gen_data(args.annotation_file, args.traindata_store, args.prefix_path) 155 | 156 | 157 | -------------------------------------------------------------------------------- /dface/prepare_data/gen_landmark_24.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import os 3 | import cv2 4 | import numpy as np 5 | import random 6 | import sys 7 | import numpy.random as npr 8 | import argparse 9 | import dface.config as config 10 | import dface.core.utils as utils 11 | 12 | 13 | 14 | def gen_data(anno_file, data_dir, prefix): 15 | 16 | 17 | size = 24 18 | image_id = 0 19 | 20 | landmark_imgs_save_dir = os.path.join(data_dir,"24/landmark") 21 | if not os.path.exists(landmark_imgs_save_dir): 22 | os.makedirs(landmark_imgs_save_dir) 23 | 24 | anno_dir = config.ANNO_STORE_DIR 25 | if not os.path.exists(anno_dir): 26 | os.makedirs(anno_dir) 27 | 28 | landmark_anno_filename = config.RNET_LANDMARK_ANNO_FILENAME 29 | save_landmark_anno = os.path.join(anno_dir,landmark_anno_filename) 30 | 31 | f = open(save_landmark_anno, 'w') 32 | # dstdir = 
"train_landmark_few" 33 | 34 | with open(anno_file, 'r') as f2: 35 | annotations = f2.readlines() 36 | 37 | num = len(annotations) 38 | print("%d total images" % num) 39 | 40 | l_idx =0 41 | idx = 0 42 | # image_path bbox landmark(5*2) 43 | for annotation in annotations: 44 | # print imgPath 45 | 46 | annotation = annotation.strip().split(' ') 47 | 48 | assert len(annotation)==15,"each line should have 15 element" 49 | 50 | im_path = os.path.join(prefix,annotation[0].replace("\\", "/")) 51 | 52 | gt_box = map(float, annotation[1:5]) 53 | gt_box = [gt_box[0], gt_box[2], gt_box[1], gt_box[3]] 54 | 55 | 56 | gt_box = np.array(gt_box, dtype=np.int32) 57 | 58 | landmark = map(float, annotation[5:]) 59 | landmark = np.array(landmark, dtype=np.float) 60 | 61 | img = cv2.imread(im_path) 62 | assert (img is not None) 63 | 64 | height, width, channel = img.shape 65 | # crop_face = img[gt_box[1]:gt_box[3]+1, gt_box[0]:gt_box[2]+1] 66 | # crop_face = cv2.resize(crop_face,(size,size)) 67 | 68 | idx = idx + 1 69 | if idx % 100 == 0: 70 | print("%d images done, landmark images: %d"%(idx,l_idx)) 71 | 72 | x1, y1, x2, y2 = gt_box 73 | 74 | # gt's width 75 | w = x2 - x1 + 1 76 | # gt's height 77 | h = y2 - y1 + 1 78 | if max(w, h) < 40 or x1 < 0 or y1 < 0: 79 | continue 80 | # random shift 81 | for i in range(10): 82 | bbox_size = npr.randint(int(min(w, h) * 0.8), np.ceil(1.25 * max(w, h))) 83 | delta_x = npr.randint(-w * 0.2, w * 0.2) 84 | delta_y = npr.randint(-h * 0.2, h * 0.2) 85 | nx1 = max(x1 + w / 2 - bbox_size / 2 + delta_x, 0) 86 | ny1 = max(y1 + h / 2 - bbox_size / 2 + delta_y, 0) 87 | 88 | nx2 = nx1 + bbox_size 89 | ny2 = ny1 + bbox_size 90 | if nx2 > width or ny2 > height: 91 | continue 92 | crop_box = np.array([nx1, ny1, nx2, ny2]) 93 | cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :] 94 | resized_im = cv2.resize(cropped_im, (size, size),interpolation=cv2.INTER_LINEAR) 95 | 96 | offset_x1 = (x1 - nx1) / float(bbox_size) 97 | offset_y1 = (y1 - ny1) / float(bbox_size) 98 | offset_x2 = (x2 - nx2) / float(bbox_size) 99 | offset_y2 = (y2 - ny2) / float(bbox_size) 100 | 101 | offset_left_eye_x = (landmark[0] - nx1) / float(bbox_size) 102 | offset_left_eye_y = (landmark[1] - ny1) / float(bbox_size) 103 | 104 | offset_right_eye_x = (landmark[2] - nx1) / float(bbox_size) 105 | offset_right_eye_y = (landmark[3] - ny1) / float(bbox_size) 106 | 107 | offset_nose_x = (landmark[4] - nx1) / float(bbox_size) 108 | offset_nose_y = (landmark[5] - ny1) / float(bbox_size) 109 | 110 | offset_left_mouth_x = (landmark[6] - nx1) / float(bbox_size) 111 | offset_left_mouth_y = (landmark[7] - ny1) / float(bbox_size) 112 | 113 | offset_right_mouth_x = (landmark[8] - nx1) / float(bbox_size) 114 | offset_right_mouth_y = (landmark[9] - ny1) / float(bbox_size) 115 | 116 | 117 | # cal iou 118 | iou = utils.IoU(crop_box.astype(np.float), np.expand_dims(gt_box.astype(np.float), 0)) 119 | if iou > 0.65: 120 | save_file = os.path.join(landmark_imgs_save_dir, "%s.jpg" % l_idx) 121 | cv2.imwrite(save_file, resized_im) 122 | 123 | f.write(save_file + ' -2 %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f \n' % \ 124 | (offset_x1, offset_y1, offset_x2, offset_y2, \ 125 | offset_left_eye_x,offset_left_eye_y,offset_right_eye_x,offset_right_eye_y,offset_nose_x,offset_nose_y,offset_left_mouth_x,offset_left_mouth_y,offset_right_mouth_x,offset_right_mouth_y)) 126 | 127 | l_idx += 1 128 | 129 | f.close() 130 | 131 | 132 | 133 | 134 | def parse_args(): 135 | parser = argparse.ArgumentParser(description='Test mtcnn', 136 | 
formatter_class=argparse.ArgumentDefaultsHelpFormatter) 137 | 138 | parser.add_argument('--dface_traindata_store', dest='traindata_store', help='dface train data temporary folder,include 12,24,48/postive,negative,part,landmark', 139 | default='/idata/data/wider/', type=str) 140 | parser.add_argument('--anno_file', dest='annotation_file', help='celeba dataset original annotation file', 141 | default='/idata/data/trainImageList.txt', type=str) 142 | parser.add_argument('--prefix_path', dest='prefix_path', help='annotation file image prefix root path', 143 | default='/idata/data', type=str) 144 | 145 | 146 | args = parser.parse_args() 147 | return args 148 | 149 | if __name__ == '__main__': 150 | args = parse_args() 151 | 152 | gen_data(args.annotation_file, args.traindata_store, args.prefix_path) 153 | 154 | 155 | -------------------------------------------------------------------------------- /dface/prepare_data/gen_landmark_48.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import os 3 | import cv2 4 | import numpy as np 5 | import random 6 | import sys 7 | import numpy.random as npr 8 | import argparse 9 | import dface.config as config 10 | import dface.core.utils as utils 11 | 12 | 13 | def gen_data(anno_file, data_dir, prefix): 14 | 15 | 16 | size = 48 17 | image_id = 0 18 | 19 | landmark_imgs_save_dir = os.path.join(data_dir,"48/landmark") 20 | if not os.path.exists(landmark_imgs_save_dir): 21 | os.makedirs(landmark_imgs_save_dir) 22 | 23 | anno_dir = config.ANNO_STORE_DIR 24 | if not os.path.exists(anno_dir): 25 | os.makedirs(anno_dir) 26 | 27 | landmark_anno_filename = config.ONET_LANDMARK_ANNO_FILENAME 28 | save_landmark_anno = os.path.join(anno_dir,landmark_anno_filename) 29 | 30 | f = open(save_landmark_anno, 'w') 31 | # dstdir = "train_landmark_few" 32 | 33 | with open(anno_file, 'r') as f2: 34 | annotations = f2.readlines() 35 | 36 | num = len(annotations) 37 | print("%d total images" % num) 38 | 39 | l_idx =0 40 | idx = 0 41 | # image_path bbox landmark(5*2) 42 | for annotation in annotations: 43 | # print imgPath 44 | 45 | annotation = annotation.strip().split(' ') 46 | 47 | assert len(annotation)==15,"each line should have 15 element" 48 | 49 | im_path = os.path.join(prefix,annotation[0].replace("\\", "/")) 50 | 51 | gt_box = map(float, annotation[1:5]) 52 | # gt_box = [gt_box[0], gt_box[2], gt_box[1], gt_box[3]] 53 | 54 | 55 | gt_box = np.array(gt_box, dtype=np.int32) 56 | 57 | landmark = map(float, annotation[5:]) 58 | landmark = np.array(landmark, dtype=np.float) 59 | 60 | img = cv2.imread(im_path) 61 | assert (img is not None) 62 | 63 | height, width, channel = img.shape 64 | # crop_face = img[gt_box[1]:gt_box[3]+1, gt_box[0]:gt_box[2]+1] 65 | # crop_face = cv2.resize(crop_face,(size,size)) 66 | 67 | idx = idx + 1 68 | if idx % 100 == 0: 69 | print("%d images done, landmark images: %d"%(idx,l_idx)) 70 | 71 | x1, y1, x2, y2 = gt_box 72 | 73 | # gt's width 74 | w = x2 - x1 + 1 75 | # gt's height 76 | h = y2 - y1 + 1 77 | if max(w, h) < 40 or x1 < 0 or y1 < 0: 78 | continue 79 | # random shift 80 | for i in range(10): 81 | bbox_size = npr.randint(int(min(w, h) * 0.8), np.ceil(1.25 * max(w, h))) 82 | delta_x = npr.randint(-w * 0.2, w * 0.2) 83 | delta_y = npr.randint(-h * 0.2, h * 0.2) 84 | nx1 = max(x1 + w / 2 - bbox_size / 2 + delta_x, 0) 85 | ny1 = max(y1 + h / 2 - bbox_size / 2 + delta_y, 0) 86 | 87 | nx2 = nx1 + bbox_size 88 | ny2 = ny1 + bbox_size 89 | if nx2 > width or ny2 > height: 90 | continue 91 | 
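# A quick sanity check on the offset computation below (illustrative numbers only, not
# taken from the dataset): with a ground-truth corner (x1, y1) = (100, 120), a random
# crop origin (nx1, ny1) = (95, 118) and bbox_size = 50,
#   offset_x1 = (100 - 95) / 50.0 = 0.10
#   offset_y1 = (120 - 118) / 50.0 = 0.04
# Every box and landmark target below is expressed as a fraction of the crop size, so the
# regression labels stay small and roughly scale-invariant no matter how large the face
# appears in pixels.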
crop_box = np.array([nx1, ny1, nx2, ny2]) 92 | cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :] 93 | resized_im = cv2.resize(cropped_im, (size, size),interpolation=cv2.INTER_LINEAR) 94 | 95 | offset_x1 = (x1 - nx1) / float(bbox_size) 96 | offset_y1 = (y1 - ny1) / float(bbox_size) 97 | offset_x2 = (x2 - nx2) / float(bbox_size) 98 | offset_y2 = (y2 - ny2) / float(bbox_size) 99 | 100 | offset_left_eye_x = (landmark[0] - nx1) / float(bbox_size) 101 | offset_left_eye_y = (landmark[1] - ny1) / float(bbox_size) 102 | 103 | offset_right_eye_x = (landmark[2] - nx1) / float(bbox_size) 104 | offset_right_eye_y = (landmark[3] - ny1) / float(bbox_size) 105 | 106 | offset_nose_x = (landmark[4] - nx1) / float(bbox_size) 107 | offset_nose_y = (landmark[5] - ny1) / float(bbox_size) 108 | 109 | offset_left_mouth_x = (landmark[6] - nx1) / float(bbox_size) 110 | offset_left_mouth_y = (landmark[7] - ny1) / float(bbox_size) 111 | 112 | offset_right_mouth_x = (landmark[8] - nx1) / float(bbox_size) 113 | offset_right_mouth_y = (landmark[9] - ny1) / float(bbox_size) 114 | 115 | 116 | # cal iou 117 | iou = utils.IoU(crop_box.astype(np.float), np.expand_dims(gt_box.astype(np.float), 0)) 118 | if iou > 0.65: 119 | save_file = os.path.join(landmark_imgs_save_dir, "%s.jpg" % l_idx) 120 | cv2.imwrite(save_file, resized_im) 121 | 122 | f.write(save_file + ' -2 %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f \n' % \ 123 | (offset_x1, offset_y1, offset_x2, offset_y2, \ 124 | offset_left_eye_x,offset_left_eye_y,offset_right_eye_x,offset_right_eye_y,offset_nose_x,offset_nose_y,offset_left_mouth_x,offset_left_mouth_y,offset_right_mouth_x,offset_right_mouth_y)) 125 | 126 | l_idx += 1 127 | 128 | f.close() 129 | 130 | 131 | 132 | 133 | def parse_args(): 134 | parser = argparse.ArgumentParser(description='Test mtcnn', 135 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 136 | 137 | parser.add_argument('--dface_traindata_store', dest='traindata_store', help='dface train data temporary folder,include 12,24,48/postive,negative,part,landmark', 138 | default='/idata/data/wider/', type=str) 139 | parser.add_argument('--anno_file', dest='annotation_file', help='celeba dataset original annotation file', 140 | default='/idata/data/trainImageList.txt', type=str) 141 | parser.add_argument('--prefix_path', dest='prefix_path', help='annotation file image prefix root path', 142 | default='/idata/data', type=str) 143 | 144 | 145 | args = parser.parse_args() 146 | return args 147 | 148 | if __name__ == '__main__': 149 | args = parse_args() 150 | 151 | gen_data(args.annotation_file, args.traindata_store, args.prefix_path) 152 | 153 | 154 | -------------------------------------------------------------------------------- /dface/prepare_data/gen_landmark_net_48.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import cv2 4 | import numpy as np 5 | from core.detect import MtcnnDetector,create_mtcnn_net 6 | from core.imagedb import ImageDB 7 | from core.image_reader import TestImageLoader 8 | import time 9 | import os 10 | import cPickle 11 | from dface.core.utils import convert_to_square,IoU 12 | import dface.config as config 13 | import dface.core.vision as vision 14 | 15 | def gen_landmark48_data(data_dir, anno_file, pnet_model_file, rnet_model_file, prefix_path='', use_cuda=True, vis=False): 16 | 17 | 18 | pnet, rnet, _ = create_mtcnn_net(p_model_path=pnet_model_file, r_model_path=rnet_model_file, use_cuda=use_cuda) 19 | mtcnn_detector = 
MtcnnDetector(pnet=pnet, rnet=rnet, min_face_size=12) 20 | 21 | imagedb = ImageDB(anno_file,mode="test",prefix_path=prefix_path) 22 | imdb = imagedb.load_imdb() 23 | image_reader = TestImageLoader(imdb,1,False) 24 | 25 | all_boxes = list() 26 | batch_idx = 0 27 | 28 | for databatch in image_reader: 29 | if batch_idx % 100 == 0: 30 | print("%d images done" % batch_idx) 31 | im = databatch 32 | 33 | 34 | if im.shape[0] >= 1200 or im.shape[1] >=1200: 35 | all_boxes.append(np.array([])) 36 | batch_idx += 1 37 | continue 38 | 39 | 40 | t = time.time() 41 | 42 | p_boxes, p_boxes_align = mtcnn_detector.detect_pnet(im=im) 43 | 44 | boxes, boxes_align = mtcnn_detector.detect_rnet(im=im, dets=p_boxes_align) 45 | 46 | if boxes_align is None: 47 | all_boxes.append(np.array([])) 48 | batch_idx += 1 49 | continue 50 | if vis: 51 | rgb_im = cv2.cvtColor(np.asarray(im), cv2.COLOR_BGR2RGB) 52 | vision.vis_two(rgb_im, boxes, boxes_align) 53 | 54 | t1 = time.time() - t 55 | t = time.time() 56 | all_boxes.append(boxes_align) 57 | batch_idx += 1 58 | 59 | save_path = config.MODEL_STORE_DIR 60 | 61 | if not os.path.exists(save_path): 62 | os.mkdir(save_path) 63 | 64 | save_file = os.path.join(save_path, "detections_%d.pkl" % int(time.time())) 65 | with open(save_file, 'wb') as f: 66 | cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) 67 | 68 | 69 | gen_sample_data(data_dir,anno_file,save_file, prefix_path) 70 | 71 | 72 | 73 | def gen_sample_data(data_dir, anno_file, det_boxs_file, prefix_path =''): 74 | 75 | landmark_save_dir = os.path.join(data_dir, "48/landmark") 76 | 77 | if not os.path.exists(landmark_save_dir): 78 | os.makedirs(landmark_save_dir) 79 | 80 | 81 | # load ground truth from annotation file 82 | # format of each line: image/path [x1,y1,x2,y2] for each gt_box in this image 83 | 84 | with open(anno_file, 'r') as f: 85 | annotations = f.readlines() 86 | 87 | image_size = 48 88 | net = "onet" 89 | 90 | im_idx_list = list() 91 | gt_boxes_list = list() 92 | gt_landmark_list = list() 93 | num_of_images = len(annotations) 94 | print("processing %d images in total" % num_of_images) 95 | 96 | for annotation in annotations: 97 | annotation = annotation.strip().split(' ') 98 | im_idx = annotation[0] 99 | 100 | boxes = map(float, annotation[1:5]) 101 | boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4) 102 | landmarks = map(float, annotation[5:]) 103 | landmarks = np.array(landmarks, dtype=np.float32).reshape(-1, 10) 104 | 105 | im_idx_list.append(im_idx) 106 | gt_boxes_list.append(boxes) 107 | gt_landmark_list.append(landmarks) 108 | 109 | 110 | save_path = config.ANNO_STORE_DIR 111 | if not os.path.exists(save_path): 112 | os.makedirs(save_path) 113 | 114 | f = open(os.path.join(save_path, 'landmark_48.txt'), 'w') 115 | 116 | 117 | det_handle = open(det_boxs_file, 'r') 118 | 119 | det_boxes = cPickle.load(det_handle) 120 | print(len(det_boxes), num_of_images) 121 | assert len(det_boxes) == num_of_images, "incorrect detections or ground truths" 122 | 123 | # index of neg, pos and part face, used as their image names 124 | p_idx = 0 125 | image_done = 0 126 | for im_idx, dets, gts, landmark in zip(im_idx_list, det_boxes, gt_boxes_list, gt_landmark_list): 127 | if image_done % 100 == 0: 128 | print("%d images done" % image_done) 129 | image_done += 1 130 | 131 | if dets.shape[0] == 0: 132 | continue 133 | img = cv2.imread(os.path.join(prefix_path,im_idx)) 134 | dets = convert_to_square(dets) 135 | dets[:, 0:4] = np.round(dets[:, 0:4]) 136 | 137 | for box in dets: 138 | x_left, y_top, x_right, 
y_bottom = box[0:4].astype(int) 139 | width = x_right - x_left + 1 140 | height = y_bottom - y_top + 1 141 | 142 | # ignore box that is too small or beyond image border 143 | if width < 20 or x_left < 0 or y_top < 0 or x_right > img.shape[1] - 1 or y_bottom > img.shape[0] - 1: 144 | continue 145 | 146 | # compute intersection over union(IoU) between current box and all gt boxes 147 | Iou = IoU(box, gts) 148 | cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :] 149 | resized_im = cv2.resize(cropped_im, (image_size, image_size), 150 | interpolation=cv2.INTER_LINEAR) 151 | 152 | # save negative images and write label 153 | if np.max(Iou) < 0.3: 154 | # Iou with all gts must below 0.3 155 | continue 156 | else: 157 | # find gt_box with the highest iou 158 | idx = np.argmax(Iou) 159 | assigned_gt = gts[idx] 160 | x1, y1, x2, y2 = assigned_gt 161 | 162 | # compute bbox reg label 163 | offset_x1 = (x1 - x_left) / float(width) 164 | offset_y1 = (y1 - y_top) / float(height) 165 | offset_x2 = (x2 - x_right) / float(width) 166 | offset_y2 = (y2 - y_bottom) / float(height) 167 | 168 | offset_left_eye_x = (landmark[0,0] - x_left) / float(width) 169 | offset_left_eye_y = (landmark[0,1] - y_top) / float(height) 170 | 171 | offset_right_eye_x = (landmark[0,2] - x_left) / float(width) 172 | offset_right_eye_y = (landmark[0,3] - y_top) / float(height) 173 | 174 | offset_nose_x = (landmark[0,4] - x_left) / float(width) 175 | offset_nose_y = (landmark[0,5] - y_top) / float(height) 176 | 177 | offset_left_mouth_x = (landmark[0,6] - x_left) / float(width) 178 | offset_left_mouth_y = (landmark[0,7] - y_top) / float(height) 179 | 180 | offset_right_mouth_x = (landmark[0,8] - x_left) / float(width) 181 | offset_right_mouth_y = (landmark[0,9] - y_top) / float(height) 182 | 183 | 184 | 185 | # save positive and part-face images and write labels 186 | if np.max(Iou) >= 0.65: 187 | save_file = os.path.join(landmark_save_dir, "%s.jpg" % p_idx) 188 | 189 | f.write(save_file + ' -2 %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f \n' % \ 190 | (offset_x1, offset_y1, offset_x2, offset_y2, \ 191 | offset_left_eye_x, offset_left_eye_y, offset_right_eye_x, offset_right_eye_y, 192 | offset_nose_x, offset_nose_y, offset_left_mouth_x, offset_left_mouth_y, 193 | offset_right_mouth_x, offset_right_mouth_y)) 194 | 195 | cv2.imwrite(save_file, resized_im) 196 | p_idx += 1 197 | 198 | f.close() 199 | 200 | 201 | 202 | def model_store_path(): 203 | return os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))+"/model_store" 204 | 205 | 206 | 207 | def parse_args(): 208 | parser = argparse.ArgumentParser(description='Test mtcnn', 209 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 210 | 211 | parser.add_argument('--dataset_path', dest='dataset_path', help='dataset folder', 212 | default='../data/wider/', type=str) 213 | parser.add_argument('--anno_file', dest='annotation_file', help='output data folder', 214 | default='../data/wider/anno.txt', type=str) 215 | parser.add_argument('--pmodel_file', dest='pnet_model_file', help='PNet model file path', 216 | default='/idata/workspace/mtcnn/model_store/pnet_epoch_5best.pt', type=str) 217 | parser.add_argument('--rmodel_file', dest='rnet_model_file', help='RNet model file path', 218 | default='/idata/workspace/mtcnn/model_store/rnet_epoch_1.pt', type=str) 219 | parser.add_argument('--gpu', dest='use_cuda', help='with gpu', 220 | default=config.USE_CUDA, type=bool) 221 | parser.add_argument('--prefix_path', dest='prefix_path', 
help='image prefix root path', 222 | default='', type=str) 223 | 224 | args = parser.parse_args() 225 | return args 226 | 227 | 228 | 229 | if __name__ == '__main__': 230 | args = parse_args() 231 | gen_landmark48_data(args.dataset_path, args.annotation_file, args.pnet_model_file, args.rnet_model_file, args.prefix_path, args.use_cuda) 232 | 233 | 234 | 235 | -------------------------------------------------------------------------------- /dface/prepare_data/widerface_annotation_gen/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/dface/prepare_data/widerface_annotation_gen/__init__.py -------------------------------------------------------------------------------- /dface/prepare_data/widerface_annotation_gen/transform.py: -------------------------------------------------------------------------------- 1 | from dface.prepare_data.widerface_annotation_gen.wider_loader import WIDER 2 | import cv2 3 | import time 4 | 5 | #wider face original images path 6 | path_to_image = '/idata/data/wider_face/WIDER_train/images' 7 | 8 | #matlab file path 9 | file_to_label = './wider_face_train.mat' 10 | 11 | #target file path 12 | target_file = './anno.txt' 13 | 14 | wider = WIDER(file_to_label, path_to_image) 15 | 16 | 17 | line_count = 0 18 | box_count = 0 19 | 20 | print('start transforming....') 21 | t = time.time() 22 | 23 | with open(target_file, 'w+') as f: 24 | # press ctrl-C to stop the process 25 | for data in wider.next(): 26 | line = [] 27 | line.append(str(data.image_name)) 28 | line_count += 1 29 | for i,box in enumerate(data.bboxes): 30 | box_count += 1 31 | for j,bvalue in enumerate(box): 32 | line.append(str(bvalue)) 33 | 34 | line.append('\n') 35 | 36 | line_str = ' '.join(line) 37 | f.write(line_str) 38 | 39 | st = time.time()-t 40 | print('end transforming') 41 | 42 | print('spend time:%ld'%st) 43 | print('total line(images):%d'%line_count) 44 | print('total boxes(faces):%d'%box_count) 45 | 46 | 47 | -------------------------------------------------------------------------------- /dface/prepare_data/widerface_annotation_gen/wider_face_train.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/dface/prepare_data/widerface_annotation_gen/wider_face_train.mat -------------------------------------------------------------------------------- /dface/prepare_data/widerface_annotation_gen/wider_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | from scipy.io import loadmat 3 | 4 | class DATA: 5 | def __init__(self, image_name, bboxes): 6 | self.image_name = image_name 7 | self.bboxes = bboxes 8 | 9 | 10 | class WIDER(object): 11 | def __init__(self, file_to_label, path_to_image=None): 12 | self.file_to_label = file_to_label 13 | self.path_to_image = path_to_image 14 | 15 | self.f = loadmat(file_to_label) 16 | self.event_list = self.f['event_list'] 17 | self.file_list = self.f['file_list'] 18 | self.face_bbx_list = self.f['face_bbx_list'] 19 | 20 | def next(self): 21 | for event_idx, event in enumerate(self.event_list): 22 | e = event[0][0].encode('utf-8') 23 | for file, bbx in zip(self.file_list[event_idx][0], 24 | self.face_bbx_list[event_idx][0]): 25 | f = file[0][0].encode('utf-8') 26 | path_of_image = os.path.join(self.path_to_image, e, f) + ".jpg" 27 | 28 | bboxes = [] 29 | 
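# Note: the unpacking below treats each row of the .mat bounding-box array as corner
# coordinates (xmin, ymin, xmax, ymax). If the annotation file you load instead stores
# boxes as (x, y, w, h), which is the layout used by the original WIDER FACE devkit,
# convert them first, e.g. xmax = x + w - 1 and ymax = y + h - 1, or the generated
# anno.txt will contain widths and heights where corners are expected.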
bbx0 = bbx[0] 30 | for i in range(bbx0.shape[0]): 31 | xmin, ymin, xmax, ymax = bbx0[i] 32 | bboxes.append((int(xmin), int(ymin), int(xmax), int(ymax))) 33 | yield DATA(path_of_image, bboxes) 34 | 35 | -------------------------------------------------------------------------------- /dface/train_net/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/dface/train_net/__init__.py -------------------------------------------------------------------------------- /dface/train_net/train.py: -------------------------------------------------------------------------------- 1 | from dface.core.image_reader import TrainImageReader 2 | import datetime 3 | import os 4 | from dface.core.models import PNet,RNet,ONet,LossFn 5 | import torch 6 | from torch.autograd import Variable 7 | import dface.core.image_tools as image_tools 8 | 9 | 10 | 11 | 12 | 13 | def compute_accuracy(prob_cls, gt_cls): 14 | prob_cls = torch.squeeze(prob_cls) 15 | gt_cls = torch.squeeze(gt_cls) 16 | 17 | #we only need the detection which >= 0 18 | mask = torch.ge(gt_cls,0) 19 | #get valid element 20 | valid_gt_cls = torch.masked_select(gt_cls,mask) 21 | valid_prob_cls = torch.masked_select(prob_cls,mask) 22 | size = min(valid_gt_cls.size()[0], valid_prob_cls.size()[0]) 23 | prob_ones = torch.ge(valid_prob_cls,0.6).float() 24 | right_ones = torch.eq(prob_ones,valid_gt_cls).float() 25 | 26 | return torch.div(torch.mul(torch.sum(right_ones),float(1.0)),float(size)) 27 | 28 | 29 | def train_pnet(model_store_path, end_epoch,imdb, 30 | batch_size,frequent=50,base_lr=0.01,use_cuda=True): 31 | 32 | if not os.path.exists(model_store_path): 33 | os.makedirs(model_store_path) 34 | 35 | lossfn = LossFn() 36 | net = PNet(is_train=True, use_cuda=use_cuda) 37 | net.train() 38 | if use_cuda: 39 | net.cuda() 40 | 41 | optimizer = torch.optim.Adam(net.parameters(), lr=base_lr) 42 | 43 | train_data=TrainImageReader(imdb,12,batch_size,shuffle=True) 44 | 45 | 46 | for cur_epoch in range(1,end_epoch+1): 47 | train_data.reset() 48 | accuracy_list=[] 49 | cls_loss_list=[] 50 | bbox_loss_list=[] 51 | # landmark_loss_list=[] 52 | 53 | for batch_idx,(image,(gt_label,gt_bbox,gt_landmark))in enumerate(train_data): 54 | 55 | im_tensor = [ image_tools.convert_image_to_tensor(image[i,:,:,:]) for i in range(image.shape[0]) ] 56 | im_tensor = torch.stack(im_tensor) 57 | 58 | im_tensor = Variable(im_tensor) 59 | gt_label = Variable(torch.from_numpy(gt_label).float()) 60 | 61 | gt_bbox = Variable(torch.from_numpy(gt_bbox).float()) 62 | # gt_landmark = Variable(torch.from_numpy(gt_landmark).float()) 63 | 64 | if use_cuda: 65 | im_tensor = im_tensor.cuda() 66 | gt_label = gt_label.cuda() 67 | gt_bbox = gt_bbox.cuda() 68 | # gt_landmark = gt_landmark.cuda() 69 | 70 | cls_pred, box_offset_pred = net(im_tensor) 71 | # all_loss, cls_loss, offset_loss = lossfn.loss(gt_label=label_y,gt_offset=bbox_y, pred_label=cls_pred, pred_offset=box_offset_pred) 72 | 73 | cls_loss = lossfn.cls_loss(gt_label,cls_pred) 74 | box_offset_loss = lossfn.box_loss(gt_label,gt_bbox,box_offset_pred) 75 | # landmark_loss = lossfn.landmark_loss(gt_label,gt_landmark,landmark_offset_pred) 76 | 77 | all_loss = cls_loss*1.0+box_offset_loss*0.5 78 | 79 | if batch_idx%frequent==0: 80 | accuracy=compute_accuracy(cls_pred,gt_label) 81 | 82 | show1 = accuracy.data.tolist()[0] 83 | show2 = cls_loss.data.tolist()[0] 84 | show3 = box_offset_loss.data.tolist()[0] 85 | show5 = 
all_loss.data.tolist()[0] 86 | 87 | print("%s : Epoch: %d, Step: %d, accuracy: %s, det loss: %s, bbox loss: %s, all_loss: %s, lr:%s "%(datetime.datetime.now(),cur_epoch,batch_idx, show1,show2,show3,show5,base_lr)) 88 | accuracy_list.append(accuracy) 89 | cls_loss_list.append(cls_loss) 90 | bbox_loss_list.append(box_offset_loss) 91 | 92 | optimizer.zero_grad() 93 | all_loss.backward() 94 | optimizer.step() 95 | 96 | 97 | accuracy_avg = torch.mean(torch.cat(accuracy_list)) 98 | cls_loss_avg = torch.mean(torch.cat(cls_loss_list)) 99 | bbox_loss_avg = torch.mean(torch.cat(bbox_loss_list)) 100 | # landmark_loss_avg = torch.mean(torch.cat(landmark_loss_list)) 101 | 102 | show6 = accuracy_avg.data.tolist()[0] 103 | show7 = cls_loss_avg.data.tolist()[0] 104 | show8 = bbox_loss_avg.data.tolist()[0] 105 | 106 | print("Epoch: %d, accuracy: %s, cls loss: %s, bbox loss: %s" % (cur_epoch, show6, show7, show8)) 107 | torch.save(net.state_dict(), os.path.join(model_store_path,"pnet_epoch_%d.pt" % cur_epoch)) 108 | torch.save(net, os.path.join(model_store_path,"pnet_epoch_model_%d.pkl" % cur_epoch)) 109 | 110 | 111 | 112 | 113 | def train_rnet(model_store_path, end_epoch,imdb, 114 | batch_size,frequent=50,base_lr=0.01,use_cuda=True): 115 | 116 | if not os.path.exists(model_store_path): 117 | os.makedirs(model_store_path) 118 | 119 | lossfn = LossFn() 120 | net = RNet(is_train=True, use_cuda=use_cuda) 121 | net.train() 122 | if use_cuda: 123 | net.cuda() 124 | 125 | optimizer = torch.optim.Adam(net.parameters(), lr=base_lr) 126 | 127 | train_data=TrainImageReader(imdb,24,batch_size,shuffle=True) 128 | 129 | 130 | for cur_epoch in range(1,end_epoch+1): 131 | train_data.reset() 132 | accuracy_list=[] 133 | cls_loss_list=[] 134 | bbox_loss_list=[] 135 | landmark_loss_list=[] 136 | 137 | for batch_idx,(image,(gt_label,gt_bbox,gt_landmark))in enumerate(train_data): 138 | 139 | im_tensor = [ image_tools.convert_image_to_tensor(image[i,:,:,:]) for i in range(image.shape[0]) ] 140 | im_tensor = torch.stack(im_tensor) 141 | 142 | im_tensor = Variable(im_tensor) 143 | gt_label = Variable(torch.from_numpy(gt_label).float()) 144 | 145 | gt_bbox = Variable(torch.from_numpy(gt_bbox).float()) 146 | gt_landmark = Variable(torch.from_numpy(gt_landmark).float()) 147 | 148 | if use_cuda: 149 | im_tensor = im_tensor.cuda() 150 | gt_label = gt_label.cuda() 151 | gt_bbox = gt_bbox.cuda() 152 | gt_landmark = gt_landmark.cuda() 153 | 154 | cls_pred, box_offset_pred = net(im_tensor) 155 | # all_loss, cls_loss, offset_loss = lossfn.loss(gt_label=label_y,gt_offset=bbox_y, pred_label=cls_pred, pred_offset=box_offset_pred) 156 | 157 | cls_loss = lossfn.cls_loss(gt_label,cls_pred) 158 | box_offset_loss = lossfn.box_loss(gt_label,gt_bbox,box_offset_pred) 159 | # landmark_loss = lossfn.landmark_loss(gt_label,gt_landmark,landmark_offset_pred) 160 | 161 | all_loss = cls_loss*1.0+box_offset_loss*0.5 162 | 163 | if batch_idx%frequent==0: 164 | accuracy=compute_accuracy(cls_pred,gt_label) 165 | 166 | show1 = accuracy.data.tolist()[0] 167 | show2 = cls_loss.data.tolist()[0] 168 | show3 = box_offset_loss.data.tolist()[0] 169 | # show4 = landmark_loss.data.tolist()[0] 170 | show5 = all_loss.data.tolist()[0] 171 | 172 | print("%s : Epoch: %d, Step: %d, accuracy: %s, det loss: %s, bbox loss: %s, all_loss: %s, lr:%s "%(datetime.datetime.now(), cur_epoch, batch_idx, show1, show2, show3, show5, base_lr)) 173 | accuracy_list.append(accuracy) 174 | cls_loss_list.append(cls_loss) 175 | bbox_loss_list.append(box_offset_loss) 176 | # 
landmark_loss_list.append(landmark_loss) 177 | 178 | optimizer.zero_grad() 179 | all_loss.backward() 180 | optimizer.step() 181 | 182 | 183 | accuracy_avg = torch.mean(torch.cat(accuracy_list)) 184 | cls_loss_avg = torch.mean(torch.cat(cls_loss_list)) 185 | bbox_loss_avg = torch.mean(torch.cat(bbox_loss_list)) 186 | # landmark_loss_avg = torch.mean(torch.cat(landmark_loss_list)) 187 | 188 | show6 = accuracy_avg.data.tolist()[0] 189 | show7 = cls_loss_avg.data.tolist()[0] 190 | show8 = bbox_loss_avg.data.tolist()[0] 191 | # show9 = landmark_loss_avg.data.tolist()[0] 192 | 193 | print("Epoch: %d, accuracy: %s, cls loss: %s, bbox loss: %s" % (cur_epoch, show6, show7, show8)) 194 | torch.save(net.state_dict(), os.path.join(model_store_path,"rnet_epoch_%d.pt" % cur_epoch)) 195 | torch.save(net, os.path.join(model_store_path,"rnet_epoch_model_%d.pkl" % cur_epoch)) 196 | 197 | 198 | def train_onet(model_store_path, end_epoch,imdb, 199 | batch_size,frequent=50,base_lr=0.01,use_cuda=True): 200 | 201 | if not os.path.exists(model_store_path): 202 | os.makedirs(model_store_path) 203 | 204 | lossfn = LossFn() 205 | net = ONet(is_train=True) 206 | net.train() 207 | if use_cuda: 208 | net.cuda() 209 | 210 | optimizer = torch.optim.Adam(net.parameters(), lr=base_lr) 211 | 212 | train_data=TrainImageReader(imdb,48,batch_size,shuffle=True) 213 | 214 | 215 | for cur_epoch in range(1,end_epoch+1): 216 | train_data.reset() 217 | accuracy_list=[] 218 | cls_loss_list=[] 219 | bbox_loss_list=[] 220 | landmark_loss_list=[] 221 | 222 | for batch_idx,(image,(gt_label,gt_bbox,gt_landmark))in enumerate(train_data): 223 | 224 | im_tensor = [ image_tools.convert_image_to_tensor(image[i,:,:,:]) for i in range(image.shape[0]) ] 225 | im_tensor = torch.stack(im_tensor) 226 | 227 | im_tensor = Variable(im_tensor) 228 | gt_label = Variable(torch.from_numpy(gt_label).float()) 229 | 230 | gt_bbox = Variable(torch.from_numpy(gt_bbox).float()) 231 | gt_landmark = Variable(torch.from_numpy(gt_landmark).float()) 232 | 233 | if use_cuda: 234 | im_tensor = im_tensor.cuda() 235 | gt_label = gt_label.cuda() 236 | gt_bbox = gt_bbox.cuda() 237 | gt_landmark = gt_landmark.cuda() 238 | 239 | cls_pred, box_offset_pred, landmark_offset_pred = net(im_tensor) 240 | # all_loss, cls_loss, offset_loss = lossfn.loss(gt_label=label_y,gt_offset=bbox_y, pred_label=cls_pred, pred_offset=box_offset_pred) 241 | 242 | cls_loss = lossfn.cls_loss(gt_label,cls_pred) 243 | box_offset_loss = lossfn.box_loss(gt_label,gt_bbox,box_offset_pred) 244 | landmark_loss = lossfn.landmark_loss(gt_label,gt_landmark,landmark_offset_pred) 245 | 246 | all_loss = cls_loss*0.8+box_offset_loss*0.6+landmark_loss*1.5 247 | 248 | if batch_idx%frequent==0: 249 | accuracy=compute_accuracy(cls_pred,gt_label) 250 | 251 | show1 = accuracy.data.tolist()[0] 252 | show2 = cls_loss.data.tolist()[0] 253 | show3 = box_offset_loss.data.tolist()[0] 254 | show4 = landmark_loss.data.tolist()[0] 255 | show5 = all_loss.data.tolist()[0] 256 | 257 | print("%s : Epoch: %d, Step: %d, accuracy: %s, det loss: %s, bbox loss: %s, landmark loss: %s, all_loss: %s, lr:%s "%(datetime.datetime.now(),cur_epoch,batch_idx, show1,show2,show3,show4,show5,base_lr)) 258 | accuracy_list.append(accuracy) 259 | cls_loss_list.append(cls_loss) 260 | bbox_loss_list.append(box_offset_loss) 261 | landmark_loss_list.append(landmark_loss) 262 | 263 | optimizer.zero_grad() 264 | all_loss.backward() 265 | optimizer.step() 266 | 267 | 268 | accuracy_avg = torch.mean(torch.cat(accuracy_list)) 269 | cls_loss_avg = 
torch.mean(torch.cat(cls_loss_list)) 270 | bbox_loss_avg = torch.mean(torch.cat(bbox_loss_list)) 271 | landmark_loss_avg = torch.mean(torch.cat(landmark_loss_list)) 272 | 273 | show6 = accuracy_avg.data.tolist()[0] 274 | show7 = cls_loss_avg.data.tolist()[0] 275 | show8 = bbox_loss_avg.data.tolist()[0] 276 | show9 = landmark_loss_avg.data.tolist()[0] 277 | 278 | print("Epoch: %d, accuracy: %s, cls loss: %s, bbox loss: %s, landmark loss: %s " % (cur_epoch, show6, show7, show8, show9)) 279 | torch.save(net.state_dict(), os.path.join(model_store_path,"onet_epoch_%d.pt" % cur_epoch)) 280 | torch.save(net, os.path.join(model_store_path,"onet_epoch_model_%d.pkl" % cur_epoch)) 281 | 282 | -------------------------------------------------------------------------------- /dface/train_net/train_o_net.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | from dface.core.imagedb import ImageDB 4 | import dface.train_net.train as train 5 | import dface.config as config 6 | import os 7 | 8 | 9 | 10 | def train_net(annotation_file, model_store_path, 11 | end_epoch=16, frequent=200, lr=0.01, batch_size=128, use_cuda=False): 12 | 13 | imagedb = ImageDB(annotation_file) 14 | gt_imdb = imagedb.load_imdb() 15 | gt_imdb = imagedb.append_flipped_images(gt_imdb) 16 | 17 | train.train_onet(model_store_path=model_store_path, end_epoch=end_epoch, imdb=gt_imdb, batch_size=batch_size, frequent=frequent, base_lr=lr, use_cuda=use_cuda) 18 | 19 | def parse_args(): 20 | parser = argparse.ArgumentParser(description='Train ONet', 21 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 22 | 23 | 24 | parser.add_argument('--anno_file', dest='annotation_file', 25 | default=os.path.join(config.ANNO_STORE_DIR,config.ONET_TRAIN_IMGLIST_FILENAME), help='training data annotation file', type=str) 26 | parser.add_argument('--model_path', dest='model_store_path', help='training model store directory', 27 | default=config.MODEL_STORE_DIR, type=str) 28 | parser.add_argument('--end_epoch', dest='end_epoch', help='end epoch of training', 29 | default=config.END_EPOCH, type=int) 30 | parser.add_argument('--frequent', dest='frequent', help='frequency of logging', 31 | default=200, type=int) 32 | parser.add_argument('--lr', dest='lr', help='learning rate', 33 | default=0.002, type=float) 34 | parser.add_argument('--batch_size', dest='batch_size', help='train batch size', 35 | default=1000, type=int) 36 | parser.add_argument('--gpu', dest='use_cuda', help='train with gpu', 37 | default=config.USE_CUDA, type=bool) 38 | parser.add_argument('--prefix_path', dest='', help='training data annotation images prefix root path', type=str) 39 | 40 | args = parser.parse_args() 41 | return args 42 | 43 | if __name__ == '__main__': 44 | args = parse_args() 45 | print('train ONet argument:') 46 | print(args) 47 | 48 | 49 | train_net(annotation_file=args.annotation_file, model_store_path=args.model_store_path, 50 | end_epoch=args.end_epoch, frequent=args.frequent, lr=args.lr, batch_size=args.batch_size, use_cuda=args.use_cuda) 51 | -------------------------------------------------------------------------------- /dface/train_net/train_p_net.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | from dface.core.imagedb import ImageDB 4 | from dface.train_net.train import train_pnet 5 | import dface.config as config 6 | import os 7 | 8 | 9 | 10 | def train_net(annotation_file, model_store_path, 11 | end_epoch=16, 
frequent=200, lr=0.01, batch_size=128, use_cuda=False): 12 | 13 | imagedb = ImageDB(annotation_file) 14 | gt_imdb = imagedb.load_imdb() 15 | gt_imdb = imagedb.append_flipped_images(gt_imdb) 16 | 17 | train_pnet(model_store_path=model_store_path, end_epoch=end_epoch, imdb=gt_imdb, batch_size=batch_size, frequent=frequent, base_lr=lr, use_cuda=use_cuda) 18 | 19 | def parse_args(): 20 | parser = argparse.ArgumentParser(description='Train PNet', 21 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 22 | 23 | 24 | parser.add_argument('--anno_file', dest='annotation_file', 25 | default=os.path.join(config.ANNO_STORE_DIR,config.PNET_TRAIN_IMGLIST_FILENAME), help='training data annotation file', type=str) 26 | parser.add_argument('--model_path', dest='model_store_path', help='training model store directory', 27 | default=config.MODEL_STORE_DIR, type=str) 28 | parser.add_argument('--end_epoch', dest='end_epoch', help='end epoch of training', 29 | default=config.END_EPOCH, type=int) 30 | parser.add_argument('--frequent', dest='frequent', help='frequency of logging', 31 | default=200, type=int) 32 | parser.add_argument('--lr', dest='lr', help='learning rate', 33 | default=config.TRAIN_LR, type=float) 34 | parser.add_argument('--batch_size', dest='batch_size', help='train batch size', 35 | default=config.TRAIN_BATCH_SIZE, type=int) 36 | parser.add_argument('--gpu', dest='use_cuda', help='train with gpu', 37 | default=config.USE_CUDA, type=bool) 38 | parser.add_argument('--prefix_path', dest='', help='training data annotation images prefix root path', type=str) 39 | 40 | args = parser.parse_args() 41 | return args 42 | 43 | if __name__ == '__main__': 44 | args = parse_args() 45 | print('train Pnet argument:') 46 | print(args) 47 | 48 | train_net(annotation_file=args.annotation_file, model_store_path=args.model_store_path, 49 | end_epoch=args.end_epoch, frequent=args.frequent, lr=args.lr, batch_size=args.batch_size, use_cuda=args.use_cuda) 50 | -------------------------------------------------------------------------------- /dface/train_net/train_r_net.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | from dface.core.imagedb import ImageDB 4 | import dface.train_net.train as train 5 | import dface.config as config 6 | import os 7 | 8 | 9 | 10 | def train_net(annotation_file, model_store_path, 11 | end_epoch=16, frequent=200, lr=0.01, batch_size=128, use_cuda=False): 12 | 13 | imagedb = ImageDB(annotation_file) 14 | gt_imdb = imagedb.load_imdb() 15 | gt_imdb = imagedb.append_flipped_images(gt_imdb) 16 | 17 | train.train_rnet(model_store_path=model_store_path, end_epoch=end_epoch, imdb=gt_imdb, batch_size=batch_size, frequent=frequent, base_lr=lr, use_cuda=use_cuda) 18 | 19 | def parse_args(): 20 | parser = argparse.ArgumentParser(description='Train RNet', 21 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 22 | 23 | 24 | parser.add_argument('--anno_file', dest='annotation_file', 25 | default=os.path.join(config.ANNO_STORE_DIR,config.RNET_TRAIN_IMGLIST_FILENAME), help='training data annotation file', type=str) 26 | parser.add_argument('--model_path', dest='model_store_path', help='training model store directory', 27 | default=config.MODEL_STORE_DIR, type=str) 28 | parser.add_argument('--end_epoch', dest='end_epoch', help='end epoch of training', 29 | default=config.END_EPOCH, type=int) 30 | parser.add_argument('--frequent', dest='frequent', help='frequency of logging', 31 | default=200, type=int) 32 | 
parser.add_argument('--lr', dest='lr', help='learning rate', 33 | default=config.TRAIN_LR, type=float) 34 | parser.add_argument('--batch_size', dest='batch_size', help='train batch size', 35 | default=config.TRAIN_BATCH_SIZE, type=int) 36 | parser.add_argument('--gpu', dest='use_cuda', help='train with gpu', 37 | default=config.USE_CUDA, type=bool) 38 | parser.add_argument('--prefix_path', dest='', help='training data annotation images prefix root path', type=str) 39 | 40 | args = parser.parse_args() 41 | return args 42 | 43 | if __name__ == '__main__': 44 | args = parse_args() 45 | print('train Rnet argument:') 46 | print(args) 47 | 48 | 49 | train_net(annotation_file=args.annotation_file, model_store_path=args.model_store_path, 50 | end_epoch=args.end_epoch, frequent=args.frequent, lr=args.lr, batch_size=args.batch_size, use_cuda=args.use_cuda) 51 | -------------------------------------------------------------------------------- /environment-win64.yml: -------------------------------------------------------------------------------- 1 | name: ai_gpu 2 | channels: 3 | - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free 4 | - peterjc123 5 | - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/ 6 | - defaults 7 | dependencies: 8 | - _ipyw_jlab_nb_ext_conf=0.1.0=py36he6757f0_0 9 | - alabaster=0.7.10=py36hcd07829_0 10 | - anaconda-client=1.6.5=py36hd36550c_0 11 | - anaconda-navigator=1.6.10=py36h51c3d4f_0 12 | - anaconda-project=0.8.0=py36h8b3bf89_0 13 | - asn1crypto=0.22.0=py36h8e79faa_1 14 | - astroid=1.5.3=py36h9d85297_0 15 | - astropy=2.0.2=py36h06391c4_4 16 | - babel=2.5.0=py36h35444c1_0 17 | - backports=1.0=py36h81696a8_1 18 | - backports.shutil_get_terminal_size=1.0.0=py36h79ab834_2 19 | - beautifulsoup4=4.6.0=py36hd4cc5e8_1 20 | - bitarray=0.8.1=py36h6af124b_0 21 | - bkcharts=0.2=py36h7e685f7_0 22 | - blaze=0.11.3=py36h8a29ca5_0 23 | - bleach=2.0.0=py36h0a7e3d6_0 24 | - bokeh=0.12.10=py36h0be3b39_0 25 | - boto=2.48.0=py36h1a776d2_1 26 | - bottleneck=1.2.1=py36hd119dfa_0 27 | - bzip2=1.0.6=vc14hdec8e7a_1 28 | - ca-certificates=2017.08.26=h94faf87_0 29 | - cachecontrol=0.12.3=py36hfe50d7b_0 30 | - certifi=2017.7.27.1=py36h043bc9e_0 31 | - chardet=3.0.4=py36h420ce6e_1 32 | - click=6.7=py36hec8c647_0 33 | - cloudpickle=0.4.0=py36h639d8dc_0 34 | - clyent=1.2.2=py36hb10d595_1 35 | - colorama=0.3.9=py36h029ae33_0 36 | - comtypes=1.1.2=py36heb9b3d1_0 37 | - conda-verify=2.0.0=py36h065de53_0 38 | - console_shortcut=0.1.1=h6bb2dd7_3 39 | - contextlib2=0.5.5=py36he5d52c0_0 40 | - cryptography=2.0.3=py36h123decb_1 41 | - curl=7.55.1=vc14hdaba4a4_3 42 | - cycler=0.10.0=py36h009560c_0 43 | - cython=0.26.1=py36h18049ac_0 44 | - cytoolz=0.8.2=py36h547e66e_0 45 | - dask=0.15.3=py36h396fcb9_0 46 | - dask-core=0.15.3=py36hd651449_0 47 | - datashape=0.5.4=py36h5770b85_0 48 | - decorator=4.1.2=py36he63a57b_0 49 | - distlib=0.2.5=py36h51371be_0 50 | - distributed=1.19.1=py36h8504682_0 51 | - docutils=0.14=py36h6012d8f_0 52 | - entrypoints=0.2.3=py36hfd66bb0_2 53 | - et_xmlfile=1.0.1=py36h3d2d736_0 54 | - fastcache=1.0.2=py36hffdae1b_0 55 | - filelock=2.0.12=py36hd7ddd41_0 56 | - flask=0.12.2=py36h98b5e8f_0 57 | - flask-cors=3.0.3=py36h8a3855d_0 58 | - freetype=2.8=vc14h17c9bdf_0 59 | - get_terminal_size=1.0.0=h38e98db_0 60 | - gevent=1.2.2=py36h342a76c_0 61 | - glob2=0.5=py36h11cc1bd_1 62 | - greenlet=0.4.12=py36ha00ad21_0 63 | - h5py=2.7.0=py36hfbe0a52_1 64 | - hdf5=1.10.1=vc14hb361328_0 65 | - heapdict=1.0.0=py36h21fa5f4_0 66 | - html5lib=0.999999999=py36ha09b1f3_0 67 | - 
icc_rt=2017.0.4=h97af966_0 68 | - icu=58.2=vc14hc45fdbb_0 69 | - idna=2.6=py36h148d497_1 70 | - imageio=2.2.0=py36had6c2d2_0 71 | - imagesize=0.7.1=py36he29f638_0 72 | - intel-openmp=2018.0.0=hcd89f80_7 73 | - ipykernel=4.6.1=py36hbb77b34_0 74 | - ipython=6.1.0=py36h236ecc8_1 75 | - ipython_genutils=0.2.0=py36h3c5d0ee_0 76 | - ipywidgets=7.0.0=py36h2e74ada_0 77 | - isort=4.2.15=py36h6198cc5_0 78 | - itsdangerous=0.24=py36hb6c5a24_1 79 | - jdcal=1.3=py36h64a5255_0 80 | - jedi=0.10.2=py36hed927a0_0 81 | - jinja2=2.9.6=py36h10aa3a0_1 82 | - jpeg=9b=vc14h4d7706e_1 83 | - jsonschema=2.6.0=py36h7636477_0 84 | - jupyter=1.0.0=py36h422fd7e_2 85 | - jupyter_client=5.1.0=py36h9902a9a_0 86 | - jupyter_console=5.2.0=py36h6d89b47_1 87 | - jupyter_core=4.3.0=py36h511e818_0 88 | - jupyterlab=0.27.0=py36h34cc53b_2 89 | - jupyterlab_launcher=0.4.0=py36h22c3ccf_0 90 | - lazy-object-proxy=1.3.1=py36hd1c21d2_0 91 | - libiconv=1.15=vc14h29686d3_5 92 | - libpng=1.6.32=vc14h5163883_3 93 | - libssh2=1.8.0=vc14hcf584a9_2 94 | - libtiff=4.0.9=h0f13578_0 95 | - libxml2=2.9.4=vc14h8fd0f11_5 96 | - libxslt=1.1.29=vc14hf85b8d4_5 97 | - llvmlite=0.20.0=py36_0 98 | - locket=0.2.0=py36hfed976d_1 99 | - lockfile=0.12.2=py36h0468280_0 100 | - lxml=4.1.0=py36h0dcd83c_0 101 | - lzo=2.10=vc14h0a64fa6_1 102 | - markupsafe=1.0=py36h0e26971_1 103 | - matplotlib=2.1.0=py36h11b4b9c_0 104 | - mccabe=0.6.1=py36hb41005a_1 105 | - menuinst=1.4.10=py36h42196fb_0 106 | - mistune=0.7.4=py36h4874169_0 107 | - mpmath=0.19=py36he326802_2 108 | - msgpack-python=0.4.8=py36h58b1e9d_0 109 | - multipledispatch=0.4.9=py36he44c36e_0 110 | - navigator-updater=0.1.0=py36h8a7b86b_0 111 | - nbconvert=5.3.1=py36h8dc0fde_0 112 | - nbformat=4.4.0=py36h3a5bc1b_0 113 | - networkx=2.0=py36hff991e3_0 114 | - nltk=3.2.4=py36hd0e0a39_0 115 | - nose=1.3.7=py36h1c3779e_2 116 | - notebook=5.0.0=py36hd9fbf6f_2 117 | - numba=0.35.0=np113py36_10 118 | - numpydoc=0.7.0=py36ha25429e_0 119 | - odo=0.5.1=py36h7560279_0 120 | - olefile=0.44=py36h0a7bdd2_0 121 | - opencv=3.3.1=py36h20b85fd_1 122 | - openpyxl=2.4.8=py36hf3b77f6_1 123 | - openssl=1.0.2l=vc14hcac20b0_2 124 | - packaging=16.8=py36ha0986f6_1 125 | - pandas=0.20.3=py36hce827b7_2 126 | - pandoc=1.19.2.1=hb2460c7_1 127 | - pandocfilters=1.4.2=py36h3ef6317_1 128 | - partd=0.3.8=py36hc8e763b_0 129 | - path.py=10.3.1=py36h3dd8b46_0 130 | - pathlib2=2.3.0=py36h7bfb78b_0 131 | - patsy=0.4.1=py36h42cefec_0 132 | - pep8=1.7.0=py36h0f3d67a_0 133 | - pickleshare=0.7.4=py36h9de030f_0 134 | - pillow=4.2.1=py36hdb25ab2_0 135 | - pip=9.0.1=py36hadba87b_3 136 | - pkginfo=1.4.1=py36hb0f9cfa_1 137 | - ply=3.10=py36h1211beb_0 138 | - progress=1.3=py36hbeca8d3_0 139 | - prompt_toolkit=1.0.15=py36h60b8f86_0 140 | - psutil=5.4.0=py36h4e662fb_0 141 | - py=1.4.34=py36ha4aca3a_1 142 | - pycodestyle=2.3.1=py36h7cc55cd_0 143 | - pycosat=0.6.2=py36hf17546d_1 144 | - pycparser=2.18=py36hd053e01_1 145 | - pycrypto=2.6.1=py36he68e6e2_1 146 | - pycurl=7.43.0=py36h086bf4c_3 147 | - pyflakes=1.6.0=py36h0b975d6_0 148 | - pygments=2.2.0=py36hb010967_0 149 | - pylint=1.7.4=py36ha4e6ded_0 150 | - pyodbc=4.0.17=py36h0006bc2_0 151 | - pyopenssl=17.2.0=py36h15ca2fc_0 152 | - pyparsing=2.2.0=py36h785a196_1 153 | - pyqt=5.6.0=py36hb5ed885_5 154 | - pysocks=1.6.7=py36h698d350_1 155 | - pytables=3.4.2=py36h71138e3_2 156 | - pytest=3.2.1=py36h753b05e_1 157 | - python=3.6.3=h9e2ca53_1 158 | - python-dateutil=2.6.1=py36h509ddcb_1 159 | - pytz=2017.2=py36h05d413f_1 160 | - pywavelets=0.5.2=py36hc649158_0 161 | - pywin32=221=py36h9c10281_0 162 | - 
pyyaml=3.12=py36h1d1928f_1 163 | - pyzmq=16.0.2=py36h38c27d9_2 164 | - qt=5.6.2=vc14h6f8c307_12 165 | - qtawesome=0.4.4=py36h5aa48f6_0 166 | - qtconsole=4.3.1=py36h99a29a9_0 167 | - qtpy=1.3.1=py36hb8717c5_0 168 | - requests=2.18.4=py36h4371aae_1 169 | - rope=0.10.5=py36hcaf5641_0 170 | - ruamel_yaml=0.11.14=py36h9b16331_2 171 | - scikit-image=0.13.0=py36h6dffa3f_1 172 | - seaborn=0.8.0=py36h62cb67c_0 173 | - setuptools=36.5.0=py36h65f9e6e_0 174 | - simplegeneric=0.8.1=py36heab741f_0 175 | - singledispatch=3.4.0.3=py36h17d0c80_0 176 | - sip=4.18.1=py36h9c25514_2 177 | - six=1.11.0=py36h4db2310_1 178 | - snowballstemmer=1.2.1=py36h763602f_0 179 | - sortedcollections=0.5.3=py36hbefa0ab_0 180 | - sortedcontainers=1.5.7=py36ha90ac20_0 181 | - sphinx=1.6.3=py36h9bb690b_0 182 | - sphinxcontrib=1.0=py36hbbac3d2_1 183 | - sphinxcontrib-websupport=1.0.1=py36hb5e5916_1 184 | - spyder=3.2.4=py36h8845eaa_0 185 | - sqlalchemy=1.1.13=py36h5948d12_0 186 | - sqlite=3.20.1=vc14h7ce8c62_1 187 | - statsmodels=0.8.0=py36h6189b4c_0 188 | - sympy=1.1.1=py36h96708e0_0 189 | - tblib=1.3.2=py36h30f5020_0 190 | - testpath=0.3.1=py36h2698cfe_0 191 | - tk=8.6.7=vc14hb68737d_1 192 | - toolz=0.8.2=py36he152a52_0 193 | - tornado=4.5.2=py36h57f6048_0 194 | - traitlets=4.3.2=py36h096827d_0 195 | - typing=3.6.2=py36hb035bda_0 196 | - unicodecsv=0.14.1=py36h6450c06_0 197 | - urllib3=1.22=py36h276f60a_0 198 | - vs2015_runtime=14.0.25123=hd4c4e62_2 199 | - wcwidth=0.1.7=py36h3d5aa90_0 200 | - webencodings=0.5.1=py36h67c50ae_1 201 | - werkzeug=0.12.2=py36h866a736_0 202 | - wheel=0.29.0=py36h6ce6cde_1 203 | - widgetsnbextension=3.0.2=py36h364476f_1 204 | - win_inet_pton=1.0.1=py36he67d7fd_1 205 | - win_unicode_console=0.5=py36hcdbd4b5_0 206 | - wincertstore=0.2=py36h7fe50ca_0 207 | - wrapt=1.10.11=py36he5f5981_0 208 | - xlrd=1.1.0=py36h1cb58dc_1 209 | - xlsxwriter=1.0.2=py36hf723b7d_0 210 | - xlwings=0.11.4=py36hd3cf94d_0 211 | - xlwt=1.3.0=py36h1a4751e_0 212 | - yaml=0.1.7=vc14hb31d195_1 213 | - zict=0.1.3=py36h2d8e73e_0 214 | - zlib=1.2.11=vc14h1cdd9ab_1 215 | - anaconda=custom=py36_0 216 | - cffi=1.10.0=py36_0 217 | - mkl=2017.0.3=0 218 | - mkl-service=1.1.2=py36_3 219 | - numexpr=2.6.2=np113py36_0 220 | - numpy=1.13.1=py36_0 221 | - scikit-learn=0.19.0=np113py36_0 222 | - scipy=0.19.1=np113py36_0 223 | - cuda80=1.0=h205658b_0 224 | - pytorch=0.3.0=py36_0.3.0cu80 225 | - vc=14.1=h21ff451_0 226 | - vs2017_runtime=15.4.27004.2010=0 227 | - pip: 228 | - backports.shutil-get-terminal-size==1.0.0 229 | - et-xmlfile==1.0.1 230 | - ipython-genutils==0.2.0 231 | - jupyter-client==5.1.0 232 | - jupyter-console==5.2.0 233 | - jupyter-core==4.3.0 234 | - jupyterlab-launcher==0.4.0 235 | - prompt-toolkit==1.0.15 236 | - ruamel-yaml==0.11.14 237 | - tables==3.4.2 238 | - torch==0.3.0b0+591e73e 239 | - torchvision==0.2.0 240 | - win-inet-pton==1.0.1 241 | - win-unicode-console==0.5 242 | prefix: D:\Tools\Anaconda3\envs\ai_gpu 243 | 244 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: pytorch 2 | channels: 3 | - soumith 4 | - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free 5 | - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/ 6 | - defaults 7 | dependencies: 8 | - cairo=1.14.8=0 9 | - certifi=2016.2.28=py27_0 10 | - cffi=1.10.0=py27_0 11 | - fontconfig=2.12.1=3 12 | - freetype=2.5.5=2 13 | - glib=2.50.2=1 14 | - harfbuzz=0.9.39=2 15 | - hdf5=1.8.17=2 16 | - jbig=2.1=0 17 | - 
jpeg=8d=2 18 | - libffi=3.2.1=1 19 | - libgcc=5.2.0=0 20 | - libiconv=1.14=0 21 | - libpng=1.6.30=1 22 | - libtiff=4.0.6=2 23 | - libxml2=2.9.4=0 24 | - mkl=2017.0.3=0 25 | - numpy=1.12.1=py27_0 26 | - olefile=0.44=py27_0 27 | - opencv=3.1.0=np112py27_1 28 | - openssl=1.0.2l=0 29 | - pcre=8.39=1 30 | - pillow=3.4.2=py27_0 31 | - pip=9.0.1=py27_1 32 | - pixman=0.34.0=0 33 | - pycparser=2.18=py27_0 34 | - python=2.7.13=0 35 | - readline=6.2=2 36 | - setuptools=36.4.0=py27_1 37 | - six=1.10.0=py27_0 38 | - sqlite=3.13.0=0 39 | - tk=8.5.18=0 40 | - wheel=0.29.0=py27_0 41 | - xz=5.2.3=0 42 | - zlib=1.2.11=0 43 | - cycler=0.10.0=py27_0 44 | - dbus=1.10.20=0 45 | - expat=2.1.0=0 46 | - functools32=3.2.3.2=py27_0 47 | - gst-plugins-base=1.8.0=0 48 | - gstreamer=1.8.0=0 49 | - icu=54.1=0 50 | - libxcb=1.12=1 51 | - matplotlib=2.0.2=np112py27_0 52 | - pycairo=1.10.0=py27_0 53 | - pyparsing=2.2.0=py27_0 54 | - pyqt=5.6.0=py27_2 55 | - python-dateutil=2.6.1=py27_0 56 | - pytz=2017.2=py27_0 57 | - qt=5.6.2=2 58 | - sip=4.18=py27_0 59 | - subprocess32=3.2.7=py27_0 60 | - cuda80=1.0=0 61 | - pytorch=0.2.0=py27hc03bea1_4cu80 62 | - torchvision=0.1.9=py27hdb88a65_1 63 | - pip: 64 | - torch==0.2.0.post4 65 | prefix: /home/asy/.conda/envs/pytorch 66 | 67 | -------------------------------------------------------------------------------- /environment_osx.yaml: -------------------------------------------------------------------------------- 1 | name: dface 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - backports=1.0=py27hb4f9756_1 7 | - backports.functools_lru_cache=1.4=py27h2aca819_1 8 | - backports_abc=0.5=py27h6972548_0 9 | - bzip2=1.0.6=h649919c_2 10 | - ca-certificates=2017.08.26=ha1e5d58_0 11 | - cairo=1.14.10=h913ea44_6 12 | - certifi=2017.11.5=py27hfa9a1c4_0 13 | - cffi=1.11.4=py27h342bebf_0 14 | - cycler=0.10.0=py27hfc73c78_0 15 | - ffmpeg=3.4=h766ddd1_0 16 | - fontconfig=2.12.4=hffb9db1_2 17 | - freetype=2.8=h12048fb_1 18 | - functools32=3.2.3.2=py27h8ceab06_1 19 | - gettext=0.19.8.1=h15daf44_3 20 | - glib=2.53.6=h33f6a65_2 21 | - graphite2=1.3.10=h233cf8b_0 22 | - harfbuzz=1.5.0=h6db888e_0 23 | - hdf5=1.10.1=ha036c08_1 24 | - icu=58.2=h4b95b61_1 25 | - intel-openmp=2018.0.0=h8158457_8 26 | - jasper=1.900.1=h1f36771_4 27 | - jpeg=9b=he5867d9_2 28 | - libcxx=4.0.1=h579ed51_0 29 | - libcxxabi=4.0.1=hebd6815_0 30 | - libedit=3.1=hb4e282d_0 31 | - libffi=3.2.1=h475c297_4 32 | - libgfortran=3.0.1=h93005f0_2 33 | - libiconv=1.15=hdd342a3_7 34 | - libopus=1.2.1=h169cedb_0 35 | - libpng=1.6.32=hd1e8b91_4 36 | - libprotobuf=3.4.1=h326466f_0 37 | - libtiff=4.0.9=h0dac147_0 38 | - libvpx=1.6.1=h057a404_0 39 | - libxml2=2.9.4=hf05c021_6 40 | - matplotlib=2.1.1=py27hb768455_0 41 | - mkl=2018.0.1=hfbd8650_4 42 | - ncurses=6.0=hd04f020_2 43 | - numpy=1.14.0=py27h8a80b8c_0 44 | - olefile=0.44=py27h73ba740_0 45 | - opencv=3.3.1=py27h60a5f38_1 46 | - openssl=1.0.2n=hdbc3d79_0 47 | - pcre=8.41=hfb6ab37_1 48 | - pillow=5.0.0=py27hfcce615_0 49 | - pip=9.0.1=py27h1567d89_4 50 | - pixman=0.34.0=hca0a616_3 51 | - pycparser=2.18=py27h0d28d88_1 52 | - pyparsing=2.2.0=py27h5bb6aaf_0 53 | - python=2.7.14=hde5916a_29 54 | - python-dateutil=2.6.1=py27hd56c96b_1 55 | - pytz=2017.3=py27h803c07a_0 56 | - readline=7.0=hc1231fa_4 57 | - setuptools=38.4.0=py27_0 58 | - singledispatch=3.4.0.3=py27he22c18d_0 59 | - six=1.11.0=py27h7252ba3_1 60 | - sqlite=3.20.1=h7e4c145_2 61 | - ssl_match_hostname=3.5.0.1=py27h8780752_2 62 | - subprocess32=3.2.7=py27h24b2887_0 63 | - tk=8.6.7=h35a86e2_3 64 | - tornado=4.5.3=py27_0 65 | 
- wheel=0.30.0=py27h677a027_1 66 | - xz=5.2.3=h0278029_2 67 | - zlib=1.2.11=hf3cbc9b_2 68 | - pytorch=0.3.0=py27_cuda0.0_cudnn0.0he480db7_4 69 | - torchvision=0.2.0=py27hfc0307a_1 70 | - pip: 71 | - backports-abc==0.5 72 | - backports.functools-lru-cache==1.4 73 | - backports.ssl-match-hostname==3.5.0.1 74 | - torch==0.3.0.post4 75 | prefix: /Users/hfu/anaconda2/envs/dface 76 | 77 | -------------------------------------------------------------------------------- /log/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/log/__init__.py -------------------------------------------------------------------------------- /log/info: -------------------------------------------------------------------------------- 1 | log dir -------------------------------------------------------------------------------- /model_store/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/model_store/__init__.py -------------------------------------------------------------------------------- /model_store/info: -------------------------------------------------------------------------------- 1 | This directory store trained model net parameters and structure -------------------------------------------------------------------------------- /model_store/onet_epoch.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/model_store/onet_epoch.pt -------------------------------------------------------------------------------- /model_store/pnet_epoch.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/model_store/pnet_epoch.pt -------------------------------------------------------------------------------- /model_store/rnet_epoch.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/model_store/rnet_epoch.pt -------------------------------------------------------------------------------- /test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/test.jpg -------------------------------------------------------------------------------- /test_image.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from dface.core.detect import create_mtcnn_net, MtcnnDetector 3 | import dface.core.vision as vision 4 | 5 | 6 | 7 | 8 | if __name__ == '__main__': 9 | 10 | pnet, rnet, onet = create_mtcnn_net(p_model_path="./model_store/pnet_epoch.pt", r_model_path="./model_store/rnet_epoch.pt", o_model_path="./model_store/onet_epoch.pt", use_cuda=False) 11 | mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, onet=onet, min_face_size=24) 12 | 13 | img = cv2.imread("./test.jpg") 14 | img_bg = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 15 | #b, g, r = cv2.split(img) 16 | #img2 = cv2.merge([r, g, b]) 17 | 18 | bboxs, landmarks = mtcnn_detector.detect_face(img) 19 | # print box_align 20 | 21 | vision.vis_face(img_bg,bboxs,landmarks) 22 | 
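# If you are running without a display, vision.vis_face may not be usable. Below is a
# minimal sketch of an OpenCV-based fallback; it assumes bboxs rows look like
# [x1, y1, x2, y2, score] and landmarks rows are interleaved (x, y) pairs for the five
# points, so check MtcnnDetector.detect_face if your shapes differ.
out = img.copy()
for box in bboxs:
    x1, y1, x2, y2 = [int(v) for v in box[:4]]
    cv2.rectangle(out, (x1, y1), (x2, y2), (0, 255, 0), 2)
for pts in landmarks:
    for i in range(5):
        cv2.circle(out, (int(pts[2 * i]), int(pts[2 * i + 1])), 2, (0, 0, 255), -1)
cv2.imwrite("./test_result.jpg", out)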
--------------------------------------------------------------------------------