├── .gitignore
├── LICENSE
├── README.md
├── README_zh.md
├── __init__.py
├── anno_store
│   ├── __init__.py
│   ├── info
│   └── wider_origin_anno.txt
├── dface
│   ├── __init__.py
│   ├── config.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── detect.py
│   │   ├── image_reader.py
│   │   ├── image_tools.py
│   │   ├── imagedb.py
│   │   ├── models.py
│   │   ├── nms.py
│   │   ├── resnet_inception_v2.py
│   │   ├── roc.py
│   │   ├── utils.py
│   │   └── vision.py
│   ├── prepare_data
│   │   ├── __init__.py
│   │   ├── assemble.py
│   │   ├── assemble_onet_imglist.py
│   │   ├── assemble_pnet_imglist.py
│   │   ├── assemble_rnet_imglist.py
│   │   ├── gen_Onet_train_data.py
│   │   ├── gen_Pnet_train_data.py
│   │   ├── gen_Rnet_train_data.py
│   │   ├── gen_landmark_12.py
│   │   ├── gen_landmark_24.py
│   │   ├── gen_landmark_48.py
│   │   ├── gen_landmark_net_48.py
│   │   └── widerface_annotation_gen
│   │       ├── __init__.py
│   │       ├── transform.py
│   │       ├── wider_face_train.mat
│   │       └── wider_loader.py
│   └── train_net
│       ├── __init__.py
│       ├── train.py
│       ├── train_o_net.py
│       ├── train_p_net.py
│       └── train_r_net.py
├── environment-win64.yml
├── environment.yml
├── environment_osx.yaml
├── log
│   ├── __init__.py
│   └── info
├── model_store
│   ├── __init__.py
│   ├── info
│   ├── onet_epoch.pt
│   ├── pnet_epoch.pt
│   └── rnet_epoch.pt
├── test.jpg
└── test_image.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | *.pyc
3 | log/*.log
4 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2017- Jin Kuaikuai(314127900@qq.com)
2 | All rights reserved.
3 | Apache License
4 | Version 2.0, January 2004
5 | http://www.apache.org/licenses/
6 |
7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
8 |
9 | 1. Definitions.
10 |
11 | "License" shall mean the terms and conditions for use, reproduction,
12 | and distribution as defined by Sections 1 through 9 of this document.
13 |
14 | "Licensor" shall mean the copyright owner or entity authorized by
15 | the copyright owner that is granting the License.
16 |
17 | "Legal Entity" shall mean the union of the acting entity and all
18 | other entities that control, are controlled by, or are under common
19 | control with that entity. For the purposes of this definition,
20 | "control" means (i) the power, direct or indirect, to cause the
21 | direction or management of such entity, whether by contract or
22 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
23 | outstanding shares, or (iii) beneficial ownership of such entity.
24 |
25 | "You" (or "Your") shall mean an individual or Legal Entity
26 | exercising permissions granted by this License.
27 |
28 | "Source" form shall mean the preferred form for making modifications,
29 | including but not limited to software source code, documentation
30 | source, and configuration files.
31 |
32 | "Object" form shall mean any form resulting from mechanical
33 | transformation or translation of a Source form, including but
34 | not limited to compiled object code, generated documentation,
35 | and conversions to other media types.
36 |
37 | "Work" shall mean the work of authorship, whether in Source or
38 | Object form, made available under the License, as indicated by a
39 | copyright notice that is included in or attached to the work
40 | (an example is provided in the Appendix below).
41 |
42 | "Derivative Works" shall mean any work, whether in Source or Object
43 | form, that is based on (or derived from) the Work and for which the
44 | editorial revisions, annotations, elaborations, or other modifications
45 | represent, as a whole, an original work of authorship. For the purposes
46 | of this License, Derivative Works shall not include works that remain
47 | separable from, or merely link (or bind by name) to the interfaces of,
48 | the Work and Derivative Works thereof.
49 |
50 | "Contribution" shall mean any work of authorship, including
51 | the original version of the Work and any modifications or additions
52 | to that Work or Derivative Works thereof, that is intentionally
53 | submitted to Licensor for inclusion in the Work by the copyright owner
54 | or by an individual or Legal Entity authorized to submit on behalf of
55 | the copyright owner. For the purposes of this definition, "submitted"
56 | means any form of electronic, verbal, or written communication sent
57 | to the Licensor or its representatives, including but not limited to
58 | communication on electronic mailing lists, source code control systems,
59 | and issue tracking systems that are managed by, or on behalf of, the
60 | Licensor for the purpose of discussing and improving the Work, but
61 | excluding communication that is conspicuously marked or otherwise
62 | designated in writing by the copyright owner as "Not a Contribution."
63 |
64 | "Contributor" shall mean Licensor and any individual or Legal Entity
65 | on behalf of whom a Contribution has been received by Licensor and
66 | subsequently incorporated within the Work.
67 |
68 | 2. Grant of Copyright License. Subject to the terms and conditions of
69 | this License, each Contributor hereby grants to You a perpetual,
70 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
71 | copyright license to reproduce, prepare Derivative Works of,
72 | publicly display, publicly perform, sublicense, and distribute the
73 | Work and such Derivative Works in Source or Object form.
74 |
75 | 3. Grant of Patent License. Subject to the terms and conditions of
76 | this License, each Contributor hereby grants to You a perpetual,
77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
78 | (except as stated in this section) patent license to make, have made,
79 | use, offer to sell, sell, import, and otherwise transfer the Work,
80 | where such license applies only to those patent claims licensable
81 | by such Contributor that are necessarily infringed by their
82 | Contribution(s) alone or by combination of their Contribution(s)
83 | with the Work to which such Contribution(s) was submitted. If You
84 | institute patent litigation against any entity (including a
85 | cross-claim or counterclaim in a lawsuit) alleging that the Work
86 | or a Contribution incorporated within the Work constitutes direct
87 | or contributory patent infringement, then any patent licenses
88 | granted to You under this License for that Work shall terminate
89 | as of the date such litigation is filed.
90 |
91 | 4. Redistribution. You may reproduce and distribute copies of the
92 | Work or Derivative Works thereof in any medium, with or without
93 | modifications, and in Source or Object form, provided that You
94 | meet the following conditions:
95 |
96 | (a) You must give any other recipients of the Work or
97 | Derivative Works a copy of this License; and
98 |
99 | (b) You must cause any modified files to carry prominent notices
100 | stating that You changed the files; and
101 |
102 | (c) You must retain, in the Source form of any Derivative Works
103 | that You distribute, all copyright, patent, trademark, and
104 | attribution notices from the Source form of the Work,
105 | excluding those notices that do not pertain to any part of
106 | the Derivative Works; and
107 |
108 | (d) If the Work includes a "NOTICE" text file as part of its
109 | distribution, then any Derivative Works that You distribute must
110 | include a readable copy of the attribution notices contained
111 | within such NOTICE file, excluding those notices that do not
112 | pertain to any part of the Derivative Works, in at least one
113 | of the following places: within a NOTICE text file distributed
114 | as part of the Derivative Works; within the Source form or
115 | documentation, if provided along with the Derivative Works; or,
116 | within a display generated by the Derivative Works, if and
117 | wherever such third-party notices normally appear. The contents
118 | of the NOTICE file are for informational purposes only and
119 | do not modify the License. You may add Your own attribution
120 | notices within Derivative Works that You distribute, alongside
121 | or as an addendum to the NOTICE text from the Work, provided
122 | that such additional attribution notices cannot be construed
123 | as modifying the License.
124 |
125 | You may add Your own copyright statement to Your modifications and
126 | may provide additional or different license terms and conditions
127 | for use, reproduction, or distribution of Your modifications, or
128 | for any such Derivative Works as a whole, provided Your use,
129 | reproduction, and distribution of the Work otherwise complies with
130 | the conditions stated in this License.
131 |
132 | 5. Submission of Contributions. Unless You explicitly state otherwise,
133 | any Contribution intentionally submitted for inclusion in the Work
134 | by You to the Licensor shall be under the terms and conditions of
135 | this License, without any additional terms or conditions.
136 | Notwithstanding the above, nothing herein shall supersede or modify
137 | the terms of any separate license agreement you may have executed
138 | with Licensor regarding such Contributions.
139 |
140 | 6. Trademarks. This License does not grant permission to use the trade
141 | names, trademarks, service marks, or product names of the Licensor,
142 | except as required for reasonable and customary use in describing the
143 | origin of the Work and reproducing the content of the NOTICE file.
144 |
145 | 7. Disclaimer of Warranty. Unless required by applicable law or
146 | agreed to in writing, Licensor provides the Work (and each
147 | Contributor provides its Contributions) on an "AS IS" BASIS,
148 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
149 | implied, including, without limitation, any warranties or conditions
150 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
151 | PARTICULAR PURPOSE. You are solely responsible for determining the
152 | appropriateness of using or redistributing the Work and assume any
153 | risks associated with Your exercise of permissions under this License.
154 |
155 | 8. Limitation of Liability. In no event and under no legal theory,
156 | whether in tort (including negligence), contract, or otherwise,
157 | unless required by applicable law (such as deliberate and grossly
158 | negligent acts) or agreed to in writing, shall any Contributor be
159 | liable to You for damages, including any direct, indirect, special,
160 | incidental, or consequential damages of any character arising as a
161 | result of this License or out of the use or inability to use the
162 | Work (including but not limited to damages for loss of goodwill,
163 | work stoppage, computer failure or malfunction, or any and all
164 | other commercial damages or losses), even if such Contributor
165 | has been advised of the possibility of such damages.
166 |
167 | 9. Accepting Warranty or Additional Liability. While redistributing
168 | the Work or Derivative Works thereof, You may choose to offer,
169 | and charge a fee for, acceptance of support, warranty, indemnity,
170 | or other liability obligations and/or rights consistent with this
171 | License. However, in accepting such obligations, You may act only
172 | on Your own behalf and on Your sole responsibility, not on behalf
173 | of any other Contributor, and only if You agree to indemnify,
174 | defend, and hold each Contributor harmless for any liability
175 | incurred by, or claims asserted against, such Contributor by reason
176 | of your accepting any such warranty or additional liability.
177 |
178 | END OF TERMS AND CONDITIONS
179 |
180 | APPENDIX: How to apply the Apache License to your work.
181 |
182 | To apply the Apache License to your work, attach the following
183 | boilerplate notice, with the fields enclosed by brackets "{}"
184 | replaced with your own identifying information. (Don't include
185 | the brackets!) The text should be enclosed in the appropriate
186 | comment syntax for the file format. We also recommend that a
187 | file or class name and description of purpose be included on the
188 | same "printed page" as the copyright notice for easier
189 | identification within third-party archives.
190 |
191 | Copyright Jin Kuaikuai
192 |
193 | Licensed under the Apache License, Version 2.0 (the "License");
194 | you may not use this file except in compliance with the License.
195 | You may obtain a copy of the License at
196 |
197 | http://www.apache.org/licenses/LICENSE-2.0
198 |
199 | Unless required by applicable law or agreed to in writing, software
200 | distributed under the License is distributed on an "AS IS" BASIS,
201 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
202 | See the License for the specific language governing permissions and
203 | limitations under the License.
204 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | -----------------
6 | # DFace • [](https://opensource.org/licenses/Apache-2.0)
7 |
8 |
9 | | **`Linux CPU`** | **`Linux GPU`** | **`Mac OS CPU`** | **`Windows CPU`** |
10 | |-----------------|---------------------|------------------|-------------------|
11 | | [](http://dftech.oss-cn-hangzhou.aliyuncs.com/opendface/img/build_pass.svg) | [](http://dftech.oss-cn-hangzhou.aliyuncs.com/opendface/img/build_pass.svg) | [](http://dftech.oss-cn-hangzhou.aliyuncs.com/opendface/img/build_pass.svg) | [](http://dftech.oss-cn-hangzhou.aliyuncs.com/opendface/img/build_pass.svg) |
12 |
13 |
14 | **Free and open source face detection, based on MTCNN.**
15 |
16 | [Official Website](https://dface.tech)
17 |
18 | **We also provide a complete face recognition SDK, which includes tracking, detection, face recognition, face anti-spoofing and more. See [dface.tech](https://dface.tech) for details.**
19 | 
20 |
21 |
22 | **DFace** is open source software for face detection and recognition. All features are implemented with **[pytorch](https://github.com/pytorch/pytorch)**, Facebook's deep learning framework. PyTorch uses reverse-mode auto-differentiation, which lets you change the way your network behaves arbitrarily with zero lag or overhead.
23 | DFace inherits these characteristics, which keeps it dynamic and makes the code easy to review.
24 |
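As a minimal illustration of reverse-mode auto-differentiation (plain PyTorch, not part of DFace itself):

```python
import torch
from torch.autograd import Variable

# Gradients are recorded while the forward pass runs, then replayed backwards.
x = Variable(torch.ones(2, 2), requires_grad=True)
y = (x * 3).sum()   # the graph is built dynamically, in ordinary Python
y.backward()        # reverse-mode auto-differentiation
print(x.grad)       # dy/dx == 3 for every element
```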
25 | DFace supports GPU acceleration with NVIDIA CUDA. We highly recommend the Linux GPU version; it is fast enough to run in real time.
26 |
27 | Our inspiration comes from several research papers on this topic, as well as current and past work such as [Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Networks](https://arxiv.org/abs/1604.02878) and, on the face recognition side, [FaceNet: A Unified Embedding for Face Recognition and Clustering](https://arxiv.org/abs/1503.03832).
28 |
29 | **MTCNN Structure**
30 |
31 | 
32 | 
33 | 
34 |
35 | **If you want to contribute to DFace, please review the CONTRIBUTING.md in the project. We use [Slack](https://dfaceio.slack.com/) for tracking requests and bugs. You can also join the QQ group 681403076 or add my WeChat jinkuaikuai005.**
36 |
37 |
38 | ## TODO (contribute to DFace)
39 | - Implement face comparison based on center loss or triplet loss. The recommended model is ResNet Inception v2. See this [Paper](https://arxiv.org/abs/1503.03832) and [FaceNet](https://github.com/davidsandberg/facenet).
40 | - Face anti-spoofing, distinguishing real faces by lighting and texture. Recommended approach: the LBP algorithm with an SVM.
41 | - 3D mask anti-spoofing.
42 | - Mobile port with Caffe2 and C++.
43 | - TensorRT migration.
44 | - Docker support, GPU version
45 |
46 | ## Installation
47 |
48 | DFace has two major modules, detection and recognition. For both, we provide tutorials on how to train the models and how to run them.
49 | First set up PyTorch and cv2. We suggest Anaconda to create an independent virtual Python environment. **If you want to train on GPU, please install NVIDIA CUDA and cuDNN.**
50 |
51 | ### Requirements
52 | * cuda 8.0
53 | * anaconda
54 | * pytorch
55 | * torchvision
56 | * cv2
57 | * matplotlib
58 |
59 |
60 | ```shell
61 | git clone https://github.com/kuaikuaikim/dface.git
62 | ```
63 |
64 |
65 | We also provide an Anaconda environment dependency list called environment.yml (on Windows please use environment-win64.yml, on Mac environment_osx.yaml) in the root path.
66 | You can create your DFace environment very easily.
67 | ```shell
68 | cd DFace
69 |
70 | conda env create -f path/to/environment.yml
71 | ```
72 |
73 | Add DFace to your local Python path
74 |
75 | ```shell
76 | export PYTHONPATH=$PYTHONPATH:{your local DFace root path}
77 | ```
78 |
79 |
80 | ### Face Detetion and Recognition
81 |
82 | If you are interested in how to train an MTCNN model, you can follow the steps below.
83 |
84 | #### Train the MTCNN Model
85 | MTCNN consists of three networks, **PNet**, **RNet** and **ONet**, so training proceeds in three stages. Each stage depends on the previously trained network, which generates the training data fed to the current network and drives the loss down stage by stage.
86 | Please download the training face **datasets** before training. We use **[WIDER FACE](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/)** and **[CelebA](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html)**. WIDER FACE is used for training face classification and face bounding-box regression, while CelebA is used for face landmarks. The original WIDER FACE annotation file is in MATLAB format; you must transform it to text. I have put the transformed annotation text file at [anno_store/wider_origin_anno.txt](https://github.com/kuaikuaikim/DFace/blob/master/anno_store/wider_origin_anno.txt). This is the file the --anno_file parameter below refers to. (A short sketch of loading the assembled training image lists appears after the training steps below.)
87 |
88 |
89 | * Create the DFace training-data temporary folder; this is the folder referenced by the --dface_traindata_store parameter below
90 |
91 | ```shell
92 | mkdir {your dface traindata folder}
93 | ```
94 |
95 |
96 | * Generate PNet Train data and annotation file
97 |
98 | ```shell
99 | python dface/prepare_data/gen_Pnet_train_data.py --prefix_path {annotation file image prefix path, just your local wider face images folder} --dface_traindata_store {dface train data temporary folder you made before } --anno_file {wider face original combined annotation file, default anno_store/wider_origin_anno.txt}
100 | ```
101 | * Assemble annotation file and shuffle it
102 |
103 | ```shell
104 | python dface/prepare_data/assemble_pnet_imglist.py
105 | ```
106 | * Train PNet model
107 |
108 | ```shell
109 | python dface/train_net/train_p_net.py
110 | ```
111 | * Generate RNet Train data and annotation file
112 |
113 | ```shell
114 | python dface/prepare_data/gen_Rnet_train_data.py --prefix_path {annotation file image prefix path, just your local wider face images folder} --dface_traindata_store {dface train data temporary folder you made before } --anno_file {wider face original combined annotation file, default anno_store/wider_origin_anno.txt} --pmodel_file {your PNet model file trained before}
115 | ```
116 | * Assemble annotation file and shuffle it
117 |
118 | ```shell
119 | python dface/prepare_data/assemble_rnet_imglist.py
120 | ```
121 | * Train RNet model
122 |
123 | ```shell
124 | python dface/train_net/train_r_net.py
125 | ```
126 | * Generate ONet Train data and annotation file
127 |
128 | ```shell
129 | python dface/prepare_data/gen_Onet_train_data.py --prefix_path {annotation file image prefix path, just your local wider face images folder} --dface_traindata_store {dface train data temporary folder you made before } --anno_file {wider face original combined annotation file, default anno_store/wider_origin_anno.txt} --pmodel_file {your PNet model file trained before} --rmodel_file {your RNet model file trained before}
130 | ```
131 | * Generate ONet Train landmarks data
132 |
133 | ```shell
134 | python dface/prepare_data/gen_landmark_48.py
135 | ```
136 | * Assemble annotation file and shuffle it
137 |
138 | ```shell
139 | python dface/prepare_data/assemble_onet_imglist.py
140 | ```
141 | * Train ONet model
142 |
143 | ```shell
144 | python dface/train_net/train_o_net.py
145 | ```
146 |
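For reference, the assembled image lists produced by the assemble_*_imglist.py steps above are plain whitespace-separated text files in the format that dface/core/imagedb.py parses: an image path, a class label, then optionally four bounding-box regression targets and ten landmark regression targets. A minimal sketch of loading one with ImageDB (the output path below is an assumption; see PNET_TRAIN_IMGLIST_FILENAME in dface/config.py and wherever assemble_pnet_imglist.py writes its result on your machine):

```python
from dface.core.imagedb import ImageDB

# Assumed location of the assembled PNet image list (imglist_anno_12.txt).
imagedb = ImageDB("anno_store/imglist_anno_12.txt", prefix_path="", mode="train")

gt_imdb = imagedb.load_imdb()                     # dicts: image, label, bbox_target, landmark_target
gt_imdb = imagedb.append_flipped_images(gt_imdb)  # optional horizontally-flipped copies
print(len(gt_imdb), gt_imdb[0]["label"])
```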
147 | #### Test face detection
148 | **If you don't want to train, I have put onet_epoch.pt, pnet_epoch.pt and rnet_epoch.pt in the model_store folder. Just run test_image.py; a rough sketch of what it does follows the command below.**
149 |
150 | ```shell
151 | python test_image.py
152 | ```
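A minimal sketch of roughly what test_image.py does, written only against the API shown in dface/core/detect.py (the real script's visualization may differ; min_face_size=24 is just an illustrative choice):

```python
import cv2
from dface.core.detect import create_mtcnn_net, MtcnnDetector

# Load the bundled models from model_store (set use_cuda=True if CUDA is available).
pnet, rnet, onet = create_mtcnn_net(p_model_path="model_store/pnet_epoch.pt",
                                    r_model_path="model_store/rnet_epoch.pt",
                                    o_model_path="model_store/onet_epoch.pt",
                                    use_cuda=False)
detector = MtcnnDetector(pnet=pnet, rnet=rnet, onet=onet, min_face_size=24)

img = cv2.imread("test.jpg")
boxes, landmarks = detector.detect_face(img)   # boxes: n x 5 -> x1, y1, x2, y2, score

for b in boxes:
    cv2.rectangle(img, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])), (0, 255, 0), 2)
cv2.imwrite("result.jpg", img)
```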
153 |
154 |
155 | ## Demo
156 |
157 | 
158 |
159 |
160 | ### QQ Group
161 | 
162 |
163 |
164 | #### 681403076
165 |
166 | #### My WeChat
167 | ##### cobbestne
168 |
169 |
170 | ## License
171 |
172 | [Apache License 2.0](LICENSE)
173 |
174 |
175 | ## Reference
176 |
177 | * [OpenFace](https://github.com/cmusatyalab/openface)
178 |
--------------------------------------------------------------------------------
/README_zh.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | -----------------
6 | # DFace (Deeplearning Face) • [](https://opensource.org/licenses/Apache-2.0)
7 |
8 |
9 | | **`Linux CPU`** | **`Linux GPU`** | **`Mac OS CPU`** | **`Windows CPU`** |
10 | |-----------------|---------------------|------------------|-------------------|
11 | | [](http://pic.dface.io/pass.svg) | [](http://pic.dface.io/pass.svg) | [](http://pic.dface.io/pass.svg) | [](http://pic.dface.io/pass.svg) |
12 |
13 |
14 | **A real-time multi-face detection and face recognition system based on the multi-task cascaded convolutional network (MTCNN) and Center Loss.**
15 |
16 |
17 | [Slack chat group](https://dfaceio.slack.com/)
18 |
19 |
20 |
21 | **DFace** is an open source deep learning face detection and face recognition system. All features are developed with the **[pytorch](https://github.com/pytorch/pytorch)** framework. PyTorch is a deep learning framework developed by Facebook; it offers some interesting advanced features such as automatic differentiation and dynamic graph construction. DFace naturally inherits these advantages, which makes its training process simpler and its implementation code clearer and easier to understand.
22 | DFace can use CUDA for GPU acceleration. We recommend trying the Linux GPU mode; it can run at nearly real-time speed.
23 | All of the inspiration comes from recent academic research, such as [Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Networks](https://arxiv.org/abs/1604.02878) and [FaceNet: A Unified Embedding for Face Recognition and Clustering](https://arxiv.org/abs/1503.03832)
24 |
25 |
26 | **MTCNN Structure**
27 |
28 | 
29 |
30 |
31 | **If you are interested in DFace and want to contribute, the TODO list below contains features that still need to be implemented; I update it regularly as a live list of work to be done. Submit your fork/pull request and I will use issues to track and respond to all problems. You can also join the official DFace QQ group 681403076, or add my WeChat jinkuaikuai005.**
32 |
33 | ### TODO (features to be developed)
34 | - Develop face comparison based on center loss or triplet loss, using a ResNet Inception v2 model. This feature compares the similarity of two face images. See the [Paper](https://arxiv.org/abs/1503.03832) and [FaceNet](https://github.com/davidsandberg/facenet) for details.
35 | - Anti-spoofing: use face characteristics such as lighting and texture to defend against photo attacks, video attacks, replay attacks, etc. See the LBP algorithm and SVM-trained models for reference.
36 | - 3D face anti-spoofing.
37 | - Mobile port: migrate trained PyTorch models to Caffe2 via the ONNX standard, and reimplement some numpy algorithms in C++.
38 | - TensorRT migration for high concurrency.
39 | - Docker support, GPU version
40 |
41 | ## Installation
42 | DFace has two major modules, face detection and face recognition. I provide detailed steps for training and running every model. You first need a Python environment with PyTorch and cv2; I recommend using Anaconda to set up an independent virtual environment. The author currently prefers installing on Linux Ubuntu. Thanks to a user from Shandong for sharing a Windows DFace installation walkthrough; for the Windows installation tutorial
43 | see his [blog post](http://www.alearner.top/index.php/2017/12/23/dface-pytorch-win64-gpu)
44 |
45 |
46 | ### Requirements
47 | * cuda 8.0
48 | * anaconda
49 | * pytorch
50 | * torchvision
51 | * cv2
52 | * matplotlib
53 |
54 | ```shell
55 | git clone https://github.com/kuaikuaikim/DFace.git
56 | ```
57 |
58 | I provide an Anaconda environment dependency file, environment.yml (on Windows please use environment-win64.yml), which makes it easy to build your own virtual environment.
59 |
60 | ```shell
61 | cd dface
62 |
63 | conda env create -f environment.yml
64 | ```
65 |
66 | Add DFace to the Python module search path
67 |
68 | ```shell
69 | export PYTHONPATH=$PYTHONPATH:{your local DFace root path}
70 | ```
71 |
72 |
73 |
74 | ### Face Recognition and Detection
75 |
76 | If you are interested in the MTCNN model, the following steps may help you.
77 |
78 | #### Train the MTCNN model
79 |
80 | MTCNN consists of three networks, called **PNet**, **RNet** and **ONet**, so our training process runs in three successive stages. For better results, each network being trained depends on the previously trained network to generate its data. All face datasets come from **[WIDER FACE](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/)** and **[CelebA](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html)**. WIDER FACE only provides a large amount of face bounding-box data, while CelebA contains face landmark data. In the training below, only the step that generates ONet landmark training data and annotation files uses the CelebA dataset; everything else uses WIDER FACE. The WIDER FACE wider_face_train.mat annotation file needs to be converted to txt format; I wrote a [conversion script](https://github.com/kuaikuaikim/DFace/blob/master/dface/prepare_data/widerface_annotation_gen/transform.py) with h5py. I also provide an already converted WIDER FACE annotation file, [anno_store/wider_origin_anno.txt](https://github.com/kuaikuaikim/DFace/blob/master/anno_store/info/wider_origin_anno.txt); the --anno_file parameter in the training steps below uses this converted annotation file by default.
81 |
82 |
83 | * Create the dface training-data temporary folder; it corresponds to the --dface_traindata_store parameter in all of the following steps
84 | ```shell
85 | mkdir {your dface traindata folder}
86 | ```
87 |
88 |
89 | * Generate PNet training data and annotation file
90 |
91 | ```shell
92 | python dface/prepare_data/gen_Pnet_train_data.py --prefix_path {image path prefix in the annotation file, i.e. your local wider face image folder} --dface_traindata_store {the dface training-data temporary folder created above} --anno_file {wider face annotation file, optional, defaults to anno_store/wider_origin_anno.txt}
93 | ```
94 | * Assemble and shuffle the annotation files
95 |
96 | ```shell
97 | python dface/prepare_data/assemble_pnet_imglist.py
98 | ```
99 |
100 | * Train the PNet model
101 |
102 |
103 | ```shell
104 | python dface/train_net/train_p_net.py
105 | ```
106 | * Generate RNet training data and annotation file
107 |
108 | ```shell
109 | python dface/prepare_data/gen_Rnet_train_data.py --prefix_path {image path prefix in the annotation file, i.e. your local wider face image folder} --dface_traindata_store {the dface training-data temporary folder created above} --anno_file {wider face annotation file, optional, defaults to anno_store/wider_origin_anno.txt} --pmodel_file {the PNet model file trained above}
110 | ```
111 | * Assemble and shuffle the annotation files
112 |
113 | ```shell
114 | python dface/prepare_data/assemble_rnet_imglist.py
115 | ```
116 |
117 | * Train the RNet model
118 |
119 | ```shell
120 | python dface/train_net/train_r_net.py
121 | ```
122 |
123 | * Generate ONet training data and annotation file
124 |
125 | ```shell
126 | python dface/prepare_data/gen_Onet_train_data.py --prefix_path {image path prefix in the annotation file, i.e. your local wider face image folder} --dface_traindata_store {the dface training-data temporary folder created above} --anno_file {wider face annotation file, optional, defaults to anno_store/wider_origin_anno.txt} --pmodel_file {the PNet model file trained above} --rmodel_file {the RNet model file trained above}
127 | ```
128 |
129 | * Generate ONet facial landmark training data and annotation file
130 |
131 | ```shell
132 | python dface/prepare_data/gen_landmark_48.py
133 | ```
134 |
135 | * Assemble and shuffle the annotation files (including facial landmarks)
136 |
137 | ```shell
138 | python dface/prepare_data/assemble_onet_imglist.py
139 | ```
140 |
141 | * Train the ONet model
142 |
143 | ```shell
144 | python dface/train_net/train_o_net.py
145 | ```
146 |
147 | #### Test face detection
148 | ```shell
149 | python test_image.py
150 | ```
151 |
152 | ### Face Comparison
153 |
154 | @TODO implement face recognition based on center loss
155 |
156 | ## Test Results
157 | 
158 |
159 |
160 | ### QQ Group (join the group to get the models)
161 |
162 | #### 681403076
163 |
164 | 
165 |
166 | #### My WeChat
167 |
168 | ##### jinkuaikuai005
169 |
170 | 
171 |
172 |
173 |
174 | ## License
175 |
176 | [Apache License 2.0](LICENSE)
177 |
178 |
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/__init__.py
--------------------------------------------------------------------------------
/anno_store/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/anno_store/__init__.py
--------------------------------------------------------------------------------
/anno_store/info:
--------------------------------------------------------------------------------
1 | This directory stores the annotation files of the training data
--------------------------------------------------------------------------------
/dface/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/dface/__init__.py
--------------------------------------------------------------------------------
/dface/config.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 |
4 | MODEL_STORE_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))+"/model_store"
5 |
6 |
7 | ANNO_STORE_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))+"/anno_store"
8 |
9 |
10 | LOG_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))+"/log"
11 |
12 |
13 | USE_CUDA = True
14 |
15 |
16 | TRAIN_BATCH_SIZE = 512
17 |
18 | TRAIN_LR = 0.01
19 |
20 | END_EPOCH = 10
21 |
22 |
23 | PNET_POSTIVE_ANNO_FILENAME = "pos_12.txt"
24 | PNET_NEGATIVE_ANNO_FILENAME = "neg_12.txt"
25 | PNET_PART_ANNO_FILENAME = "part_12.txt"
26 | PNET_LANDMARK_ANNO_FILENAME = "landmark_12.txt"
27 |
28 |
29 | RNET_POSTIVE_ANNO_FILENAME = "pos_24.txt"
30 | RNET_NEGATIVE_ANNO_FILENAME = "neg_24.txt"
31 | RNET_PART_ANNO_FILENAME = "part_24.txt"
32 | RNET_LANDMARK_ANNO_FILENAME = "landmark_24.txt"
33 |
34 |
35 | ONET_POSTIVE_ANNO_FILENAME = "pos_48.txt"
36 | ONET_NEGATIVE_ANNO_FILENAME = "neg_48.txt"
37 | ONET_PART_ANNO_FILENAME = "part_48.txt"
38 | ONET_LANDMARK_ANNO_FILENAME = "landmark_48.txt"
39 |
40 | PNET_TRAIN_IMGLIST_FILENAME = "imglist_anno_12.txt"
41 | RNET_TRAIN_IMGLIST_FILENAME = "imglist_anno_24.txt"
42 | ONET_TRAIN_IMGLIST_FILENAME = "imglist_anno_48.txt"
--------------------------------------------------------------------------------
/dface/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/dface/core/__init__.py
--------------------------------------------------------------------------------
/dface/core/detect.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import time
3 | import numpy as np
4 | import torch
5 | from torch.autograd.variable import Variable
6 | from dface.core.models import PNet,RNet,ONet
7 | import dface.core.utils as utils
8 | import dface.core.image_tools as image_tools
9 |
10 |
11 | def create_mtcnn_net(p_model_path=None, r_model_path=None, o_model_path=None, use_cuda=True):
12 |
13 | pnet, rnet, onet = None, None, None
14 |
15 | if p_model_path is not None:
16 | pnet = PNet(use_cuda=use_cuda)
17 | if(use_cuda):
18 | pnet.load_state_dict(torch.load(p_model_path))
19 | pnet.cuda()
20 | else:
21 | # forcing all GPU tensors to be in CPU while loading
22 | pnet.load_state_dict(torch.load(p_model_path, map_location=lambda storage, loc: storage))
23 | pnet.eval()
24 |
25 | if r_model_path is not None:
26 | rnet = RNet(use_cuda=use_cuda)
27 | if (use_cuda):
28 | rnet.load_state_dict(torch.load(r_model_path))
29 | rnet.cuda()
30 | else:
31 | rnet.load_state_dict(torch.load(r_model_path, map_location=lambda storage, loc: storage))
32 | rnet.eval()
33 |
34 | if o_model_path is not None:
35 | onet = ONet(use_cuda=use_cuda)
36 | if (use_cuda):
37 | onet.load_state_dict(torch.load(o_model_path))
38 | onet.cuda()
39 | else:
40 | onet.load_state_dict(torch.load(o_model_path, map_location=lambda storage, loc: storage))
41 | onet.eval()
42 |
43 | return pnet,rnet,onet
44 |
45 |
46 |
47 |
48 | class MtcnnDetector(object):
49 | """
50 | P,R,O net face detection and landmarks align
51 | """
52 | def __init__(self,
53 | pnet = None,
54 | rnet = None,
55 | onet = None,
56 | min_face_size=12,
57 | stride=2,
58 | threshold=[0.6, 0.7, 0.7],
59 | scale_factor=0.709,
60 | ):
61 |
62 | self.pnet_detector = pnet
63 | self.rnet_detector = rnet
64 | self.onet_detector = onet
65 | self.min_face_size = min_face_size
66 | self.stride=stride
67 | self.thresh = threshold
68 | self.scale_factor = scale_factor
69 |
70 |
71 | def unique_image_format(self,im):
72 | if not isinstance(im,np.ndarray):
73 | if im.mode == 'I':
74 | im = np.array(im, np.int32, copy=False)
75 | elif im.mode == 'I;16':
76 | im = np.array(im, np.int16, copy=False)
77 | else:
78 | im = np.asarray(im)
79 | return im
80 |
81 | def square_bbox(self, bbox):
82 | """
83 | convert bbox to square
84 | Parameters:
85 | ----------
86 | bbox: numpy array , shape n x m
87 | input bbox
88 | Returns:
89 | -------
90 | square bbox
91 | """
92 | square_bbox = bbox.copy()
93 |
94 | h = bbox[:, 3] - bbox[:, 1] + 1
95 | w = bbox[:, 2] - bbox[:, 0] + 1
96 | l = np.maximum(h,w)
97 | square_bbox[:, 0] = bbox[:, 0] + w*0.5 - l*0.5
98 | square_bbox[:, 1] = bbox[:, 1] + h*0.5 - l*0.5
99 |
100 | square_bbox[:, 2] = square_bbox[:, 0] + l - 1
101 | square_bbox[:, 3] = square_bbox[:, 1] + l - 1
102 | return square_bbox
103 |
104 |
105 | def generate_bounding_box(self, map, reg, scale, threshold):
106 | """
107 | generate bbox from feature map
108 | Parameters:
109 | ----------
110 | map: numpy array , n x m x 1
111 | detect score for each position
112 | reg: numpy array , n x m x 4
113 | bbox
114 | scale: float number
115 | scale of this detection
116 | threshold: float number
117 | detect threshold
118 | Returns:
119 | -------
120 | bbox array
121 | """
122 | stride = 2
123 | cellsize = 12
124 |
125 | t_index = np.where(map > threshold)
126 |
127 | # find nothing
128 | if t_index[0].size == 0:
129 | return np.array([])
130 |
131 | dx1, dy1, dx2, dy2 = [reg[0, t_index[0], t_index[1], i] for i in range(4)]
132 | reg = np.array([dx1, dy1, dx2, dy2])
133 |
134 | # lefteye_dx, lefteye_dy, righteye_dx, righteye_dy, nose_dx, nose_dy, \
135 | # leftmouth_dx, leftmouth_dy, rightmouth_dx, rightmouth_dy = [landmarks[0, t_index[0], t_index[1], i] for i in range(10)]
136 | #
137 | # landmarks = np.array([lefteye_dx, lefteye_dy, righteye_dx, righteye_dy, nose_dx, nose_dy, leftmouth_dx, leftmouth_dy, rightmouth_dx, rightmouth_dy])
138 |
139 |
140 |
141 | score = map[t_index[0], t_index[1], 0]
142 | boundingbox = np.vstack([np.round((stride * t_index[1]) / scale),
143 | np.round((stride * t_index[0]) / scale),
144 | np.round((stride * t_index[1] + cellsize) / scale),
145 | np.round((stride * t_index[0] + cellsize) / scale),
146 | score,
147 | reg,
148 | # landmarks
149 | ])
150 |
151 | return boundingbox.T
152 |
153 |
154 | def resize_image(self, img, scale):
155 | """
156 | resize image by the given scale factor
157 | Parameters:
158 | ----------
159 | img: numpy array , height x width x channel
160 | input image, channels in BGR order here
161 | scale: float number
162 | scale factor of resize operation
163 | Returns:
164 | -------
165 | resized image, new_height x new_width x channel
166 | """
167 | height, width, channels = img.shape
168 | new_height = int(height * scale) # resized new height
169 | new_width = int(width * scale) # resized new width
170 | new_dim = (new_width, new_height)
171 | img_resized = cv2.resize(img, new_dim, interpolation=cv2.INTER_LINEAR) # resized image
172 | return img_resized
173 |
174 |
175 | def pad(self, bboxes, w, h):
176 | """
177 | pad the boxes
178 | Parameters:
179 | ----------
180 | bboxes: numpy array, n x 5
181 | input bboxes
182 | w: float number
183 | width of the input image
184 | h: float number
185 | height of the input image
186 | Returns :
187 | ------
188 | dy, dx : numpy array, n x 1
189 | start point of the bbox in target image
190 | edy, edx : numpy array, n x 1
191 | end point of the bbox in target image
192 | y, x : numpy array, n x 1
193 | start point of the bbox in original image
194 | ey, ex : numpy array, n x 1
195 | end point of the bbox in original image
196 | tmph, tmpw: numpy array, n x 1
197 | height and width of the bbox
198 | """
199 |
200 | tmpw = (bboxes[:, 2] - bboxes[:, 0] + 1).astype(np.int32)
201 | tmph = (bboxes[:, 3] - bboxes[:, 1] + 1).astype(np.int32)
202 | numbox = bboxes.shape[0]
203 |
204 | dx = np.zeros((numbox, ))
205 | dy = np.zeros((numbox, ))
206 | edx, edy = tmpw.copy()-1, tmph.copy()-1
207 |
208 | x, y, ex, ey = bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 3]
209 |
210 | tmp_index = np.where(ex > w-1)
211 | edx[tmp_index] = tmpw[tmp_index] + w - 2 - ex[tmp_index]
212 | ex[tmp_index] = w - 1
213 |
214 | tmp_index = np.where(ey > h-1)
215 | edy[tmp_index] = tmph[tmp_index] + h - 2 - ey[tmp_index]
216 | ey[tmp_index] = h - 1
217 |
218 | tmp_index = np.where(x < 0)
219 | dx[tmp_index] = 0 - x[tmp_index]
220 | x[tmp_index] = 0
221 |
222 | tmp_index = np.where(y < 0)
223 | dy[tmp_index] = 0 - y[tmp_index]
224 | y[tmp_index] = 0
225 |
226 | return_list = [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]
227 | return_list = [item.astype(np.int32) for item in return_list]
228 |
229 | return return_list
230 |
231 |
232 | def detect_pnet(self, im):
233 | """Get face candidates through pnet
234 |
235 | Parameters:
236 | ----------
237 | im: numpy array
238 | input image array
239 |
240 | Returns:
241 | -------
242 | boxes: numpy array
243 | detected boxes before calibration
244 | boxes_align: numpy array
245 | boxes after calibration
246 | """
247 |
248 | # im = self.unique_image_format(im)
249 |
250 | h, w, c = im.shape
251 | net_size = 12
252 |
253 | current_scale = float(net_size) / self.min_face_size # find initial scale
254 | im_resized = self.resize_image(im, current_scale)
255 | current_height, current_width, _ = im_resized.shape
256 |
257 | # fcn
258 | all_boxes = list()
259 | while min(current_height, current_width) > net_size:
260 | feed_imgs = []
261 | image_tensor = image_tools.convert_image_to_tensor(im_resized)
262 | feed_imgs.append(image_tensor)
263 | feed_imgs = torch.stack(feed_imgs)
264 | feed_imgs = Variable(feed_imgs)
265 |
266 |
267 | if self.pnet_detector.use_cuda:
268 | feed_imgs = feed_imgs.cuda()
269 |
270 | cls_map, reg = self.pnet_detector(feed_imgs)
271 |
272 | cls_map_np = image_tools.convert_chwTensor_to_hwcNumpy(cls_map.cpu())
273 | reg_np = image_tools.convert_chwTensor_to_hwcNumpy(reg.cpu())
274 | # landmark_np = image_tools.convert_chwTensor_to_hwcNumpy(landmark.cpu())
275 |
276 |
277 | boxes = self.generate_bounding_box(cls_map_np[ 0, :, :], reg_np, current_scale, self.thresh[0])
278 |
279 | current_scale *= self.scale_factor
280 | im_resized = self.resize_image(im, current_scale)
281 | current_height, current_width, _ = im_resized.shape
282 |
283 | if boxes.size == 0:
284 | continue
285 | keep = utils.nms(boxes[:, :5], 0.5, 'Union')
286 | boxes = boxes[keep]
287 | all_boxes.append(boxes)
288 |
289 | if len(all_boxes) == 0:
290 | return None, None
291 |
292 | all_boxes = np.vstack(all_boxes)
293 |
294 | # merge the detection from first stage
295 | keep = utils.nms(all_boxes[:, 0:5], 0.7, 'Union')
296 | all_boxes = all_boxes[keep]
297 | # boxes = all_boxes[:, :5]
298 |
299 | bw = all_boxes[:, 2] - all_boxes[:, 0] + 1
300 | bh = all_boxes[:, 3] - all_boxes[:, 1] + 1
301 |
302 | # landmark_keep = all_boxes[:, 9:].reshape((5,2))
303 |
304 |
305 | boxes = np.vstack([all_boxes[:,0],
306 | all_boxes[:,1],
307 | all_boxes[:,2],
308 | all_boxes[:,3],
309 | all_boxes[:,4],
310 | # all_boxes[:, 0] + all_boxes[:, 9] * bw,
311 | # all_boxes[:, 1] + all_boxes[:,10] * bh,
312 | # all_boxes[:, 0] + all_boxes[:, 11] * bw,
313 | # all_boxes[:, 1] + all_boxes[:, 12] * bh,
314 | # all_boxes[:, 0] + all_boxes[:, 13] * bw,
315 | # all_boxes[:, 1] + all_boxes[:, 14] * bh,
316 | # all_boxes[:, 0] + all_boxes[:, 15] * bw,
317 | # all_boxes[:, 1] + all_boxes[:, 16] * bh,
318 | # all_boxes[:, 0] + all_boxes[:, 17] * bw,
319 | # all_boxes[:, 1] + all_boxes[:, 18] * bh
320 | ])
321 |
322 | boxes = boxes.T
323 |
324 | align_topx = all_boxes[:, 0] + all_boxes[:, 5] * bw
325 | align_topy = all_boxes[:, 1] + all_boxes[:, 6] * bh
326 | align_bottomx = all_boxes[:, 2] + all_boxes[:, 7] * bw
327 | align_bottomy = all_boxes[:, 3] + all_boxes[:, 8] * bh
328 |
329 | # refine the boxes
330 | boxes_align = np.vstack([ align_topx,
331 | align_topy,
332 | align_bottomx,
333 | align_bottomy,
334 | all_boxes[:, 4],
335 | # align_topx + all_boxes[:,9] * bw,
336 | # align_topy + all_boxes[:,10] * bh,
337 | # align_topx + all_boxes[:,11] * bw,
338 | # align_topy + all_boxes[:,12] * bh,
339 | # align_topx + all_boxes[:,13] * bw,
340 | # align_topy + all_boxes[:,14] * bh,
341 | # align_topx + all_boxes[:,15] * bw,
342 | # align_topy + all_boxes[:,16] * bh,
343 | # align_topx + all_boxes[:,17] * bw,
344 | # align_topy + all_boxes[:,18] * bh,
345 | ])
346 | boxes_align = boxes_align.T
347 |
348 | return boxes, boxes_align
349 |
350 | def detect_rnet(self, im, dets):
351 | """Get face candidates using rnet
352 |
353 | Parameters:
354 | ----------
355 | im: numpy array
356 | input image array
357 | dets: numpy array
358 | detection results of pnet
359 |
360 | Returns:
361 | -------
362 | boxes: numpy array
363 | detected boxes before calibration
364 | boxes_align: numpy array
365 | boxes after calibration
366 | """
367 | h, w, c = im.shape
368 |
369 | if dets is None:
370 | return None,None
371 |
372 | dets = self.square_bbox(dets)
373 | dets[:, 0:4] = np.round(dets[:, 0:4])
374 |
375 | [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
376 | num_boxes = dets.shape[0]
377 |
378 | '''
379 | # helper for setting RNet batch size
380 | batch_size = self.rnet_detector.batch_size
381 | ratio = float(num_boxes) / batch_size
382 | if ratio > 3 or ratio < 0.3:
383 | print "You may need to reset RNet batch size if this info appears frequently, \
384 | face candidates:%d, current batch_size:%d"%(num_boxes, batch_size)
385 | '''
386 |
387 | # cropped_ims_tensors = np.zeros((num_boxes, 3, 24, 24), dtype=np.float32)
388 | cropped_ims_tensors = []
389 | for i in range(num_boxes):
390 | tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
391 | tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, :] = im[y[i]:ey[i]+1, x[i]:ex[i]+1, :]
392 | crop_im = cv2.resize(tmp, (24, 24))
393 | crop_im_tensor = image_tools.convert_image_to_tensor(crop_im)
394 | # cropped_ims_tensors[i, :, :, :] = crop_im_tensor
395 | cropped_ims_tensors.append(crop_im_tensor)
396 | feed_imgs = Variable(torch.stack(cropped_ims_tensors))
397 |
398 | if self.rnet_detector.use_cuda:
399 | feed_imgs = feed_imgs.cuda()
400 |
401 | cls_map, reg = self.rnet_detector(feed_imgs)
402 |
403 | cls_map = cls_map.cpu().data.numpy()
404 | reg = reg.cpu().data.numpy()
405 | # landmark = landmark.cpu().data.numpy()
406 |
407 |
408 | keep_inds = np.where(cls_map > self.thresh[1])[0]
409 |
410 | if len(keep_inds) > 0:
411 | boxes = dets[keep_inds]
412 | cls = cls_map[keep_inds]
413 | reg = reg[keep_inds]
414 | # landmark = landmark[keep_inds]
415 | else:
416 | return None, None
417 |
418 | keep = utils.nms(boxes, 0.7)
419 |
420 | if len(keep) == 0:
421 | return None, None
422 |
423 | keep_cls = cls[keep]
424 | keep_boxes = boxes[keep]
425 | keep_reg = reg[keep]
426 | # keep_landmark = landmark[keep]
427 |
428 |
429 | bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
430 | bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1
431 |
432 |
433 | boxes = np.vstack([ keep_boxes[:,0],
434 | keep_boxes[:,1],
435 | keep_boxes[:,2],
436 | keep_boxes[:,3],
437 | keep_cls[:,0],
438 | # keep_boxes[:,0] + keep_landmark[:, 0] * bw,
439 | # keep_boxes[:,1] + keep_landmark[:, 1] * bh,
440 | # keep_boxes[:,0] + keep_landmark[:, 2] * bw,
441 | # keep_boxes[:,1] + keep_landmark[:, 3] * bh,
442 | # keep_boxes[:,0] + keep_landmark[:, 4] * bw,
443 | # keep_boxes[:,1] + keep_landmark[:, 5] * bh,
444 | # keep_boxes[:,0] + keep_landmark[:, 6] * bw,
445 | # keep_boxes[:,1] + keep_landmark[:, 7] * bh,
446 | # keep_boxes[:,0] + keep_landmark[:, 8] * bw,
447 | # keep_boxes[:,1] + keep_landmark[:, 9] * bh,
448 | ])
449 |
450 | align_topx = keep_boxes[:,0] + keep_reg[:,0] * bw
451 | align_topy = keep_boxes[:,1] + keep_reg[:,1] * bh
452 | align_bottomx = keep_boxes[:,2] + keep_reg[:,2] * bw
453 | align_bottomy = keep_boxes[:,3] + keep_reg[:,3] * bh
454 |
455 | boxes_align = np.vstack([align_topx,
456 | align_topy,
457 | align_bottomx,
458 | align_bottomy,
459 | keep_cls[:, 0],
460 | # align_topx + keep_landmark[:, 0] * bw,
461 | # align_topy + keep_landmark[:, 1] * bh,
462 | # align_topx + keep_landmark[:, 2] * bw,
463 | # align_topy + keep_landmark[:, 3] * bh,
464 | # align_topx + keep_landmark[:, 4] * bw,
465 | # align_topy + keep_landmark[:, 5] * bh,
466 | # align_topx + keep_landmark[:, 6] * bw,
467 | # align_topy + keep_landmark[:, 7] * bh,
468 | # align_topx + keep_landmark[:, 8] * bw,
469 | # align_topy + keep_landmark[:, 9] * bh,
470 | ])
471 |
472 | boxes = boxes.T
473 | boxes_align = boxes_align.T
474 |
475 | return boxes, boxes_align
476 |
477 | def detect_onet(self, im, dets):
478 | """Get face candidates using onet
479 |
480 | Parameters:
481 | ----------
482 | im: numpy array
483 | input image array
484 | dets: numpy array
485 | detection results of rnet
486 |
487 | Returns:
488 | -------
489 | boxes_align: numpy array
490 | boxes after calibration
491 | landmarks_align: numpy array
492 | landmarks after calibration
493 |
494 | """
495 | h, w, c = im.shape
496 |
497 | if dets is None:
498 | return None, None
499 |
500 | dets = self.square_bbox(dets)
501 | dets[:, 0:4] = np.round(dets[:, 0:4])
502 |
503 | [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
504 | num_boxes = dets.shape[0]
505 |
506 |
507 | # cropped_ims_tensors = np.zeros((num_boxes, 3, 24, 24), dtype=np.float32)
508 | cropped_ims_tensors = []
509 | for i in range(num_boxes):
510 | tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
511 | tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
512 | crop_im = cv2.resize(tmp, (48, 48))
513 | crop_im_tensor = image_tools.convert_image_to_tensor(crop_im)
514 | # cropped_ims_tensors[i, :, :, :] = crop_im_tensor
515 | cropped_ims_tensors.append(crop_im_tensor)
516 | feed_imgs = Variable(torch.stack(cropped_ims_tensors))
517 |
518 | if self.rnet_detector.use_cuda:
519 | feed_imgs = feed_imgs.cuda()
520 |
521 | cls_map, reg, landmark = self.onet_detector(feed_imgs)
522 |
523 | cls_map = cls_map.cpu().data.numpy()
524 | reg = reg.cpu().data.numpy()
525 | landmark = landmark.cpu().data.numpy()
526 |
527 | keep_inds = np.where(cls_map > self.thresh[2])[0]
528 |
529 | if len(keep_inds) > 0:
530 | boxes = dets[keep_inds]
531 | cls = cls_map[keep_inds]
532 | reg = reg[keep_inds]
533 | landmark = landmark[keep_inds]
534 | else:
535 | return None, None
536 |
537 | keep = utils.nms(boxes, 0.7, mode="Minimum")
538 |
539 | if len(keep) == 0:
540 | return None, None
541 |
542 | keep_cls = cls[keep]
543 | keep_boxes = boxes[keep]
544 | keep_reg = reg[keep]
545 | keep_landmark = landmark[keep]
546 |
547 | bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
548 | bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1
549 |
550 |
551 | align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
552 | align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
553 | align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
554 | align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh
555 |
556 | align_landmark_topx = keep_boxes[:, 0]
557 | align_landmark_topy = keep_boxes[:, 1]
558 |
559 |
560 |
561 |
562 | boxes_align = np.vstack([align_topx,
563 | align_topy,
564 | align_bottomx,
565 | align_bottomy,
566 | keep_cls[:, 0],
567 | # align_topx + keep_landmark[:, 0] * bw,
568 | # align_topy + keep_landmark[:, 1] * bh,
569 | # align_topx + keep_landmark[:, 2] * bw,
570 | # align_topy + keep_landmark[:, 3] * bh,
571 | # align_topx + keep_landmark[:, 4] * bw,
572 | # align_topy + keep_landmark[:, 5] * bh,
573 | # align_topx + keep_landmark[:, 6] * bw,
574 | # align_topy + keep_landmark[:, 7] * bh,
575 | # align_topx + keep_landmark[:, 8] * bw,
576 | # align_topy + keep_landmark[:, 9] * bh,
577 | ])
578 |
579 | boxes_align = boxes_align.T
580 |
581 | landmark = np.vstack([
582 | align_landmark_topx + keep_landmark[:, 0] * bw,
583 | align_landmark_topy + keep_landmark[:, 1] * bh,
584 | align_landmark_topx + keep_landmark[:, 2] * bw,
585 | align_landmark_topy + keep_landmark[:, 3] * bh,
586 | align_landmark_topx + keep_landmark[:, 4] * bw,
587 | align_landmark_topy + keep_landmark[:, 5] * bh,
588 | align_landmark_topx + keep_landmark[:, 6] * bw,
589 | align_landmark_topy + keep_landmark[:, 7] * bh,
590 | align_landmark_topx + keep_landmark[:, 8] * bw,
591 | align_landmark_topy + keep_landmark[:, 9] * bh,
592 | ])
593 |
594 | landmark_align = landmark.T
595 |
596 | return boxes_align, landmark_align
597 |
598 |
599 | def detect_face(self,img):
600 | """Detect face over image
601 | """
602 | boxes_align = np.array([])
603 | landmark_align =np.array([])
604 |
605 | t = time.time()
606 |
607 | # pnet
608 | if self.pnet_detector:
609 | boxes, boxes_align = self.detect_pnet(img)
610 | if boxes_align is None:
611 | return np.array([]), np.array([])
612 |
613 | t1 = time.time() - t
614 | t = time.time()
615 |
616 | # rnet
617 | if self.rnet_detector:
618 | boxes, boxes_align = self.detect_rnet(img, boxes_align)
619 | if boxes_align is None:
620 | return np.array([]), np.array([])
621 |
622 | t2 = time.time() - t
623 | t = time.time()
624 |
625 | # onet
626 | if self.onet_detector:
627 | boxes_align, landmark_align = self.detect_onet(img, boxes_align)
628 | if boxes_align is None:
629 | return np.array([]), np.array([])
630 |
631 | t3 = time.time() - t
632 | t = time.time()
633 | print("time cost " + '{:.3f}'.format(t1+t2+t3) + ' pnet {:.3f} rnet {:.3f} onet {:.3f}'.format(t1, t2, t3))
634 |
635 | return boxes_align, landmark_align
636 |
637 |
638 |
639 |
640 |
--------------------------------------------------------------------------------
/dface/core/image_reader.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 |
4 |
5 |
6 | class TrainImageReader:
7 | def __init__(self, imdb, im_size, batch_size=128, shuffle=False):
8 |
9 | self.imdb = imdb
10 | self.batch_size = batch_size
11 | self.im_size = im_size
12 | self.shuffle = shuffle
13 |
14 | self.cur = 0
15 | self.size = len(imdb)
16 | self.index = np.arange(self.size)
17 | self.num_classes = 2
18 |
19 | self.batch = None
20 | self.data = None
21 | self.label = None
22 |
23 | self.label_names= ['label', 'bbox_target', 'landmark_target']
24 | self.reset()
25 | self.get_batch()
26 |
27 | def reset(self):
28 | self.cur = 0
29 | if self.shuffle:
30 | np.random.shuffle(self.index)
31 |
32 | def iter_next(self):
33 | return self.cur + self.batch_size <= self.size
34 |
35 | def __iter__(self):
36 | return self
37 |
38 | def __next__(self):
39 | return self.next()
40 |
41 | def next(self):
42 | if self.iter_next():
43 | self.get_batch()
44 | self.cur += self.batch_size
45 | return self.data,self.label
46 | else:
47 | raise StopIteration
48 |
49 | def getindex(self):
50 | return self.cur / self.batch_size
51 |
52 | def getpad(self):
53 | if self.cur + self.batch_size > self.size:
54 | return self.cur + self.batch_size - self.size
55 | else:
56 | return 0
57 |
58 | def get_batch(self):
59 | cur_from = self.cur
60 | cur_to = min(cur_from + self.batch_size, self.size)
61 | imdb = [self.imdb[self.index[i]] for i in range(cur_from, cur_to)]
62 | data, label = get_minibatch(imdb)
63 | self.data = data['data']
64 | self.label = [label[name] for name in self.label_names]
65 |
66 |
67 |
68 | class TestImageLoader:
69 | def __init__(self, imdb, batch_size=1, shuffle=False):
70 | self.imdb = imdb
71 | self.batch_size = batch_size
72 | self.shuffle = shuffle
73 | self.size = len(imdb)
74 | self.index = np.arange(self.size)
75 |
76 | self.cur = 0
77 | self.data = None
78 | self.label = None
79 |
80 | self.reset()
81 | self.get_batch()
82 |
83 | def reset(self):
84 | self.cur = 0
85 | if self.shuffle:
86 | np.random.shuffle(self.index)
87 |
88 | def iter_next(self):
89 | return self.cur + self.batch_size <= self.size
90 |
91 | def __iter__(self):
92 | return self
93 |
94 | def __next__(self):
95 | return self.next()
96 |
97 | def next(self):
98 | if self.iter_next():
99 | self.get_batch()
100 | self.cur += self.batch_size
101 | return self.data
102 | else:
103 | raise StopIteration
104 |
105 | def getindex(self):
106 | return self.cur / self.batch_size
107 |
108 | def getpad(self):
109 | if self.cur + self.batch_size > self.size:
110 | return self.cur + self.batch_size - self.size
111 | else:
112 | return 0
113 |
114 | def get_batch(self):
115 | cur_from = self.cur
116 | cur_to = min(cur_from + self.batch_size, self.size)
117 | imdb = [self.imdb[self.index[i]] for i in range(cur_from, cur_to)]
118 | data= get_testbatch(imdb)
119 | self.data=data['data']
120 |
121 |
122 |
123 |
124 | def get_minibatch(imdb):
125 |
126 | # im_size: 12, 24 or 48
127 | num_images = len(imdb)
128 | processed_ims = list()
129 | cls_label = list()
130 | bbox_reg_target = list()
131 | landmark_reg_target = list()
132 |
133 | for i in range(num_images):
134 | im = cv2.imread(imdb[i]['image'])
135 | #im = Image.open(imdb[i]['image'])
136 |
137 | if imdb[i]['flipped']:
138 | im = im[:, ::-1, :]
139 | #im = im.transpose(Image.FLIP_LEFT_RIGHT)
140 |
141 | cls = imdb[i]['label']
142 | bbox_target = imdb[i]['bbox_target']
143 | landmark = imdb[i]['landmark_target']
144 |
145 | processed_ims.append(im)
146 | cls_label.append(cls)
147 | bbox_reg_target.append(bbox_target)
148 | landmark_reg_target.append(landmark)
149 |
150 | im_array = np.asarray(processed_ims)
151 |
152 | label_array = np.array(cls_label)
153 |
154 | bbox_target_array = np.vstack(bbox_reg_target)
155 |
156 | landmark_target_array = np.vstack(landmark_reg_target)
157 |
158 | data = {'data': im_array}
159 | label = {'label': label_array,
160 | 'bbox_target': bbox_target_array,
161 | 'landmark_target': landmark_target_array
162 | }
163 |
164 | return data, label
165 |
166 |
167 | def get_testbatch(imdb):
168 | assert len(imdb) == 1, "Single batch only"
169 | im = cv2.imread(imdb[0]['image'])
170 | data = {'data': im}
171 | return data
--------------------------------------------------------------------------------
/dface/core/image_tools.py:
--------------------------------------------------------------------------------
1 | import torchvision.transforms as transforms
2 | import torch
3 | from torch.autograd.variable import Variable
4 | import numpy as np
5 |
6 | transform = transforms.ToTensor()
7 |
8 | def convert_image_to_tensor(image):
9 | """convert an image to pytorch tensor
10 |
11 | Parameters:
12 | ----------
13 | image: numpy array , h * w * c
14 |
15 | Returns:
16 | -------
17 | image_tensor: pytorch.FloatTensor, c * h * w
18 | """
19 | image = image.astype(np.float)
20 | return transform(image)
21 | # return transform(image)
22 |
23 |
24 | def convert_chwTensor_to_hwcNumpy(tensor):
25 | """convert a group images pytorch tensor(count * c * h * w) to numpy array images(count * h * w * c)
26 | Parameters:
27 | ----------
28 | tensor: numpy array , count * c * h * w
29 |
30 | Returns:
31 | -------
32 | numpy array images: count * h * w * c
33 | """
34 |
35 | if isinstance(tensor, Variable):
36 | return np.transpose(tensor.data.numpy(), (0,2,3,1))
37 | elif isinstance(tensor, torch.FloatTensor):
38 | return np.transpose(tensor.numpy(), (0,2,3,1))
39 | else:
40 | raise Exception("covert b*c*h*w tensor to b*h*w*c numpy error.This tensor must have 4 dimension.")
--------------------------------------------------------------------------------
/dface/core/imagedb.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 |
4 | class ImageDB(object):
5 | def __init__(self, image_annotation_file, prefix_path='', mode='train'):
6 | self.prefix_path = prefix_path
7 | self.image_annotation_file = image_annotation_file
8 | self.classes = ['__background__', 'face']
9 | self.num_classes = 2
10 | self.image_set_index = self.load_image_set_index()
11 | self.num_images = len(self.image_set_index)
12 | self.mode = mode
13 |
14 |
15 | def load_image_set_index(self):
16 | """Get image index
17 |
18 | Parameters:
19 | ----------
20 | Returns:
21 | -------
22 | image_set_index: str
23 | relative path of image
24 | """
25 | assert os.path.exists(self.image_annotation_file), 'Path does not exist: {}'.format(self.image_annotation_file)
26 | with open(self.image_annotation_file, 'r') as f:
27 | image_set_index = [x.strip().split(' ')[0] for x in f.readlines()]
28 | return image_set_index
29 |
30 |
31 | def load_imdb(self):
32 | """Get and save ground truth image database
33 |
34 | Parameters:
35 | ----------
36 | Returns:
37 | -------
38 | gt_imdb: dict
39 | image database with annotations
40 | """
41 | #cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
42 | #if os.path.exists(cache_file):
43 | # with open(cache_file, 'rb') as f:
44 | # imdb = cPickle.load(f)
45 | # print '{} gt imdb loaded from {}'.format(self.name, cache_file)
46 | # return imdb
47 | gt_imdb = self.load_annotations()
48 | #with open(cache_file, 'wb') as f:
49 | # cPickle.dump(gt_imdb, f, cPickle.HIGHEST_PROTOCOL)
50 | return gt_imdb
51 |
52 |
53 | def real_image_path(self, index):
54 | """Given image index, return full path
55 |
56 | Parameters:
57 | ----------
58 | index: str
59 | relative path of image
60 | Returns:
61 | -------
62 | image_file: str
63 | full path of image
64 | """
65 |
66 | index = index.replace("\\", "/")
67 |
68 | if not os.path.exists(index):
69 | image_file = os.path.join(self.prefix_path, index)
70 | else:
71 | image_file = index
72 | if not image_file.endswith('.jpg'):
73 | image_file = image_file + '.jpg'
74 | assert os.path.exists(image_file), 'Path does not exist: {}'.format(image_file)
75 | return image_file
76 |
77 |
78 | def load_annotations(self,annotion_type=1):
79 | """Load annotations
80 |
81 | Parameters:
82 | ----------
83 | annotion_type: int
84 | reserved annotation-format flag (unused; the loader infers the
85 | format from the number of fields in each annotation line)
86 | Returns:
87 | -------
88 | imdb: dict
89 | image database with annotations
90 | """
91 |
92 | assert os.path.exists(self.image_annotation_file), 'annotations not found at {}'.format(self.image_annotation_file)
93 | with open(self.image_annotation_file, 'r') as f:
94 | annotations = f.readlines()
95 |
96 |
97 | imdb = []
98 | for i in range(self.num_images):
99 | annotation = annotations[i].strip().split(' ')
100 | index = annotation[0]
101 | im_path = self.real_image_path(index)
102 | imdb_ = dict()
103 | imdb_['image'] = im_path
104 |
105 | if self.mode == 'test':
106 | # gt_boxes = map(float, annotation[1:])
107 | # boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4)
108 | # imdb_['gt_boxes'] = boxes
109 | pass
110 | else:
111 | label = annotation[1]
112 | imdb_['label'] = int(label)
113 | imdb_['flipped'] = False
114 | imdb_['bbox_target'] = np.zeros((4,))
115 | imdb_['landmark_target'] = np.zeros((10,))
116 | if len(annotation[2:])==4:
117 | bbox_target = annotation[2:6]
118 | imdb_['bbox_target'] = np.array(bbox_target).astype(float)
119 | if len(annotation[2:])==14:
120 | bbox_target = annotation[2:6]
121 | imdb_['bbox_target'] = np.array(bbox_target).astype(float)
122 | landmark = annotation[6:]
123 | imdb_['landmark_target'] = np.array(landmark).astype(float)
124 | imdb.append(imdb_)
125 | return imdb
126 |
127 |
128 | def append_flipped_images(self, imdb):
129 | """append flipped images to imdb
130 |
131 | Parameters:
132 | ----------
133 | imdb: imdb
134 | image database
135 | Returns:
136 | -------
137 | imdb: dict
138 | image database with flipped image annotations added
139 | """
140 | print('append flipped images to imdb', len(imdb))
141 | for i in range(len(imdb)):
142 | imdb_ = imdb[i]
143 | m_bbox = imdb_['bbox_target'].copy()
144 | m_bbox[0], m_bbox[2] = -m_bbox[2], -m_bbox[0]
145 |
146 | landmark_ = imdb_['landmark_target'].copy()
147 | landmark_ = landmark_.reshape((5, 2))
148 | landmark_ = np.asarray([(1 - x, y) for (x, y) in landmark_])
149 | landmark_[[0, 1]] = landmark_[[1, 0]]
150 | landmark_[[3, 4]] = landmark_[[4, 3]]
151 |
152 | item = {'image': imdb_['image'],
153 | 'label': imdb_['label'],
154 | 'bbox_target': m_bbox,
155 | 'landmark_target': landmark_.reshape((10)),
156 | 'flipped': True}
157 |
158 | imdb.append(item)
159 | self.image_set_index *= 2
160 | return imdb
161 |
162 |
163 |
--------------------------------------------------------------------------------
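A short sketch of how ImageDB is typically driven (the list file name here is hypothetical; any image list assembled into the anno_store format works). Each annotation line holds a relative image path and a label, optionally followed by four bbox offsets and ten landmark offsets, which is exactly what load_annotations parses above.

from dface.core.imagedb import ImageDB

imagedb = ImageDB("anno_store/imglist_anno_12.txt", prefix_path="", mode="train")
gt_imdb = imagedb.load_imdb()
gt_imdb = imagedb.append_flipped_images(gt_imdb)   # doubles the sample count
print(imagedb.num_images, len(gt_imdb))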
/dface/core/models.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | def weights_init(m):
7 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
8 | nn.init.xavier_uniform_(m.weight.data)
9 | nn.init.constant_(m.bias, 0.1)
10 |
11 |
12 |
13 | class LossFn:
14 | def __init__(self, cls_factor=1, box_factor=1, landmark_factor=1):
15 | # loss function
16 | self.cls_factor = cls_factor
17 | self.box_factor = box_factor
18 | self.land_factor = landmark_factor
19 | self.loss_cls = nn.BCELoss()
20 | self.loss_box = nn.MSELoss()
21 | self.loss_landmark = nn.MSELoss()
22 |
23 |
24 | def cls_loss(self,gt_label,pred_label):
25 | pred_label = torch.squeeze(pred_label)
26 | gt_label = torch.squeeze(gt_label)
27 | # get the mask elements which are >= 0; only labels 0 and 1 affect the detection loss
28 | mask = torch.ge(gt_label,0)
29 | valid_gt_label = torch.masked_select(gt_label,mask)
30 | valid_pred_label = torch.masked_select(pred_label,mask)
31 | return self.loss_cls(valid_pred_label,valid_gt_label)*self.cls_factor
32 |
33 |
34 | def box_loss(self,gt_label,gt_offset,pred_offset):
35 | pred_offset = torch.squeeze(pred_offset)
36 | gt_offset = torch.squeeze(gt_offset)
37 | gt_label = torch.squeeze(gt_label)
38 |
39 | # get the mask elements whose label != 0
40 | unmask = torch.eq(gt_label,0)
41 | mask = torch.eq(unmask,0)
42 | #convert mask to dim index
43 | chose_index = torch.nonzero(mask.data)
44 | chose_index = torch.squeeze(chose_index)
45 | #only valid element can effect the loss
46 | valid_gt_offset = gt_offset[chose_index,:]
47 | valid_pred_offset = pred_offset[chose_index,:]
48 | return self.loss_box(valid_pred_offset,valid_gt_offset)*self.box_factor
49 |
50 |
51 | def landmark_loss(self,gt_label,gt_landmark,pred_landmark):
52 | pred_landmark = torch.squeeze(pred_landmark)
53 | gt_landmark = torch.squeeze(gt_landmark)
54 | gt_label = torch.squeeze(gt_label)
55 | mask = torch.eq(gt_label,-2)
56 |
57 | chose_index = torch.nonzero(mask.data)
58 | chose_index = torch.squeeze(chose_index)
59 |
60 | valid_gt_landmark = gt_landmark[chose_index, :]
61 | valid_pred_landmark = pred_landmark[chose_index, :]
62 | return self.loss_landmark(valid_pred_landmark,valid_gt_landmark)*self.land_factor
63 |
64 |
65 |
66 |
67 |
68 | class PNet(nn.Module):
69 | ''' PNet '''
70 |
71 | def __init__(self, is_train=False, use_cuda=True):
72 | super(PNet, self).__init__()
73 | self.is_train = is_train
74 | self.use_cuda = use_cuda
75 |
76 | # backend
77 | self.pre_layer = nn.Sequential(
78 | nn.Conv2d(3, 10, kernel_size=3, stride=1), # conv1
79 | nn.PReLU(), # PReLU1
80 | nn.MaxPool2d(kernel_size=2, stride=2), # pool1
81 | nn.Conv2d(10, 16, kernel_size=3, stride=1), # conv2
82 | nn.PReLU(), # PReLU2
83 | nn.Conv2d(16, 32, kernel_size=3, stride=1), # conv3
84 | nn.PReLU() # PReLU3
85 | )
86 | # detection
87 | self.conv4_1 = nn.Conv2d(32, 1, kernel_size=1, stride=1)
88 | # bounding box regression
89 | self.conv4_2 = nn.Conv2d(32, 4, kernel_size=1, stride=1)
90 | # landmark localization
91 | self.conv4_3 = nn.Conv2d(32, 10, kernel_size=1, stride=1)
92 |
93 | # weight initialization with xavier
94 | self.apply(weights_init)
95 |
96 | def forward(self, x):
97 | x = self.pre_layer(x)
98 | label = torch.sigmoid(self.conv4_1(x))
99 | offset = self.conv4_2(x)
100 | # landmark = self.conv4_3(x)
101 |
102 | if self.is_train is True:
103 | # label_loss = LossUtil.label_loss(self.gt_label,torch.squeeze(label))
104 | # bbox_loss = LossUtil.bbox_loss(self.gt_bbox,torch.squeeze(offset))
105 | return label,offset
106 | #landmark = self.conv4_3(x)
107 | return label, offset
108 |
109 |
110 |
111 |
112 |
113 | class RNet(nn.Module):
114 | ''' RNet '''
115 |
116 | def __init__(self,is_train=False, use_cuda=True):
117 | super(RNet, self).__init__()
118 | self.is_train = is_train
119 | self.use_cuda = use_cuda
120 | # backend
121 | self.pre_layer = nn.Sequential(
122 | nn.Conv2d(3, 28, kernel_size=3, stride=1), # conv1
123 | nn.PReLU(), # prelu1
124 | nn.MaxPool2d(kernel_size=3, stride=2), # pool1
125 | nn.Conv2d(28, 48, kernel_size=3, stride=1), # conv2
126 | nn.PReLU(), # prelu2
127 | nn.MaxPool2d(kernel_size=3, stride=2), # pool2
128 | nn.Conv2d(48, 64, kernel_size=2, stride=1), # conv3
129 | nn.PReLU() # prelu3
130 |
131 | )
132 | self.conv4 = nn.Linear(64*2*2, 128) # conv4
133 | self.prelu4 = nn.PReLU() # prelu4
134 | # detection
135 | self.conv5_1 = nn.Linear(128, 1)
136 | # bounding box regression
137 | self.conv5_2 = nn.Linear(128, 4)
138 | # landmark localization
139 | self.conv5_3 = nn.Linear(128, 10)
140 | # weight initialization with xavier
141 | self.apply(weights_init)
142 |
143 | def forward(self, x):
144 | # backend
145 | x = self.pre_layer(x)
146 | x = x.view(x.size(0), -1)
147 | x = self.conv4(x)
148 | x = self.prelu4(x)
149 | # detection
150 | det = torch.sigmoid(self.conv5_1(x))
151 | box = self.conv5_2(x)
152 | # landmark = self.conv5_3(x)
153 |
154 | if self.is_train is True:
155 | return det, box
156 | # landmark = self.conv5_3(x)
157 | return det, box
158 |
159 |
160 |
161 |
162 | class ONet(nn.Module):
163 | ''' ONet '''
164 |
165 | def __init__(self,is_train=False, use_cuda=True):
166 | super(ONet, self).__init__()
167 | self.is_train = is_train
168 | self.use_cuda = use_cuda
169 | # backend
170 | self.pre_layer = nn.Sequential(
171 | nn.Conv2d(3, 32, kernel_size=3, stride=1), # conv1
172 | nn.PReLU(), # prelu1
173 | nn.MaxPool2d(kernel_size=3, stride=2), # pool1
174 | nn.Conv2d(32, 64, kernel_size=3, stride=1), # conv2
175 | nn.PReLU(), # prelu2
176 | nn.MaxPool2d(kernel_size=3, stride=2), # pool2
177 | nn.Conv2d(64, 64, kernel_size=3, stride=1), # conv3
178 | nn.PReLU(), # prelu3
179 | nn.MaxPool2d(kernel_size=2,stride=2), # pool3
180 | nn.Conv2d(64,128,kernel_size=2,stride=1), # conv4
181 | nn.PReLU() # prelu4
182 | )
183 | self.conv5 = nn.Linear(128*2*2, 256) # conv5
184 | self.prelu5 = nn.PReLU() # prelu5
185 | # detection
186 | self.conv6_1 = nn.Linear(256, 1)
187 | # bounding box regression
188 | self.conv6_2 = nn.Linear(256, 4)
189 | # landmark localization
190 | self.conv6_3 = nn.Linear(256, 10)
191 | # weight initialization with xavier
192 | self.apply(weights_init)
193 |
194 | def forward(self, x):
195 | # backend
196 | x = self.pre_layer(x)
197 | x = x.view(x.size(0), -1)
198 | x = self.conv5(x)
199 | x = self.prelu5(x)
200 | # detection
201 | det = torch.sigmoid(self.conv6_1(x))
202 | box = self.conv6_2(x)
203 | landmark = self.conv6_3(x)
204 | if self.is_train is True:
205 | return det, box, landmark
206 | #landmard = self.conv5_3(x)
207 | return det, box, landmark
208 |
209 |
210 |
211 |
212 |
213 | # Residual Block
214 | class ResidualBlock(nn.Module):
215 | def __init__(self, in_channels, out_channels, stride=1, downsample=None):
216 | super(ResidualBlock, self).__init__()
217 | self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
218 | self.bn1 = nn.BatchNorm2d(out_channels)
219 | self.relu = nn.ReLU(inplace=True)
220 | self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
221 | self.bn2 = nn.BatchNorm2d(out_channels)
222 | self.downsample = downsample
223 |
224 | def forward(self, x):
225 | residual = x
226 | out = self.conv1(x)
227 | out = self.bn1(out)
228 | out = self.relu(out)
229 | out = self.conv2(out)
230 | out = self.bn2(out)
231 | if self.downsample:
232 | residual = self.downsample(x)
233 | out += residual
234 | out = self.relu(out)
235 | return out
236 |
237 |
238 |
239 | # ResNet Module
240 | class ResNet(nn.Module):
241 | def __init__(self, block, num_classes=10):
242 | super(ResNet, self).__init__()
243 | self.in_channels = 16
244 | self.conv = nn.Conv2d(3, 16, kernel_size=3, padding=1)
245 | self.bn = nn.BatchNorm2d(16)
246 | self.relu = nn.ReLU(inplace=True)
247 | self.layer1 = self.make_layer(block, 16, 3)
248 | self.layer2 = self.make_layer(block, 32, 3, 2)
249 | self.layer3 = self.make_layer(block, 64, 3, 2)
250 | self.avg_pool = nn.AvgPool2d(8)
251 | self.fc = nn.Linear(64, num_classes)
252 |
253 | def make_layer(self, block, out_channels, blocks, stride=1):
254 | downsample = None
255 | if (stride != 1) or (self.in_channels != out_channels):
256 | downsample = nn.Sequential(
257 | nn.Conv2d(self.in_channels, out_channels, kernel_size=3, stride=stride, padding=1),
258 | nn.BatchNorm2d(out_channels))
259 | layers = []
260 | layers.append(block(self.in_channels, out_channels, stride, downsample))
261 | self.in_channels = out_channels
262 | for i in range(1, blocks):
263 | layers.append(block(out_channels, out_channels))
264 | return nn.Sequential(*layers)
265 |
266 | def forward(self, x):
267 | out = self.conv(x)
268 | out = self.bn(out)
269 | out = self.relu(out)
270 | out = self.layer1(out)
271 | out = self.layer2(out)
272 | out = self.layer3(out)
273 | out = self.avg_pool(out)
274 | out = out.view(out.size(0), -1)
275 | out = self.fc(out)
276 | return out
--------------------------------------------------------------------------------
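A small smoke test (not from the repository) for the three MTCNN heads at their canonical crop sizes. PNet is fully convolutional, so a 12x12 input collapses to a 1x1 score map, while RNet and ONet expect fixed 24x24 and 48x48 crops because of their Linear layers.

import torch
from dface.core.models import PNet, RNet, ONet

pnet = PNet(is_train=False, use_cuda=False)
label, offset = pnet(torch.randn(1, 3, 12, 12))
print(label.shape, offset.shape)                      # (1, 1, 1, 1) and (1, 4, 1, 1)

rnet = RNet(use_cuda=False)
det, box = rnet(torch.randn(1, 3, 24, 24))            # (1, 1) and (1, 4)

onet = ONet(use_cuda=False)
det, box, landmark = onet(torch.randn(1, 3, 48, 48))  # (1, 1), (1, 4) and (1, 10)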
/dface/core/nms.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def torch_nms(dets, thresh, mode="Union"):
5 | """
6 | greedily select boxes with high confidence
7 | keep boxes overlap <= thresh
8 | rule out overlap > thresh
9 | :param dets: numpy array of [[x1, y1, x2, y2, score]]
10 | :param thresh: retain overlap <= thresh; mode "Union" uses IoU, "Minimum" divides by the smaller area
11 | :return: indexes to keep
12 | """
13 | x1 = dets[:, 0]
14 | y1 = dets[:, 1]
15 | x2 = dets[:, 2]
16 | y2 = dets[:, 3]
17 | scores = dets[:, 4]
18 |
19 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
20 | order = scores.argsort()[::-1]
21 |
22 | keep = []
23 | while order.size > 0:
24 | i = order[0]
25 | keep.append(i)
26 | xx1 = np.maximum(x1[i], x1[order[1:]])
27 | yy1 = np.maximum(y1[i], y1[order[1:]])
28 | xx2 = np.minimum(x2[i], x2[order[1:]])
29 | yy2 = np.minimum(y2[i], y2[order[1:]])
30 |
31 | w = np.maximum(0.0, xx2 - xx1 + 1)
32 | h = np.maximum(0.0, yy2 - yy1 + 1)
33 | inter = w * h
34 | if mode == "Union":
35 | ovr = inter / (areas[i] + areas[order[1:]] - inter)
36 | elif mode == "Minimum":
37 | ovr = inter / np.minimum(areas[i], areas[order[1:]])
38 |
39 | inds = np.where(ovr <= thresh)[0]
40 | order = order[inds + 1]
41 |
42 | return keep
43 |
--------------------------------------------------------------------------------
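Despite its name, torch_nms operates on plain numpy arrays, exactly like utils.nms further below. A tiny sketch with made-up boxes: the first two overlap heavily, so the lower-scoring one is suppressed.

import numpy as np
from dface.core.nms import torch_nms

dets = np.array([
    [10, 10, 50, 50, 0.95],
    [12, 12, 52, 52, 0.90],
    [100, 100, 140, 140, 0.80],
], dtype=np.float32)

keep = torch_nms(dets, thresh=0.5, mode="Union")
print(keep)   # -> [0, 2]; box 1 is suppressed by box 0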
/dface/core/resnet_inception_v2.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.utils.model_zoo as model_zoo
4 | import os
5 | import sys
6 |
7 |
8 | class BasicConv2d(nn.Module):
9 | def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0):
10 | super(BasicConv2d, self).__init__()
11 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding,
12 | bias=False) # verify bias false
13 | self.bn = nn.BatchNorm2d(out_planes, eps=0.001, momentum=0, affine=True)
14 | self.relu = nn.ReLU(inplace=False)
15 |
16 | def forward(self, x):
17 | x = self.conv(x)
18 | x = self.bn(x)
19 | x = self.relu(x)
20 | return x
21 |
22 |
23 | class Mixed_5b(nn.Module):
24 | def __init__(self):
25 | super(Mixed_5b, self).__init__()
26 |
27 | self.branch0 = BasicConv2d(192, 96, kernel_size=1, stride=1)
28 |
29 | self.branch1 = nn.Sequential(
30 | BasicConv2d(192, 48, kernel_size=1, stride=1),
31 | BasicConv2d(48, 64, kernel_size=5, stride=1, padding=2)
32 | )
33 |
34 | self.branch2 = nn.Sequential(
35 | BasicConv2d(192, 64, kernel_size=1, stride=1),
36 | BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1),
37 | BasicConv2d(96, 96, kernel_size=3, stride=1, padding=1)
38 | )
39 |
40 | self.branch3 = nn.Sequential(
41 | nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
42 | BasicConv2d(192, 64, kernel_size=1, stride=1)
43 | )
44 |
45 | def forward(self, x):
46 | x0 = self.branch0(x)
47 | x1 = self.branch1(x)
48 | x2 = self.branch2(x)
49 | x3 = self.branch3(x)
50 | out = torch.cat((x0, x1, x2, x3), 1)
51 | return out
52 |
53 |
54 | class Block35(nn.Module):
55 | def __init__(self, scale=1.0):
56 | super(Block35, self).__init__()
57 |
58 | self.scale = scale
59 |
60 | self.branch0 = BasicConv2d(320, 32, kernel_size=1, stride=1)
61 |
62 | self.branch1 = nn.Sequential(
63 | BasicConv2d(320, 32, kernel_size=1, stride=1),
64 | BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1)
65 | )
66 |
67 | self.branch2 = nn.Sequential(
68 | BasicConv2d(320, 32, kernel_size=1, stride=1),
69 | BasicConv2d(32, 48, kernel_size=3, stride=1, padding=1),
70 | BasicConv2d(48, 64, kernel_size=3, stride=1, padding=1)
71 | )
72 |
73 | self.conv2d = nn.Conv2d(128, 320, kernel_size=1, stride=1)
74 | self.relu = nn.ReLU(inplace=False)
75 |
76 | def forward(self, x):
77 | x0 = self.branch0(x)
78 | x1 = self.branch1(x)
79 | x2 = self.branch2(x)
80 | out = torch.cat((x0, x1, x2), 1)
81 | out = self.conv2d(out)
82 | out = out * self.scale + x
83 | out = self.relu(out)
84 | return out
85 |
86 |
87 | class Mixed_6a(nn.Module):
88 | def __init__(self):
89 | super(Mixed_6a, self).__init__()
90 |
91 | self.branch0 = BasicConv2d(320, 384, kernel_size=3, stride=2)
92 |
93 | self.branch1 = nn.Sequential(
94 | BasicConv2d(320, 256, kernel_size=1, stride=1),
95 | BasicConv2d(256, 256, kernel_size=3, stride=1, padding=1),
96 | BasicConv2d(256, 384, kernel_size=3, stride=2)
97 | )
98 |
99 | self.branch2 = nn.MaxPool2d(3, stride=2)
100 |
101 | def forward(self, x):
102 | x0 = self.branch0(x)
103 | x1 = self.branch1(x)
104 | x2 = self.branch2(x)
105 | out = torch.cat((x0, x1, x2), 1)
106 | return out
107 |
108 |
109 | class Block17(nn.Module):
110 | def __init__(self, scale=1.0):
111 | super(Block17, self).__init__()
112 |
113 | self.scale = scale
114 |
115 | self.branch0 = BasicConv2d(1088, 192, kernel_size=1, stride=1)
116 |
117 | self.branch1 = nn.Sequential(
118 | BasicConv2d(1088, 128, kernel_size=1, stride=1),
119 | BasicConv2d(128, 160, kernel_size=(1, 7), stride=1, padding=(0, 3)),
120 | BasicConv2d(160, 192, kernel_size=(7, 1), stride=1, padding=(3, 0))
121 | )
122 |
123 | self.conv2d = nn.Conv2d(384, 1088, kernel_size=1, stride=1)
124 | self.relu = nn.ReLU(inplace=False)
125 |
126 | def forward(self, x):
127 | x0 = self.branch0(x)
128 | x1 = self.branch1(x)
129 | out = torch.cat((x0, x1), 1)
130 | out = self.conv2d(out)
131 | out = out * self.scale + x
132 | out = self.relu(out)
133 | return out
134 |
135 |
136 | class Mixed_7a(nn.Module):
137 | def __init__(self):
138 | super(Mixed_7a, self).__init__()
139 |
140 | self.branch0 = nn.Sequential(
141 | BasicConv2d(1088, 256, kernel_size=1, stride=1),
142 | BasicConv2d(256, 384, kernel_size=3, stride=2)
143 | )
144 |
145 | self.branch1 = nn.Sequential(
146 | BasicConv2d(1088, 256, kernel_size=1, stride=1),
147 | BasicConv2d(256, 288, kernel_size=3, stride=2)
148 | )
149 |
150 | self.branch2 = nn.Sequential(
151 | BasicConv2d(1088, 256, kernel_size=1, stride=1),
152 | BasicConv2d(256, 288, kernel_size=3, stride=1, padding=1),
153 | BasicConv2d(288, 320, kernel_size=3, stride=2)
154 | )
155 |
156 | self.branch3 = nn.MaxPool2d(3, stride=2)
157 |
158 | def forward(self, x):
159 | x0 = self.branch0(x)
160 | x1 = self.branch1(x)
161 | x2 = self.branch2(x)
162 | x3 = self.branch3(x)
163 | out = torch.cat((x0, x1, x2, x3), 1)
164 | return out
165 |
166 |
167 | class Block8(nn.Module):
168 | def __init__(self, scale=1.0, noReLU=False):
169 | super(Block8, self).__init__()
170 |
171 | self.scale = scale
172 | self.noReLU = noReLU
173 |
174 | self.branch0 = BasicConv2d(2080, 192, kernel_size=1, stride=1)
175 |
176 | self.branch1 = nn.Sequential(
177 | BasicConv2d(2080, 192, kernel_size=1, stride=1),
178 | BasicConv2d(192, 224, kernel_size=(1, 3), stride=1, padding=(0, 1)),
179 | BasicConv2d(224, 256, kernel_size=(3, 1), stride=1, padding=(1, 0))
180 | )
181 |
182 | self.conv2d = nn.Conv2d(448, 2080, kernel_size=1, stride=1)
183 | if not self.noReLU:
184 | self.relu = nn.ReLU(inplace=False)
185 |
186 | def forward(self, x):
187 | x0 = self.branch0(x)
188 | x1 = self.branch1(x)
189 | out = torch.cat((x0, x1), 1)
190 | out = self.conv2d(out)
191 | out = out * self.scale + x
192 | if not self.noReLU:
193 | out = self.relu(out)
194 | return out
195 |
196 |
197 | class InceptionResnetV2(nn.Module):
198 | def __init__(self, num_classes=1001):
199 | super(InceptionResnetV2, self).__init__()
200 | self.conv2d_1a = BasicConv2d(3, 32, kernel_size=3, stride=2)
201 | self.conv2d_2a = BasicConv2d(32, 32, kernel_size=3, stride=1)
202 | self.conv2d_2b = BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1)
203 | self.maxpool_3a = nn.MaxPool2d(3, stride=2)
204 | self.conv2d_3b = BasicConv2d(64, 80, kernel_size=1, stride=1)
205 | self.conv2d_4a = BasicConv2d(80, 192, kernel_size=3, stride=1)
206 | self.maxpool_5a = nn.MaxPool2d(3, stride=2)
207 | self.mixed_5b = Mixed_5b()
208 | self.repeat = nn.Sequential(
209 | Block35(scale=0.17),
210 | Block35(scale=0.17),
211 | Block35(scale=0.17),
212 | Block35(scale=0.17),
213 | Block35(scale=0.17),
214 | Block35(scale=0.17),
215 | Block35(scale=0.17),
216 | Block35(scale=0.17),
217 | Block35(scale=0.17),
218 | Block35(scale=0.17)
219 | )
220 | self.mixed_6a = Mixed_6a()
221 | self.repeat_1 = nn.Sequential(
222 | Block17(scale=0.10),
223 | Block17(scale=0.10),
224 | Block17(scale=0.10),
225 | Block17(scale=0.10),
226 | Block17(scale=0.10),
227 | Block17(scale=0.10),
228 | Block17(scale=0.10),
229 | Block17(scale=0.10),
230 | Block17(scale=0.10),
231 | Block17(scale=0.10),
232 | Block17(scale=0.10),
233 | Block17(scale=0.10),
234 | Block17(scale=0.10),
235 | Block17(scale=0.10),
236 | Block17(scale=0.10),
237 | Block17(scale=0.10),
238 | Block17(scale=0.10),
239 | Block17(scale=0.10),
240 | Block17(scale=0.10),
241 | Block17(scale=0.10)
242 | )
243 | self.mixed_7a = Mixed_7a()
244 | self.repeat_2 = nn.Sequential(
245 | Block8(scale=0.20),
246 | Block8(scale=0.20),
247 | Block8(scale=0.20),
248 | Block8(scale=0.20),
249 | Block8(scale=0.20),
250 | Block8(scale=0.20),
251 | Block8(scale=0.20),
252 | Block8(scale=0.20),
253 | Block8(scale=0.20)
254 | )
255 | self.block8 = Block8(noReLU=True)
256 | self.conv2d_7b = BasicConv2d(2080, 1536, kernel_size=1, stride=1)
257 | self.avgpool_1a = nn.AvgPool2d(8, count_include_pad=False)
258 | self.classif = nn.Linear(1536, num_classes)
259 |
260 | def forward(self, x):
261 | x = self.conv2d_1a(x)
262 | x = self.conv2d_2a(x)
263 | x = self.conv2d_2b(x)
264 | x = self.maxpool_3a(x)
265 | x = self.conv2d_3b(x)
266 | x = self.conv2d_4a(x)
267 | x = self.maxpool_5a(x)
268 | x = self.mixed_5b(x)
269 | x = self.repeat(x)
270 | x = self.mixed_6a(x)
271 | x = self.repeat_1(x)
272 | x = self.mixed_7a(x)
273 | x = self.repeat_2(x)
274 | x = self.block8(x)
275 | x = self.conv2d_7b(x)
276 | x = self.avgpool_1a(x)
277 | x = x.view(x.size(0), -1)
278 | x = self.classif(x)
279 | return x
--------------------------------------------------------------------------------
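This file is a self-contained Inception-ResNet-v2 definition and appears unused by the MTCNN pipeline in this repository. A quick shape check (not from the repository); 299x299 is the input size the stem's stride pattern and the final 8x8 average pool are laid out for.

import torch
from dface.core.resnet_inception_v2 import InceptionResnetV2

model = InceptionResnetV2(num_classes=1001)
model.eval()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 299, 299))
print(logits.shape)   # torch.Size([1, 1001])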
/dface/core/roc.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
--------------------------------------------------------------------------------
/dface/core/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def IoU(box, boxes):
4 | """Compute IoU between detect box and gt boxes
5 |
6 | Parameters:
7 | ----------
8 | box: numpy array , shape (5, ): x1, y1, x2, y2, score
9 | input box
10 | boxes: numpy array, shape (n, 4): x1, y1, x2, y2
11 | input ground truth boxes
12 |
13 | Returns:
14 | -------
15 | ovr: numpy.array, shape (n, )
16 | IoU
17 | """
18 | box_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
19 | area = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
20 | xx1 = np.maximum(box[0], boxes[:, 0])
21 | yy1 = np.maximum(box[1], boxes[:, 1])
22 | xx2 = np.minimum(box[2], boxes[:, 2])
23 | yy2 = np.minimum(box[3], boxes[:, 3])
24 |
25 | # compute the width and height of the bounding box
26 | w = np.maximum(0, xx2 - xx1 + 1)
27 | h = np.maximum(0, yy2 - yy1 + 1)
28 |
29 | inter = w * h
30 | ovr = np.true_divide(inter,(box_area + area - inter))
31 | #ovr = inter / (box_area + area - inter)
32 | return ovr
33 |
34 |
35 | def convert_to_square(bbox):
36 | """Convert bbox to square
37 |
38 | Parameters:
39 | ----------
40 | bbox: numpy array , shape n x 5
41 | input bbox
42 |
43 | Returns:
44 | -------
45 | square bbox
46 | """
47 | square_bbox = bbox.copy()
48 |
49 | h = bbox[:, 3] - bbox[:, 1] + 1
50 | w = bbox[:, 2] - bbox[:, 0] + 1
51 | max_side = np.maximum(h,w)
52 | square_bbox[:, 0] = bbox[:, 0] + w*0.5 - max_side*0.5
53 | square_bbox[:, 1] = bbox[:, 1] + h*0.5 - max_side*0.5
54 | square_bbox[:, 2] = square_bbox[:, 0] + max_side - 1
55 | square_bbox[:, 3] = square_bbox[:, 1] + max_side - 1
56 | return square_bbox
57 |
58 |
59 | def nms(dets, thresh, mode="Union"):
60 | """
61 | greedily select boxes with high confidence
62 | keep boxes overlap <= thresh
63 | rule out overlap > thresh
64 | :param dets: numpy array of [[x1, y1, x2, y2, score]]
65 | :param thresh: retain overlap <= thresh; mode "Union" uses IoU, "Minimum" divides by the smaller area
66 | :return: indexes to keep
67 | """
68 | x1 = dets[:, 0]
69 | y1 = dets[:, 1]
70 | x2 = dets[:, 2]
71 | y2 = dets[:, 3]
72 | scores = dets[:, 4]
73 |
74 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
75 | order = scores.argsort()[::-1]
76 |
77 | keep = []
78 | while order.size > 0:
79 | i = order[0]
80 | keep.append(i)
81 | xx1 = np.maximum(x1[i], x1[order[1:]])
82 | yy1 = np.maximum(y1[i], y1[order[1:]])
83 | xx2 = np.minimum(x2[i], x2[order[1:]])
84 | yy2 = np.minimum(y2[i], y2[order[1:]])
85 |
86 | w = np.maximum(0.0, xx2 - xx1 + 1)
87 | h = np.maximum(0.0, yy2 - yy1 + 1)
88 | inter = w * h
89 | if mode == "Union":
90 | ovr = inter / (areas[i] + areas[order[1:]] - inter)
91 | elif mode == "Minimum":
92 | ovr = inter / np.minimum(areas[i], areas[order[1:]])
93 |
94 | inds = np.where(ovr <= thresh)[0]
95 | order = order[inds + 1]
96 |
97 | return keep
98 |
99 |
100 |
101 |
102 |
--------------------------------------------------------------------------------
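A brief sketch of the two geometry helpers above with hand-made boxes (values chosen only for illustration):

import numpy as np
from dface.core.utils import IoU, convert_to_square

box = np.array([0, 0, 9, 9, 0.9])                  # x1, y1, x2, y2, score
gts = np.array([[0, 0, 9, 9],
                [5, 5, 14, 14]], dtype=np.float32)
print(IoU(box, gts))                               # -> [1.0, 0.1428...]

bboxes = np.array([[10, 20, 29, 59, 0.8]], dtype=np.float32)   # 20 wide, 40 tall
print(convert_to_square(bboxes))                   # -> [[0, 20, 39, 59, 0.8]], padded to 40x40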
/dface/core/vision.py:
--------------------------------------------------------------------------------
1 | from matplotlib.patches import Circle
2 |
3 |
4 | def vis_two(im_array, dets1, dets2, thresh=0.9):
5 | """Visualize detection results before and after calibration
6 |
7 | Parameters:
8 | ----------
9 | im_array: numpy.ndarray, shape(1, c, h, w)
10 | test image in rgb
11 | dets1: numpy.ndarray([[x1 y1 x2 y2 score]])
12 | detection results before calibration
13 | dets2: numpy.ndarray([[x1 y1 x2 y2 score]])
14 | detection results after calibration
15 | thresh: float
16 | boxes with scores > thresh will be drawn in red otherwise yellow
17 |
18 | Returns:
19 | -------
20 | """
21 | import matplotlib.pyplot as plt
22 | import random
23 |
24 | figure = plt.figure()
25 | plt.subplot(121)
26 | plt.imshow(im_array)
27 | color = 'yellow'
28 |
29 | for i in range(dets1.shape[0]):
30 | bbox = dets1[i, :4]
31 | landmarks = dets1[i, 5:]
32 | score = dets1[i, 4]
33 | if score > thresh:
34 | rect = plt.Rectangle((bbox[0], bbox[1]),
35 | bbox[2] - bbox[0],
36 | bbox[3] - bbox[1], fill=False,
37 | edgecolor='red', linewidth=0.7)
38 | plt.gca().add_patch(rect)
39 | landmarks = landmarks.reshape((5,2))
40 | for j in range(5):
41 | plt.scatter(landmarks[j,0],landmarks[j,1],c='yellow',linewidths=0.1, marker='x', s=5)
42 |
43 |
44 | # plt.gca().text(bbox[0], bbox[1] - 2,
45 | # '{:.3f}'.format(score),
46 | # bbox=dict(facecolor='blue', alpha=0.5), fontsize=12, color='white')
47 | # else:
48 | # rect = plt.Rectangle((bbox[0], bbox[1]),
49 | # bbox[2] - bbox[0],
50 | # bbox[3] - bbox[1], fill=False,
51 | # edgecolor=color, linewidth=0.5)
52 | # plt.gca().add_patch(rect)
53 |
54 | plt.subplot(122)
55 | plt.imshow(im_array)
56 | color = 'yellow'
57 |
58 | for i in range(dets2.shape[0]):
59 | bbox = dets2[i, :4]
60 | landmarks = dets2[i, 5:]
61 | score = dets2[i, 4]
62 | if score > thresh:
63 | rect = plt.Rectangle((bbox[0], bbox[1]),
64 | bbox[2] - bbox[0],
65 | bbox[3] - bbox[1], fill=False,
66 | edgecolor='red', linewidth=0.7)
67 | plt.gca().add_patch(rect)
68 |
69 | landmarks = landmarks.reshape((5, 2))
70 | for j in range(5):
71 | plt.scatter(landmarks[j, 0], landmarks[j, 1], c='yellow',linewidths=0.1, marker='x', s=5)
72 |
73 | # plt.gca().text(bbox[0], bbox[1] - 2,
74 | # '{:.3f}'.format(score),
75 | # bbox=dict(facecolor='blue', alpha=0.5), fontsize=12, color='white')
76 | # else:
77 | # rect = plt.Rectangle((bbox[0], bbox[1]),
78 | # bbox[2] - bbox[0],
79 | # bbox[3] - bbox[1], fill=False,
80 | # edgecolor=color, linewidth=0.5)
81 | # plt.gca().add_patch(rect)
82 | plt.show()
83 |
84 |
85 | def vis_face(im_array, dets, landmarks=None):
86 | """Visualize detection results before and after calibration
87 |
88 | Parameters:
89 | ----------
90 | im_array: numpy.ndarray, shape(1, c, h, w)
91 | test image in rgb
92 | dets: numpy.ndarray([[x1 y1 x2 y2 score]])
93 | final detection results to draw
94 | landmarks: numpy.ndarray([[x1 y1 x2 y2 ... x5 y5]]), optional
95 | five facial landmark points per detection
96 | (all boxes are drawn in yellow; landmarks, if given,
97 | are drawn as red circles)
98 |
99 | Returns:
100 | -------
101 | """
102 | import matplotlib.pyplot as plt
103 | import random
104 | import pylab
105 |
106 | figure = pylab.figure()
107 | # plt.subplot(121)
108 | pylab.imshow(im_array)
109 | figure.suptitle('DFace Detector', fontsize=20)
110 |
111 |
112 |
113 | for i in range(dets.shape[0]):
114 | bbox = dets[i, :4]
115 |
116 | rect = pylab.Rectangle((bbox[0], bbox[1]),
117 | bbox[2] - bbox[0],
118 | bbox[3] - bbox[1], fill=False,
119 | edgecolor='yellow', linewidth=0.9)
120 | pylab.gca().add_patch(rect)
121 |
122 | if landmarks is not None:
123 | for i in range(landmarks.shape[0]):
124 | landmarks_one = landmarks[i, :]
125 | landmarks_one = landmarks_one.reshape((5, 2))
126 | for j in range(5):
127 | # pylab.scatter(landmarks_one[j, 0], landmarks_one[j, 1], c='yellow', linewidths=0.1, marker='x', s=5)
128 |
129 | cir1 = Circle(xy=(landmarks_one[j, 0], landmarks_one[j, 1]), radius=2, alpha=0.4, color="red")
130 | pylab.gca().add_patch(cir1)
131 | # plt.gca().text(bbox[0], bbox[1] - 2,
132 | # '{:.3f}'.format(score),
133 | # bbox=dict(facecolor='blue', alpha=0.5), fontsize=12, color='white')
134 | # else:
135 | # rect = plt.Rectangle((bbox[0], bbox[1]),
136 | # bbox[2] - bbox[0],
137 | # bbox[3] - bbox[1], fill=False,
138 | # edgecolor=color, linewidth=0.5)
139 | # plt.gca().add_patch(rect)
140 |
141 | pylab.show()
--------------------------------------------------------------------------------
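A minimal way to exercise vis_face outside the detector; the box and landmark coordinates below are made up, and test.jpg is the sample image in the repository root.

import cv2
import numpy as np
from dface.core.vision import vis_face

img = cv2.cvtColor(cv2.imread("test.jpg"), cv2.COLOR_BGR2RGB)
dets = np.array([[50, 60, 200, 240, 0.99]])                            # x1, y1, x2, y2, score
landmarks = np.array([[90, 110, 160, 112, 125, 150, 100, 190, 155, 192]])
vis_face(img, dets, landmarks)   # opens a matplotlib window titled "DFace Detector"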
/dface/prepare_data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/dface/prepare_data/__init__.py
--------------------------------------------------------------------------------
/dface/prepare_data/assemble.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 | import numpy.random as npr
4 | import numpy as np
5 |
6 | def assemble_data(output_file, anno_file_list=[]):
7 | #assemble the annotations to one file
8 | size = 12
9 |
10 | if len(anno_file_list)==0:
11 | return 0
12 |
13 | if os.path.exists(output_file):
14 | os.remove(output_file)
15 |
16 | for anno_file in anno_file_list:
17 | with open(anno_file, 'r') as f:
18 | anno_lines = f.readlines()
19 |
20 | base_num = 250000
21 |
22 | if len(anno_lines) > base_num * 3:
23 | idx_keep = npr.choice(len(anno_lines), size=base_num * 3, replace=True)
24 | elif len(anno_lines) > 100000:
25 | idx_keep = npr.choice(len(anno_lines), size=len(anno_lines), replace=True)
26 | else:
27 | idx_keep = np.arange(len(anno_lines))
28 | np.random.shuffle(idx_keep)
29 | chose_count = 0
30 | with open(output_file, 'a+') as f:
31 | for idx in idx_keep:
32 | f.write(anno_lines[idx])
33 | chose_count+=1
34 |
35 | return chose_count
--------------------------------------------------------------------------------
/dface/prepare_data/assemble_onet_imglist.py:
--------------------------------------------------------------------------------
1 | import os
2 | import dface.config as config
3 | import dface.prepare_data.assemble as assemble
4 |
5 |
6 | if __name__ == '__main__':
7 |
8 | anno_list = []
9 |
10 | net_landmark_file = os.path.join(config.ANNO_STORE_DIR,config.ONET_LANDMARK_ANNO_FILENAME)
11 | net_postive_file = os.path.join(config.ANNO_STORE_DIR,config.ONET_POSTIVE_ANNO_FILENAME)
12 | net_part_file = os.path.join(config.ANNO_STORE_DIR,config.ONET_PART_ANNO_FILENAME)
13 | net_neg_file = os.path.join(config.ANNO_STORE_DIR,config.ONET_NEGATIVE_ANNO_FILENAME)
14 |
15 | anno_list.append(net_postive_file)
16 | anno_list.append(net_part_file)
17 | anno_list.append(net_neg_file)
18 | anno_list.append(net_landmark_file)
19 |
20 | imglist_filename = config.ONET_TRAIN_IMGLIST_FILENAME
21 | anno_dir = config.ANNO_STORE_DIR
22 | imglist_file = os.path.join(anno_dir, imglist_filename)
23 |
24 | chose_count = assemble.assemble_data(imglist_file ,anno_list)
25 | print("PNet train annotation result file path:%s" % imglist_file)
26 |
--------------------------------------------------------------------------------
/dface/prepare_data/assemble_pnet_imglist.py:
--------------------------------------------------------------------------------
1 | import os
2 | import dface.config as config
3 | import dface.prepare_data.assemble as assemble
4 |
5 |
6 | if __name__ == '__main__':
7 |
8 | anno_list = []
9 |
10 | # pnet_landmark_file = os.path.join(config.ANNO_STORE_DIR,config.PNET_LANDMARK_ANNO_FILENAME)
11 | pnet_postive_file = os.path.join(config.ANNO_STORE_DIR,config.PNET_POSTIVE_ANNO_FILENAME)
12 | pnet_part_file = os.path.join(config.ANNO_STORE_DIR,config.PNET_PART_ANNO_FILENAME)
13 | pnet_neg_file = os.path.join(config.ANNO_STORE_DIR,config.PNET_NEGATIVE_ANNO_FILENAME)
14 |
15 | anno_list.append(pnet_postive_file)
16 | anno_list.append(pnet_part_file)
17 | anno_list.append(pnet_neg_file)
18 | # anno_list.append(pnet_landmark_file)
19 |
20 | imglist_filename = config.PNET_TRAIN_IMGLIST_FILENAME
21 | anno_dir = config.ANNO_STORE_DIR
22 | imglist_file = os.path.join(anno_dir, imglist_filename)
23 |
24 | chose_count = assemble.assemble_data(imglist_file ,anno_list)
25 | print("PNet train annotation result file path:%s" % imglist_file)
26 |
--------------------------------------------------------------------------------
/dface/prepare_data/assemble_rnet_imglist.py:
--------------------------------------------------------------------------------
1 | import os
2 | import dface.config as config
3 | import dface.prepare_data.assemble as assemble
4 |
5 |
6 | if __name__ == '__main__':
7 |
8 | anno_list = []
9 |
10 | # pnet_landmark_file = os.path.join(config.ANNO_STORE_DIR,config.RNET_LANDMARK_ANNO_FILENAME)
11 | pnet_postive_file = os.path.join(config.ANNO_STORE_DIR,config.RNET_POSTIVE_ANNO_FILENAME)
12 | pnet_part_file = os.path.join(config.ANNO_STORE_DIR,config.RNET_PART_ANNO_FILENAME)
13 | pnet_neg_file = os.path.join(config.ANNO_STORE_DIR,config.RNET_NEGATIVE_ANNO_FILENAME)
14 |
15 | anno_list.append(pnet_postive_file)
16 | anno_list.append(pnet_part_file)
17 | anno_list.append(pnet_neg_file)
18 | # anno_list.append(pnet_landmark_file)
19 |
20 | imglist_filename = config.RNET_TRAIN_IMGLIST_FILENAME
21 | anno_dir = config.ANNO_STORE_DIR
22 | imglist_file = os.path.join(anno_dir, imglist_filename)
23 |
24 | chose_count = assemble.assemble_data(imglist_file ,anno_list)
25 | print("PNet train annotation result file path:%s" % imglist_file)
26 |
--------------------------------------------------------------------------------
/dface/prepare_data/gen_Onet_train_data.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | import cv2
4 | import numpy as np
5 | from dface.core.detect import MtcnnDetector,create_mtcnn_net
6 | from dface.core.imagedb import ImageDB
7 | from dface.core.image_reader import TestImageLoader
8 | import time
9 | import os
10 | import pickle as cPickle
11 | from dface.core.utils import convert_to_square,IoU
12 | import dface.config as config
13 | import dface.core.vision as vision
14 |
15 | def gen_onet_data(data_dir, anno_file, pnet_model_file, rnet_model_file, prefix_path='', use_cuda=True, vis=False):
16 |
17 |
18 | pnet, rnet, _ = create_mtcnn_net(p_model_path=pnet_model_file, r_model_path=rnet_model_file, use_cuda=use_cuda)
19 | mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, min_face_size=12)
20 |
21 | imagedb = ImageDB(anno_file,mode="test",prefix_path=prefix_path)
22 | imdb = imagedb.load_imdb()
23 | image_reader = TestImageLoader(imdb,1,False)
24 |
25 | all_boxes = list()
26 | batch_idx = 0
27 |
28 | for databatch in image_reader:
29 | if batch_idx % 100 == 0:
30 | print("%d images done" % batch_idx)
31 | im = databatch
32 |
33 | t = time.time()
34 |
35 | p_boxes, p_boxes_align = mtcnn_detector.detect_pnet(im=im)
36 |
37 | boxes, boxes_align = mtcnn_detector.detect_rnet(im=im, dets=p_boxes_align)
38 |
39 | if boxes_align is None:
40 | all_boxes.append(np.array([]))
41 | batch_idx += 1
42 | continue
43 | if vis:
44 | rgb_im = cv2.cvtColor(np.asarray(im), cv2.COLOR_BGR2RGB)
45 | vision.vis_two(rgb_im, boxes, boxes_align)
46 |
47 | t1 = time.time() - t
48 | t = time.time()
49 | all_boxes.append(boxes_align)
50 | batch_idx += 1
51 |
52 | save_path = config.MODEL_STORE_DIR
53 |
54 | if not os.path.exists(save_path):
55 | os.mkdir(save_path)
56 |
57 | save_file = os.path.join(save_path, "detections_%d.pkl" % int(time.time()))
58 | with open(save_file, 'wb') as f:
59 | cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)
60 |
61 |
62 | gen_onet_sample_data(data_dir,anno_file,save_file,prefix_path)
63 |
64 |
65 |
66 |
67 |
68 |
69 | def gen_onet_sample_data(data_dir,anno_file,det_boxs_file,prefix):
70 |
71 | neg_save_dir = os.path.join(data_dir, "48/negative")
72 | pos_save_dir = os.path.join(data_dir, "48/positive")
73 | part_save_dir = os.path.join(data_dir, "48/part")
74 |
75 | for dir_path in [neg_save_dir, pos_save_dir, part_save_dir]:
76 | if not os.path.exists(dir_path):
77 | os.makedirs(dir_path)
78 |
79 |
80 | # load ground truth from annotation file
81 | # format of each line: image/path [x1,y1,x2,y2] for each gt_box in this image
82 |
83 | with open(anno_file, 'r') as f:
84 | annotations = f.readlines()
85 |
86 | image_size = 48
87 | net = "onet"
88 |
89 | im_idx_list = list()
90 | gt_boxes_list = list()
91 | num_of_images = len(annotations)
92 | print("processing %d images in total" % num_of_images)
93 |
94 | for annotation in annotations:
95 | annotation = annotation.strip().split(' ')
96 | im_idx = os.path.join(prefix,annotation[0])
97 |
98 | boxes = list(map(float, annotation[1:]))
99 | boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4)
100 | im_idx_list.append(im_idx)
101 | gt_boxes_list.append(boxes)
102 |
103 |
104 | save_path = config.ANNO_STORE_DIR
105 | if not os.path.exists(save_path):
106 | os.makedirs(save_path)
107 |
108 | f1 = open(os.path.join(save_path, 'pos_%d.txt' % image_size), 'w')
109 | f2 = open(os.path.join(save_path, 'neg_%d.txt' % image_size), 'w')
110 | f3 = open(os.path.join(save_path, 'part_%d.txt' % image_size), 'w')
111 |
112 | det_handle = open(det_boxs_file, 'rb')
113 |
114 | det_boxes = cPickle.load(det_handle)
115 | print(len(det_boxes), num_of_images)
116 | assert len(det_boxes) == num_of_images, "incorrect detections or ground truths"
117 |
118 | # index of neg, pos and part face, used as their image names
119 | n_idx = 0
120 | p_idx = 0
121 | d_idx = 0
122 | image_done = 0
123 | for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list):
124 | if image_done % 100 == 0:
125 | print("%d images done" % image_done)
126 | image_done += 1
127 |
128 | if dets.shape[0] == 0:
129 | continue
130 | img = cv2.imread(im_idx)
131 | dets = convert_to_square(dets)
132 | dets[:, 0:4] = np.round(dets[:, 0:4])
133 |
134 | for box in dets:
135 | x_left, y_top, x_right, y_bottom = box[0:4].astype(int)
136 | width = x_right - x_left + 1
137 | height = y_bottom - y_top + 1
138 |
139 | # ignore box that is too small or beyond image border
140 | if width < 20 or x_left < 0 or y_top < 0 or x_right > img.shape[1] - 1 or y_bottom > img.shape[0] - 1:
141 | continue
142 |
143 | # compute intersection over union(IoU) between current box and all gt boxes
144 | Iou = IoU(box, gts)
145 | cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
146 | resized_im = cv2.resize(cropped_im, (image_size, image_size),
147 | interpolation=cv2.INTER_LINEAR)
148 |
149 | # save negative images and write label
150 | if np.max(Iou) < 0.3:
151 | # Iou with all gts must below 0.3
152 | save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
153 | f2.write(save_file + ' 0\n')
154 | cv2.imwrite(save_file, resized_im)
155 | n_idx += 1
156 | else:
157 | # find gt_box with the highest iou
158 | idx = np.argmax(Iou)
159 | assigned_gt = gts[idx]
160 | x1, y1, x2, y2 = assigned_gt
161 |
162 | # compute bbox reg label
163 | offset_x1 = (x1 - x_left) / float(width)
164 | offset_y1 = (y1 - y_top) / float(height)
165 | offset_x2 = (x2 - x_right) / float(width)
166 | offset_y2 = (y2 - y_bottom) / float(height)
167 |
168 | # save positive and part-face images and write labels
169 | if np.max(Iou) >= 0.65:
170 | save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
171 | f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' % (
172 | offset_x1, offset_y1, offset_x2, offset_y2))
173 | cv2.imwrite(save_file, resized_im)
174 | p_idx += 1
175 |
176 | elif np.max(Iou) >= 0.4:
177 | save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
178 | f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' % (
179 | offset_x1, offset_y1, offset_x2, offset_y2))
180 | cv2.imwrite(save_file, resized_im)
181 | d_idx += 1
182 | f1.close()
183 | f2.close()
184 | f3.close()
185 |
186 |
187 |
188 | def model_store_path():
189 | return os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))+"/model_store"
190 |
191 |
192 |
193 | def parse_args():
194 | parser = argparse.ArgumentParser(description='Test mtcnn',
195 | formatter_class=argparse.ArgumentDefaultsHelpFormatter)
196 |
197 | parser.add_argument('--dface_traindata_store', dest='traindata_store', help='dface train data temporary folder,include 12,24,48/postive,negative,part,landmark',
198 | default='../data/wider/', type=str)
199 | parser.add_argument('--anno_file', dest='annotation_file', help='wider face original annotation file',
200 | default=os.path.join(config.ANNO_STORE_DIR,"wider_origin_anno.txt"), type=str)
201 | parser.add_argument('--pmodel_file', dest='pnet_model_file', help='PNet model file path',
202 | default='/idata/workspace/dface/model_store/pnet_epoch.pt', type=str)
203 | parser.add_argument('--rmodel_file', dest='rnet_model_file', help='RNet model file path',
204 | default='/idata/workspace/dface/model_store/rnet_epoch.pt', type=str)
205 | parser.add_argument('--gpu', dest='use_cuda', help='with gpu',
206 | default=config.USE_CUDA, type=bool)
207 | parser.add_argument('--prefix_path', dest='prefix_path', help='annotation file image prefix root path',
208 | default='', type=str)
209 |
210 | args = parser.parse_args()
211 | return args
212 |
213 |
214 |
215 | if __name__ == '__main__':
216 | args = parse_args()
217 | gen_onet_data(args.traindata_store, args.annotation_file, args.pnet_model_file, args.rnet_model_file, args.prefix_path, args.use_cuda)
218 |
219 |
220 |
221 |
--------------------------------------------------------------------------------
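The positive/part/negative split above hinges on fixed IoU thresholds (the same ones used for the RNet and PNet data). A compact sketch of that labelling rule; the helper name label_crop is made up for illustration.

import numpy as np
from dface.core.utils import IoU

def label_crop(crop_box, gt_boxes):
    # neg (< 0.3) -> 0, pos (>= 0.65) -> 1, part (0.4 .. 0.65) -> -1, otherwise discard
    iou = np.max(IoU(crop_box, gt_boxes))
    if iou < 0.3:
        return 0
    if iou >= 0.65:
        return 1
    if iou >= 0.4:
        return -1
    return None

crop = np.array([10.0, 10.0, 57.0, 57.0])
gts = np.array([[12.0, 12.0, 59.0, 59.0]])
print(label_crop(crop, gts))   # -> 1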
/dface/prepare_data/gen_Pnet_train_data.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import numpy as np
3 | import cv2
4 | import os
5 | import numpy.random as npr
6 | from dface.core.utils import IoU
7 | import dface.config as config
8 |
9 | def gen_pnet_data(data_dir,anno_file,prefix):
10 |
11 | neg_save_dir = os.path.join(data_dir,"12/negative")
12 | pos_save_dir = os.path.join(data_dir,"12/positive")
13 | part_save_dir = os.path.join(data_dir,"12/part")
14 |
15 | for dir_path in [neg_save_dir,pos_save_dir,part_save_dir]:
16 | if not os.path.exists(dir_path):
17 | os.makedirs(dir_path)
18 |
19 | save_dir = os.path.join(data_dir,"pnet")
20 | if not os.path.exists(save_dir):
21 | os.mkdir(save_dir)
22 |
23 | post_save_file = os.path.join(config.ANNO_STORE_DIR,config.PNET_POSTIVE_ANNO_FILENAME)
24 | neg_save_file = os.path.join(config.ANNO_STORE_DIR,config.PNET_NEGATIVE_ANNO_FILENAME)
25 | part_save_file = os.path.join(config.ANNO_STORE_DIR,config.PNET_PART_ANNO_FILENAME)
26 |
27 | f1 = open(post_save_file, 'w')
28 | f2 = open(neg_save_file, 'w')
29 | f3 = open(part_save_file, 'w')
30 |
31 | with open(anno_file, 'r') as f:
32 | annotations = f.readlines()
33 |
34 | num = len(annotations)
35 | print("%d pics in total" % num)
36 | p_idx = 0
37 | n_idx = 0
38 | d_idx = 0
39 | idx = 0
40 | box_idx = 0
41 | for annotation in annotations:
42 | annotation = annotation.strip().split(' ')
43 | im_path = os.path.join(prefix,annotation[0])
44 | bbox = list(map(float, annotation[1:]))
45 | boxes = np.array(bbox, dtype=np.int32).reshape(-1, 4)
46 | img = cv2.imread(im_path)
47 | idx += 1
48 | if idx % 100 == 0:
49 | print(idx, "images done")
50 |
51 | height, width, channel = img.shape
52 |
53 | neg_num = 0
54 | while neg_num < 50:
55 | size = npr.randint(12, min(width, height) / 2)
56 | nx = npr.randint(0, width - size)
57 | ny = npr.randint(0, height - size)
58 | crop_box = np.array([nx, ny, nx + size, ny + size])
59 |
60 | Iou = IoU(crop_box, boxes)
61 |
62 | cropped_im = img[ny : ny + size, nx : nx + size, :]
63 | resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)
64 |
65 | if np.max(Iou) < 0.3:
66 | # Iou with all gts must below 0.3
67 | save_file = os.path.join(neg_save_dir, "%s.jpg"%n_idx)
68 | f2.write(save_file + ' 0\n')
69 | cv2.imwrite(save_file, resized_im)
70 | n_idx += 1
71 | neg_num += 1
72 |
73 |
74 | for box in boxes:
75 | # box (x_left, y_top, x_right, y_bottom)
76 | x1, y1, x2, y2 = box
77 | w = x2 - x1 + 1
78 | h = y2 - y1 + 1
79 |
80 | # ignore small faces
81 | # in case the ground truth boxes of small faces are not accurate
82 | if max(w, h) < 40 or x1 < 0 or y1 < 0:
83 | continue
84 |
85 | # generate negative examples that have overlap with gt
86 | for i in range(5):
87 | size = npr.randint(12, min(width, height) / 2)
88 | # delta_x and delta_y are offsets of (x1, y1)
89 | delta_x = npr.randint(max(-size, -x1), w)
90 | delta_y = npr.randint(max(-size, -y1), h)
91 | nx1 = max(0, x1 + delta_x)
92 | ny1 = max(0, y1 + delta_y)
93 |
94 |
95 |
96 | if nx1 + size > width or ny1 + size > height:
97 | continue
98 | crop_box = np.array([nx1, ny1, nx1 + size, ny1 + size])
99 | Iou = IoU(crop_box, boxes)
100 |
101 | cropped_im = img[ny1 : ny1 + size, nx1 : nx1 + size, :]
102 | resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)
103 |
104 | if np.max(Iou) < 0.3:
105 | # Iou with all gts must below 0.3
106 | save_file = os.path.join(neg_save_dir, "%s.jpg"%n_idx)
107 | f2.write(save_file + ' 0\n')
108 | cv2.imwrite(save_file, resized_im)
109 | n_idx += 1
110 |
111 | # generate positive examples and part faces
112 | for i in range(20):
113 | size = npr.randint(int(min(w, h) * 0.8), np.ceil(1.25 * max(w, h)))
114 |
115 | # delta here is the offset of box center
116 | delta_x = npr.randint(-w * 0.2, w * 0.2)
117 | delta_y = npr.randint(-h * 0.2, h * 0.2)
118 |
119 | nx1 = int(max(x1 + w / 2 + delta_x - size / 2, 0))
120 | ny1 = int(max(y1 + h / 2 + delta_y - size / 2, 0))
121 | nx2 = nx1 + size
122 | ny2 = ny1 + size
123 |
124 | if nx2 > width or ny2 > height:
125 | continue
126 | crop_box = np.array([nx1, ny1, nx2, ny2])
127 |
128 | offset_x1 = (x1 - nx1) / float(size)
129 | offset_y1 = (y1 - ny1) / float(size)
130 | offset_x2 = (x2 - nx2) / float(size)
131 | offset_y2 = (y2 - ny2) / float(size)
132 |
133 | cropped_im = img[ny1 : ny2, nx1 : nx2, :]
134 | resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)
135 |
136 | box_ = box.reshape(1, -1)
137 | if IoU(crop_box, box_) >= 0.65:
138 | save_file = os.path.join(pos_save_dir, "%s.jpg"%p_idx)
139 | f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n'%(offset_x1, offset_y1, offset_x2, offset_y2))
140 | cv2.imwrite(save_file, resized_im)
141 | p_idx += 1
142 | elif IoU(crop_box, box_) >= 0.4:
143 | save_file = os.path.join(part_save_dir, "%s.jpg"%d_idx)
144 | f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n'%(offset_x1, offset_y1, offset_x2, offset_y2))
145 | cv2.imwrite(save_file, resized_im)
146 | d_idx += 1
147 | box_idx += 1
148 | print("%s images done, pos: %s part: %s neg: %s"%(idx, p_idx, d_idx, n_idx))
149 |
150 | f1.close()
151 | f2.close()
152 | f3.close()
153 |
154 |
155 |
156 | def parse_args():
157 | parser = argparse.ArgumentParser(description='Test mtcnn',
158 | formatter_class=argparse.ArgumentDefaultsHelpFormatter)
159 |
160 | parser.add_argument('--dface_traindata_store', dest='traindata_store', help='dface train data temporary folder,include 12,24,48/postive,negative,part,landmark',
161 | default='../data/wider/', type=str)
162 | parser.add_argument('--anno_file', dest='annotation_file', help='wider face original annotation file',
163 | default=os.path.join(config.ANNO_STORE_DIR,"wider_origin_anno.txt"), type=str)
164 | parser.add_argument('--prefix_path', dest='prefix_path', help='annotation file image prefix root path',
165 | default='', type=str)
166 |
167 |
168 |
169 |
170 | args = parser.parse_args()
171 | return args
172 |
173 | if __name__ == '__main__':
174 | args = parse_args()
175 | gen_pnet_data(args.traindata_store,args.annotation_file,args.prefix_path)
176 |
--------------------------------------------------------------------------------
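The bounding-box regression targets written above are the ground-truth corner displacements normalised by the side length of the square crop, so they can later be inverted to calibrate a detection. A minimal sketch of that encoding and its inverse (function names are illustrative, not from the repository):

import numpy as np

def encode(crop, gt):
    x1, y1, x2, y2 = crop
    size = float(x2 - x1)                    # crops are square, so width == height
    gx1, gy1, gx2, gy2 = gt
    return np.array([(gx1 - x1) / size, (gy1 - y1) / size,
                     (gx2 - x2) / size, (gy2 - y2) / size])

def decode(crop, offsets):
    x1, y1, x2, y2 = crop
    size = float(x2 - x1)
    return np.array(crop, dtype=np.float64) + offsets * size

crop = np.array([40.0, 40.0, 100.0, 100.0])
gt = np.array([45.0, 38.0, 105.0, 98.0])
print(decode(crop, encode(crop, gt)))        # -> [ 45.  38. 105.  98.]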
/dface/prepare_data/gen_Rnet_train_data.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 |
4 | import cv2
5 | import numpy as np
6 | from dface.core.detect import MtcnnDetector,create_mtcnn_net
7 | from dface.core.imagedb import ImageDB
8 | from dface.core.image_reader import TestImageLoader
9 | import time
10 | import os
11 | import pickle as cPickle
12 | from dface.core.utils import convert_to_square,IoU
13 | import dface.config as config
14 | import dface.core.vision as vision
15 |
16 | def gen_rnet_data(data_dir, anno_file, pnet_model_file, prefix_path='', use_cuda=True, vis=False):
17 |
18 |
19 | pnet, _, _ = create_mtcnn_net(p_model_path=pnet_model_file, use_cuda=use_cuda)
20 | mtcnn_detector = MtcnnDetector(pnet=pnet,min_face_size=12)
21 |
22 | imagedb = ImageDB(anno_file,mode="test",prefix_path=prefix_path)
23 | imdb = imagedb.load_imdb()
24 | image_reader = TestImageLoader(imdb,1,False)
25 |
26 | all_boxes = list()
27 | batch_idx = 0
28 |
29 | for databatch in image_reader:
30 | if batch_idx % 100 == 0:
31 | print ("%d images done" % batch_idx)
32 | im = databatch
33 |
34 | t = time.time()
35 |
36 | boxes, boxes_align = mtcnn_detector.detect_pnet(im=im)
37 | if boxes_align is None:
38 | all_boxes.append(np.array([]))
39 | batch_idx += 1
40 | continue
41 | if vis:
42 | rgb_im = cv2.cvtColor(np.asarray(im), cv2.COLOR_BGR2RGB)
43 | vision.vis_two(rgb_im, boxes, boxes_align)
44 |
45 | t1 = time.time() - t
46 | t = time.time()
47 | all_boxes.append(boxes_align)
48 | batch_idx += 1
49 |
50 | # save_path = model_store_path()
51 | save_path = config.MODEL_STORE_DIR
52 |
53 | if not os.path.exists(save_path):
54 | os.mkdir(save_path)
55 |
56 | save_file = os.path.join(save_path, "detections_%d.pkl" % int(time.time()))
57 | with open(save_file, 'wb') as f:
58 | cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)
59 |
60 |
61 | gen_rnet_sample_data(data_dir,anno_file,save_file,prefix_path)
62 |
63 |
64 |
65 | def gen_rnet_sample_data(data_dir,anno_file,det_boxs_file,prefix_path):
66 |
67 | neg_save_dir = os.path.join(data_dir, "24/negative")
68 | pos_save_dir = os.path.join(data_dir, "24/positive")
69 | part_save_dir = os.path.join(data_dir, "24/part")
70 |
71 | for dir_path in [neg_save_dir, pos_save_dir, part_save_dir]:
72 | if not os.path.exists(dir_path):
73 | os.makedirs(dir_path)
74 |
75 |
76 | # load ground truth from annotation file
77 | # format of each line: image/path [x1,y1,x2,y2] for each gt_box in this image
78 |
79 | with open(anno_file, 'r') as f:
80 | annotations = f.readlines()
81 |
82 | image_size = 24
83 | net = "rnet"
84 |
85 | im_idx_list = list()
86 | gt_boxes_list = list()
87 | num_of_images = len(annotations)
88 | print ("processing %d images in total" % num_of_images)
89 |
90 | for annotation in annotations:
91 | annotation = annotation.strip().split(' ')
92 | im_idx = os.path.join(prefix_path,annotation[0])
93 |
94 | boxes = list(map(float, annotation[1:]))
95 | boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4)
96 | im_idx_list.append(im_idx)
97 | gt_boxes_list.append(boxes)
98 |
99 |
100 | save_path = config.ANNO_STORE_DIR
101 | if not os.path.exists(save_path):
102 | os.makedirs(save_path)
103 |
104 | f1 = open(os.path.join(save_path, 'pos_%d.txt' % image_size), 'w')
105 | f2 = open(os.path.join(save_path, 'neg_%d.txt' % image_size), 'w')
106 | f3 = open(os.path.join(save_path, 'part_%d.txt' % image_size), 'w')
107 |
108 | det_handle = open(det_boxs_file, 'rb')
109 |
110 | det_boxes = cPickle.load(det_handle)
111 | print(len(det_boxes), num_of_images)
112 | assert len(det_boxes) == num_of_images, "incorrect detections or ground truths"
113 |
114 | # index of neg, pos and part face, used as their image names
115 | n_idx = 0
116 | p_idx = 0
117 | d_idx = 0
118 | image_done = 0
119 | for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list):
120 | if image_done % 100 == 0:
121 | print("%d images done" % image_done)
122 | image_done += 1
123 |
124 | if dets.shape[0] == 0:
125 | continue
126 | img = cv2.imread(im_idx)
127 | dets = convert_to_square(dets)
128 | dets[:, 0:4] = np.round(dets[:, 0:4])
129 |
130 | for box in dets:
131 | x_left, y_top, x_right, y_bottom = box[0:4].astype(int)
132 | width = x_right - x_left + 1
133 | height = y_bottom - y_top + 1
134 |
135 | # ignore box that is too small or beyond image border
136 | if width < 20 or x_left < 0 or y_top < 0 or x_right > img.shape[1] - 1 or y_bottom > img.shape[0] - 1:
137 | continue
138 |
139 | # compute intersection over union(IoU) between current box and all gt boxes
140 | Iou = IoU(box, gts)
141 | cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
142 | resized_im = cv2.resize(cropped_im, (image_size, image_size),
143 | interpolation=cv2.INTER_LINEAR)
144 |
145 | # save negative images and write label
146 | if np.max(Iou) < 0.3:
147 | # Iou with all gts must below 0.3
148 | save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
149 | f2.write(save_file + ' 0\n')
150 | cv2.imwrite(save_file, resized_im)
151 | n_idx += 1
152 | else:
153 | # find gt_box with the highest iou
154 | idx = np.argmax(Iou)
155 | assigned_gt = gts[idx]
156 | x1, y1, x2, y2 = assigned_gt
157 |
158 | # compute bbox reg label
159 | offset_x1 = (x1 - x_left) / float(width)
160 | offset_y1 = (y1 - y_top) / float(height)
161 | offset_x2 = (x2 - x_right) / float(width)
162 | offset_y2 = (y2 - y_bottom) / float(height)
163 |
164 | # save positive and part-face images and write labels
165 | if np.max(Iou) >= 0.65:
166 | save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
167 | f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' % (
168 | offset_x1, offset_y1, offset_x2, offset_y2))
169 | cv2.imwrite(save_file, resized_im)
170 | p_idx += 1
171 |
172 | elif np.max(Iou) >= 0.4:
173 | save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
174 | f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' % (
175 | offset_x1, offset_y1, offset_x2, offset_y2))
176 | cv2.imwrite(save_file, resized_im)
177 | d_idx += 1
178 | f1.close()
179 | f2.close()
180 | f3.close()
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 | def model_store_path():
189 | return os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))+"/model_store"
190 |
191 |
192 |
193 | def parse_args():
194 | parser = argparse.ArgumentParser(description='Test mtcnn',
195 | formatter_class=argparse.ArgumentDefaultsHelpFormatter)
196 |
197 | parser.add_argument('--dface_traindata_store', dest='traindata_store', help='dface train data temporary folder,include 12,24,48/postive,negative,part,landmark',
198 | default='../data/wider/', type=str)
199 | parser.add_argument('--anno_file', dest='annotation_file', help='wider face original annotation file',
200 | default=os.path.join(config.ANNO_STORE_DIR,"wider_origin_anno.txt"), type=str)
201 | parser.add_argument('--pmodel_file', dest='pnet_model_file', help='PNet model file path',
202 | default='/idata/workspace/dface/model_store/pnet_epoch.pt', type=str)
203 | parser.add_argument('--gpu', dest='use_cuda', help='with gpu',
204 | default=config.USE_CUDA, type=bool)
205 | parser.add_argument('--prefix_path', dest='prefix_path', help='annotation file image prefix root path',
206 | default='', type=str)
207 |
208 |
209 | args = parser.parse_args()
210 | return args
211 |
212 |
213 |
214 | if __name__ == '__main__':
215 | args = parse_args()
216 | gen_rnet_data(args.traindata_store, args.annotation_file, args.pnet_model_file, args.prefix_path, args.use_cuda)
217 |
218 |
219 |
220 |
--------------------------------------------------------------------------------
/dface/prepare_data/gen_landmark_12.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import os
3 | import cv2
4 | import numpy as np
5 | import sys
6 | import numpy.random as npr
7 | import argparse
8 | import dface.config as config
9 | import dface.core.utils as utils
10 |
11 |
12 | def gen_data(anno_file, data_dir, prefix):
13 |
14 |
15 | size = 12
16 | image_id = 0
17 |
18 | landmark_imgs_save_dir = os.path.join(data_dir,"12/landmark")
19 | if not os.path.exists(landmark_imgs_save_dir):
20 | os.makedirs(landmark_imgs_save_dir)
21 |
22 | anno_dir = config.ANNO_STORE_DIR
23 | if not os.path.exists(anno_dir):
24 | os.makedirs(anno_dir)
25 |
26 | landmark_anno_filename = config.PNET_LANDMARK_ANNO_FILENAME
27 | save_landmark_anno = os.path.join(anno_dir,landmark_anno_filename)
28 |
29 | f = open(save_landmark_anno, 'w')
30 | # dstdir = "train_landmark_few"
31 |
32 |
33 | with open(anno_file, 'r') as f2:
34 | annotations = f2.readlines()
35 |
36 | num = len(annotations)
37 | print("%d pics in total" % num)
38 |
39 | l_idx =0
40 | idx = 0
41 | # image_path bbox landmark(5*2)
42 | for annotation in annotations:
43 | # print imgPath
44 |
45 | annotation = annotation.strip().split(' ')
46 |
47 |         assert len(annotation)==15,"each line should have 15 elements"
48 |
49 | im_path = os.path.join(prefix,annotation[0].replace("\\", "/"))
50 |
51 | gt_box = map(float, annotation[1:5])
52 | gt_box = [gt_box[0], gt_box[2], gt_box[1], gt_box[3]]
53 |
54 |
55 | gt_box = np.array(gt_box, dtype=np.int32)
56 |
57 |
58 |
59 |         landmark = map(float, annotation[5:])
60 | landmark = np.array(landmark, dtype=np.float)
61 |
62 | img = cv2.imread(im_path)
63 | assert (img is not None)
64 |
65 | height, width, channel = img.shape
66 | # crop_face = img[gt_box[1]:gt_box[3]+1, gt_box[0]:gt_box[2]+1]
67 | # crop_face = cv2.resize(crop_face,(size,size))
68 |
69 | idx = idx + 1
70 | if idx % 100 == 0:
71 | print("%d images done, landmark images: %d"%(idx,l_idx))
72 |
73 | x1, y1, x2, y2 = gt_box
74 |
75 | # gt's width
76 | w = x2 - x1 + 1
77 | # gt's height
78 | h = y2 - y1 + 1
79 | if max(w, h) < 40 or x1 < 0 or y1 < 0:
80 | continue
81 | # random shift
82 | for i in range(10):
83 | bbox_size = npr.randint(int(min(w, h) * 0.8), np.ceil(1.25 * max(w, h)))
84 | delta_x = npr.randint(-w * 0.2, w * 0.2)
85 | delta_y = npr.randint(-h * 0.2, h * 0.2)
86 | nx1 = max(x1 + w / 2 - bbox_size / 2 + delta_x, 0)
87 | ny1 = max(y1 + h / 2 - bbox_size / 2 + delta_y, 0)
88 |
89 | nx2 = nx1 + bbox_size
90 | ny2 = ny1 + bbox_size
91 | if nx2 > width or ny2 > height:
92 | continue
93 | crop_box = np.array([nx1, ny1, nx2, ny2])
94 | cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :]
95 | resized_im = cv2.resize(cropped_im, (size, size),interpolation=cv2.INTER_LINEAR)
96 |
97 | offset_x1 = (x1 - nx1) / float(bbox_size)
98 | offset_y1 = (y1 - ny1) / float(bbox_size)
99 | offset_x2 = (x2 - nx2) / float(bbox_size)
100 | offset_y2 = (y2 - ny2) / float(bbox_size)
101 |
102 | offset_left_eye_x = (landmark[0] - nx1) / float(bbox_size)
103 | offset_left_eye_y = (landmark[1] - ny1) / float(bbox_size)
104 |
105 | offset_right_eye_x = (landmark[2] - nx1) / float(bbox_size)
106 | offset_right_eye_y = (landmark[3] - ny1) / float(bbox_size)
107 |
108 | offset_nose_x = (landmark[4] - nx1) / float(bbox_size)
109 | offset_nose_y = (landmark[5] - ny1) / float(bbox_size)
110 |
111 | offset_left_mouth_x = (landmark[6] - nx1) / float(bbox_size)
112 | offset_left_mouth_y = (landmark[7] - ny1) / float(bbox_size)
113 |
114 | offset_right_mouth_x = (landmark[8] - nx1) / float(bbox_size)
115 | offset_right_mouth_y = (landmark[9] - ny1) / float(bbox_size)
116 |
117 |
118 | # cal iou
119 | iou = utils.IoU(crop_box.astype(np.float), np.expand_dims(gt_box.astype(np.float), 0))
120 | if iou > 0.65:
121 | save_file = os.path.join(landmark_imgs_save_dir, "%s.jpg" % l_idx)
122 | cv2.imwrite(save_file, resized_im)
123 |
124 | f.write(save_file + ' -2 %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f \n' % \
125 | (offset_x1, offset_y1, offset_x2, offset_y2, \
126 | offset_left_eye_x,offset_left_eye_y,offset_right_eye_x,offset_right_eye_y,offset_nose_x,offset_nose_y,offset_left_mouth_x,offset_left_mouth_y,offset_right_mouth_x,offset_right_mouth_y))
127 |
128 | l_idx += 1
129 |
130 |
131 | f.close()
132 |
133 |
134 |
135 |
136 | def parse_args():
137 | parser = argparse.ArgumentParser(description='Test mtcnn',
138 | formatter_class=argparse.ArgumentDefaultsHelpFormatter)
139 |
140 |     parser.add_argument('--dface_traindata_store', dest='traindata_store', help='dface train data temporary folder, includes 12,24,48/positive,negative,part,landmark',
141 | default='../data/wider/', type=str)
142 | parser.add_argument('--anno_file', dest='annotation_file', help='celeba dataset original annotation file',
143 | default='../data/wider/anno.txt', type=str)
144 | parser.add_argument('--prefix_path', dest='prefix_path', help='annotation file image prefix root path',
145 | default='../data/', type=str)
146 |
147 |
148 | args = parser.parse_args()
149 | return args
150 |
151 | if __name__ == '__main__':
152 | args = parse_args()
153 |
154 | gen_data(args.annotation_file, args.traindata_store, args.prefix_path)
155 |
156 |
157 |
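158 | 
159 | # --- Illustrative note (not part of the original script) ---
160 | # Each annotation line is asserted to carry 15 fields: an image path, four
161 | # box values and ten landmark coordinates. The reorder near the top of the
162 | # loop suggests the box values arrive as (x1, x2, y1, y2); a minimal parser
163 | # for one line under that assumption could look like this:
164 | def parse_landmark_line(line):
165 |     fields = line.strip().split(' ')
166 |     assert len(fields) == 15, "each line should have 15 elements"
167 |     im_path = fields[0].replace("\\", "/")
168 |     x1, x2, y1, y2 = [float(v) for v in fields[1:5]]
169 |     landmark = [float(v) for v in fields[5:]]  # five (x, y) points
170 |     return im_path, (x1, y1, x2, y2), landmark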
--------------------------------------------------------------------------------
/dface/prepare_data/gen_landmark_24.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import os
3 | import cv2
4 | import numpy as np
5 | import random
6 | import sys
7 | import numpy.random as npr
8 | import argparse
9 | import dface.config as config
10 | import dface.core.utils as utils
11 |
12 |
13 |
14 | def gen_data(anno_file, data_dir, prefix):
15 |
16 |
17 | size = 24
18 | image_id = 0
19 |
20 | landmark_imgs_save_dir = os.path.join(data_dir,"24/landmark")
21 | if not os.path.exists(landmark_imgs_save_dir):
22 | os.makedirs(landmark_imgs_save_dir)
23 |
24 | anno_dir = config.ANNO_STORE_DIR
25 | if not os.path.exists(anno_dir):
26 | os.makedirs(anno_dir)
27 |
28 | landmark_anno_filename = config.RNET_LANDMARK_ANNO_FILENAME
29 | save_landmark_anno = os.path.join(anno_dir,landmark_anno_filename)
30 |
31 | f = open(save_landmark_anno, 'w')
32 | # dstdir = "train_landmark_few"
33 |
34 | with open(anno_file, 'r') as f2:
35 | annotations = f2.readlines()
36 |
37 | num = len(annotations)
38 | print("%d total images" % num)
39 |
40 | l_idx =0
41 | idx = 0
42 | # image_path bbox landmark(5*2)
43 | for annotation in annotations:
44 | # print imgPath
45 |
46 | annotation = annotation.strip().split(' ')
47 |
48 |         assert len(annotation)==15,"each line should have 15 elements"
49 |
50 | im_path = os.path.join(prefix,annotation[0].replace("\\", "/"))
51 |
52 | gt_box = map(float, annotation[1:5])
53 | gt_box = [gt_box[0], gt_box[2], gt_box[1], gt_box[3]]
54 |
55 |
56 | gt_box = np.array(gt_box, dtype=np.int32)
57 |
58 | landmark = map(float, annotation[5:])
59 | landmark = np.array(landmark, dtype=np.float)
60 |
61 | img = cv2.imread(im_path)
62 | assert (img is not None)
63 |
64 | height, width, channel = img.shape
65 | # crop_face = img[gt_box[1]:gt_box[3]+1, gt_box[0]:gt_box[2]+1]
66 | # crop_face = cv2.resize(crop_face,(size,size))
67 |
68 | idx = idx + 1
69 | if idx % 100 == 0:
70 | print("%d images done, landmark images: %d"%(idx,l_idx))
71 |
72 | x1, y1, x2, y2 = gt_box
73 |
74 | # gt's width
75 | w = x2 - x1 + 1
76 | # gt's height
77 | h = y2 - y1 + 1
78 | if max(w, h) < 40 or x1 < 0 or y1 < 0:
79 | continue
80 | # random shift
81 | for i in range(10):
82 | bbox_size = npr.randint(int(min(w, h) * 0.8), np.ceil(1.25 * max(w, h)))
83 | delta_x = npr.randint(-w * 0.2, w * 0.2)
84 | delta_y = npr.randint(-h * 0.2, h * 0.2)
85 | nx1 = max(x1 + w / 2 - bbox_size / 2 + delta_x, 0)
86 | ny1 = max(y1 + h / 2 - bbox_size / 2 + delta_y, 0)
87 |
88 | nx2 = nx1 + bbox_size
89 | ny2 = ny1 + bbox_size
90 | if nx2 > width or ny2 > height:
91 | continue
92 | crop_box = np.array([nx1, ny1, nx2, ny2])
93 | cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :]
94 | resized_im = cv2.resize(cropped_im, (size, size),interpolation=cv2.INTER_LINEAR)
95 |
96 | offset_x1 = (x1 - nx1) / float(bbox_size)
97 | offset_y1 = (y1 - ny1) / float(bbox_size)
98 | offset_x2 = (x2 - nx2) / float(bbox_size)
99 | offset_y2 = (y2 - ny2) / float(bbox_size)
100 |
101 | offset_left_eye_x = (landmark[0] - nx1) / float(bbox_size)
102 | offset_left_eye_y = (landmark[1] - ny1) / float(bbox_size)
103 |
104 | offset_right_eye_x = (landmark[2] - nx1) / float(bbox_size)
105 | offset_right_eye_y = (landmark[3] - ny1) / float(bbox_size)
106 |
107 | offset_nose_x = (landmark[4] - nx1) / float(bbox_size)
108 | offset_nose_y = (landmark[5] - ny1) / float(bbox_size)
109 |
110 | offset_left_mouth_x = (landmark[6] - nx1) / float(bbox_size)
111 | offset_left_mouth_y = (landmark[7] - ny1) / float(bbox_size)
112 |
113 | offset_right_mouth_x = (landmark[8] - nx1) / float(bbox_size)
114 | offset_right_mouth_y = (landmark[9] - ny1) / float(bbox_size)
115 |
116 |
117 | # cal iou
118 | iou = utils.IoU(crop_box.astype(np.float), np.expand_dims(gt_box.astype(np.float), 0))
119 | if iou > 0.65:
120 | save_file = os.path.join(landmark_imgs_save_dir, "%s.jpg" % l_idx)
121 | cv2.imwrite(save_file, resized_im)
122 |
123 | f.write(save_file + ' -2 %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f \n' % \
124 | (offset_x1, offset_y1, offset_x2, offset_y2, \
125 | offset_left_eye_x,offset_left_eye_y,offset_right_eye_x,offset_right_eye_y,offset_nose_x,offset_nose_y,offset_left_mouth_x,offset_left_mouth_y,offset_right_mouth_x,offset_right_mouth_y))
126 |
127 | l_idx += 1
128 |
129 | f.close()
130 |
131 |
132 |
133 |
134 | def parse_args():
135 | parser = argparse.ArgumentParser(description='Test mtcnn',
136 | formatter_class=argparse.ArgumentDefaultsHelpFormatter)
137 |
138 |     parser.add_argument('--dface_traindata_store', dest='traindata_store', help='dface train data temporary folder, includes 12,24,48/positive,negative,part,landmark',
139 | default='/idata/data/wider/', type=str)
140 | parser.add_argument('--anno_file', dest='annotation_file', help='celeba dataset original annotation file',
141 | default='/idata/data/trainImageList.txt', type=str)
142 | parser.add_argument('--prefix_path', dest='prefix_path', help='annotation file image prefix root path',
143 | default='/idata/data', type=str)
144 |
145 |
146 | args = parser.parse_args()
147 | return args
148 |
149 | if __name__ == '__main__':
150 | args = parse_args()
151 |
152 | gen_data(args.annotation_file, args.traindata_store, args.prefix_path)
153 |
154 |
155 |
--------------------------------------------------------------------------------
/dface/prepare_data/gen_landmark_48.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import os
3 | import cv2
4 | import numpy as np
5 | import random
6 | import sys
7 | import numpy.random as npr
8 | import argparse
9 | import dface.config as config
10 | import dface.core.utils as utils
11 |
12 |
13 | def gen_data(anno_file, data_dir, prefix):
14 |
15 |
16 | size = 48
17 | image_id = 0
18 |
19 | landmark_imgs_save_dir = os.path.join(data_dir,"48/landmark")
20 | if not os.path.exists(landmark_imgs_save_dir):
21 | os.makedirs(landmark_imgs_save_dir)
22 |
23 | anno_dir = config.ANNO_STORE_DIR
24 | if not os.path.exists(anno_dir):
25 | os.makedirs(anno_dir)
26 |
27 | landmark_anno_filename = config.ONET_LANDMARK_ANNO_FILENAME
28 | save_landmark_anno = os.path.join(anno_dir,landmark_anno_filename)
29 |
30 | f = open(save_landmark_anno, 'w')
31 | # dstdir = "train_landmark_few"
32 |
33 | with open(anno_file, 'r') as f2:
34 | annotations = f2.readlines()
35 |
36 | num = len(annotations)
37 | print("%d total images" % num)
38 |
39 | l_idx =0
40 | idx = 0
41 | # image_path bbox landmark(5*2)
42 | for annotation in annotations:
43 | # print imgPath
44 |
45 | annotation = annotation.strip().split(' ')
46 |
47 |         assert len(annotation)==15,"each line should have 15 elements"
48 |
49 | im_path = os.path.join(prefix,annotation[0].replace("\\", "/"))
50 |
51 | gt_box = map(float, annotation[1:5])
52 | # gt_box = [gt_box[0], gt_box[2], gt_box[1], gt_box[3]]
53 |
54 |
55 | gt_box = np.array(gt_box, dtype=np.int32)
56 |
57 | landmark = map(float, annotation[5:])
58 | landmark = np.array(landmark, dtype=np.float)
59 |
60 | img = cv2.imread(im_path)
61 | assert (img is not None)
62 |
63 | height, width, channel = img.shape
64 | # crop_face = img[gt_box[1]:gt_box[3]+1, gt_box[0]:gt_box[2]+1]
65 | # crop_face = cv2.resize(crop_face,(size,size))
66 |
67 | idx = idx + 1
68 | if idx % 100 == 0:
69 | print("%d images done, landmark images: %d"%(idx,l_idx))
70 |
71 | x1, y1, x2, y2 = gt_box
72 |
73 | # gt's width
74 | w = x2 - x1 + 1
75 | # gt's height
76 | h = y2 - y1 + 1
77 | if max(w, h) < 40 or x1 < 0 or y1 < 0:
78 | continue
79 | # random shift
80 | for i in range(10):
81 | bbox_size = npr.randint(int(min(w, h) * 0.8), np.ceil(1.25 * max(w, h)))
82 | delta_x = npr.randint(-w * 0.2, w * 0.2)
83 | delta_y = npr.randint(-h * 0.2, h * 0.2)
84 | nx1 = max(x1 + w / 2 - bbox_size / 2 + delta_x, 0)
85 | ny1 = max(y1 + h / 2 - bbox_size / 2 + delta_y, 0)
86 |
87 | nx2 = nx1 + bbox_size
88 | ny2 = ny1 + bbox_size
89 | if nx2 > width or ny2 > height:
90 | continue
91 | crop_box = np.array([nx1, ny1, nx2, ny2])
92 | cropped_im = img[ny1:ny2 + 1, nx1:nx2 + 1, :]
93 | resized_im = cv2.resize(cropped_im, (size, size),interpolation=cv2.INTER_LINEAR)
94 |
95 | offset_x1 = (x1 - nx1) / float(bbox_size)
96 | offset_y1 = (y1 - ny1) / float(bbox_size)
97 | offset_x2 = (x2 - nx2) / float(bbox_size)
98 | offset_y2 = (y2 - ny2) / float(bbox_size)
99 |
100 | offset_left_eye_x = (landmark[0] - nx1) / float(bbox_size)
101 | offset_left_eye_y = (landmark[1] - ny1) / float(bbox_size)
102 |
103 | offset_right_eye_x = (landmark[2] - nx1) / float(bbox_size)
104 | offset_right_eye_y = (landmark[3] - ny1) / float(bbox_size)
105 |
106 | offset_nose_x = (landmark[4] - nx1) / float(bbox_size)
107 | offset_nose_y = (landmark[5] - ny1) / float(bbox_size)
108 |
109 | offset_left_mouth_x = (landmark[6] - nx1) / float(bbox_size)
110 | offset_left_mouth_y = (landmark[7] - ny1) / float(bbox_size)
111 |
112 | offset_right_mouth_x = (landmark[8] - nx1) / float(bbox_size)
113 | offset_right_mouth_y = (landmark[9] - ny1) / float(bbox_size)
114 |
115 |
116 | # cal iou
117 | iou = utils.IoU(crop_box.astype(np.float), np.expand_dims(gt_box.astype(np.float), 0))
118 | if iou > 0.65:
119 | save_file = os.path.join(landmark_imgs_save_dir, "%s.jpg" % l_idx)
120 | cv2.imwrite(save_file, resized_im)
121 |
122 | f.write(save_file + ' -2 %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f \n' % \
123 | (offset_x1, offset_y1, offset_x2, offset_y2, \
124 | offset_left_eye_x,offset_left_eye_y,offset_right_eye_x,offset_right_eye_y,offset_nose_x,offset_nose_y,offset_left_mouth_x,offset_left_mouth_y,offset_right_mouth_x,offset_right_mouth_y))
125 |
126 | l_idx += 1
127 |
128 | f.close()
129 |
130 |
131 |
132 |
133 | def parse_args():
134 | parser = argparse.ArgumentParser(description='Test mtcnn',
135 | formatter_class=argparse.ArgumentDefaultsHelpFormatter)
136 |
137 |     parser.add_argument('--dface_traindata_store', dest='traindata_store', help='dface train data temporary folder, includes 12,24,48/positive,negative,part,landmark',
138 | default='/idata/data/wider/', type=str)
139 | parser.add_argument('--anno_file', dest='annotation_file', help='celeba dataset original annotation file',
140 | default='/idata/data/trainImageList.txt', type=str)
141 | parser.add_argument('--prefix_path', dest='prefix_path', help='annotation file image prefix root path',
142 | default='/idata/data', type=str)
143 |
144 |
145 | args = parser.parse_args()
146 | return args
147 |
148 | if __name__ == '__main__':
149 | args = parse_args()
150 |
151 | gen_data(args.annotation_file, args.traindata_store, args.prefix_path)
152 |
153 |
154 |
--------------------------------------------------------------------------------
/dface/prepare_data/gen_landmark_net_48.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | import cv2
4 | import numpy as np
5 | from core.detect import MtcnnDetector,create_mtcnn_net
6 | from core.imagedb import ImageDB
7 | from core.image_reader import TestImageLoader
8 | import time
9 | import os
10 | import cPickle
11 | from dface.core.utils import convert_to_square,IoU
12 | import dface.config as config
13 | import dface.core.vision as vision
14 |
15 | def gen_landmark48_data(data_dir, anno_file, pnet_model_file, rnet_model_file, prefix_path='', use_cuda=True, vis=False):
16 |
17 |
18 | pnet, rnet, _ = create_mtcnn_net(p_model_path=pnet_model_file, r_model_path=rnet_model_file, use_cuda=use_cuda)
19 | mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, min_face_size=12)
20 |
21 | imagedb = ImageDB(anno_file,mode="test",prefix_path=prefix_path)
22 | imdb = imagedb.load_imdb()
23 | image_reader = TestImageLoader(imdb,1,False)
24 |
25 | all_boxes = list()
26 | batch_idx = 0
27 |
28 | for databatch in image_reader:
29 | if batch_idx % 100 == 0:
30 | print("%d images done" % batch_idx)
31 | im = databatch
32 |
33 |
34 | if im.shape[0] >= 1200 or im.shape[1] >=1200:
35 | all_boxes.append(np.array([]))
36 | batch_idx += 1
37 | continue
38 |
39 |
40 | t = time.time()
41 |
42 | p_boxes, p_boxes_align = mtcnn_detector.detect_pnet(im=im)
43 |
44 | boxes, boxes_align = mtcnn_detector.detect_rnet(im=im, dets=p_boxes_align)
45 |
46 | if boxes_align is None:
47 | all_boxes.append(np.array([]))
48 | batch_idx += 1
49 | continue
50 | if vis:
51 | rgb_im = cv2.cvtColor(np.asarray(im), cv2.COLOR_BGR2RGB)
52 | vision.vis_two(rgb_im, boxes, boxes_align)
53 |
54 | t1 = time.time() - t
55 | t = time.time()
56 | all_boxes.append(boxes_align)
57 | batch_idx += 1
58 |
59 | save_path = config.MODEL_STORE_DIR
60 |
61 | if not os.path.exists(save_path):
62 | os.mkdir(save_path)
63 |
64 | save_file = os.path.join(save_path, "detections_%d.pkl" % int(time.time()))
65 | with open(save_file, 'wb') as f:
66 | cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)
67 |
68 |
69 | gen_sample_data(data_dir,anno_file,save_file, prefix_path)
70 |
71 |
72 |
73 | def gen_sample_data(data_dir, anno_file, det_boxs_file, prefix_path =''):
74 |
75 | landmark_save_dir = os.path.join(data_dir, "48/landmark")
76 |
77 | if not os.path.exists(landmark_save_dir):
78 | os.makedirs(landmark_save_dir)
79 |
80 |
81 | # load ground truth from annotation file
82 |     # format of each line: image/path x1 y1 x2 y2 followed by ten landmark coordinates
83 |
84 | with open(anno_file, 'r') as f:
85 | annotations = f.readlines()
86 |
87 | image_size = 48
88 | net = "onet"
89 |
90 | im_idx_list = list()
91 | gt_boxes_list = list()
92 | gt_landmark_list = list()
93 | num_of_images = len(annotations)
94 | print("processing %d images in total" % num_of_images)
95 |
96 | for annotation in annotations:
97 | annotation = annotation.strip().split(' ')
98 | im_idx = annotation[0]
99 |
100 | boxes = map(float, annotation[1:5])
101 | boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4)
102 | landmarks = map(float, annotation[5:])
103 | landmarks = np.array(landmarks, dtype=np.float32).reshape(-1, 10)
104 |
105 | im_idx_list.append(im_idx)
106 | gt_boxes_list.append(boxes)
107 | gt_landmark_list.append(landmarks)
108 |
109 |
110 | save_path = config.ANNO_STORE_DIR
111 | if not os.path.exists(save_path):
112 | os.makedirs(save_path)
113 |
114 | f = open(os.path.join(save_path, 'landmark_48.txt'), 'w')
115 |
116 |
117 |     det_handle = open(det_boxs_file, 'rb')
118 |
119 | det_boxes = cPickle.load(det_handle)
120 | print(len(det_boxes), num_of_images)
121 | assert len(det_boxes) == num_of_images, "incorrect detections or ground truths"
122 |
123 |     # index of landmark face crops, used as their image names
124 | p_idx = 0
125 | image_done = 0
126 | for im_idx, dets, gts, landmark in zip(im_idx_list, det_boxes, gt_boxes_list, gt_landmark_list):
127 | if image_done % 100 == 0:
128 | print("%d images done" % image_done)
129 | image_done += 1
130 |
131 | if dets.shape[0] == 0:
132 | continue
133 | img = cv2.imread(os.path.join(prefix_path,im_idx))
134 | dets = convert_to_square(dets)
135 | dets[:, 0:4] = np.round(dets[:, 0:4])
136 |
137 | for box in dets:
138 | x_left, y_top, x_right, y_bottom = box[0:4].astype(int)
139 | width = x_right - x_left + 1
140 | height = y_bottom - y_top + 1
141 |
142 | # ignore box that is too small or beyond image border
143 | if width < 20 or x_left < 0 or y_top < 0 or x_right > img.shape[1] - 1 or y_bottom > img.shape[0] - 1:
144 | continue
145 |
146 | # compute intersection over union(IoU) between current box and all gt boxes
147 | Iou = IoU(box, gts)
148 | cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
149 | resized_im = cv2.resize(cropped_im, (image_size, image_size),
150 | interpolation=cv2.INTER_LINEAR)
151 |
152 |             # skip negative crops; no label is written for them here
153 | if np.max(Iou) < 0.3:
154 |                 # IoU with all gt boxes is below 0.3, so discard this crop
155 | continue
156 | else:
157 | # find gt_box with the highest iou
158 | idx = np.argmax(Iou)
159 | assigned_gt = gts[idx]
160 | x1, y1, x2, y2 = assigned_gt
161 |
162 | # compute bbox reg label
163 | offset_x1 = (x1 - x_left) / float(width)
164 | offset_y1 = (y1 - y_top) / float(height)
165 | offset_x2 = (x2 - x_right) / float(width)
166 | offset_y2 = (y2 - y_bottom) / float(height)
167 |
168 | offset_left_eye_x = (landmark[0,0] - x_left) / float(width)
169 | offset_left_eye_y = (landmark[0,1] - y_top) / float(height)
170 |
171 | offset_right_eye_x = (landmark[0,2] - x_left) / float(width)
172 | offset_right_eye_y = (landmark[0,3] - y_top) / float(height)
173 |
174 | offset_nose_x = (landmark[0,4] - x_left) / float(width)
175 | offset_nose_y = (landmark[0,5] - y_top) / float(height)
176 |
177 | offset_left_mouth_x = (landmark[0,6] - x_left) / float(width)
178 | offset_left_mouth_y = (landmark[0,7] - y_top) / float(height)
179 |
180 | offset_right_mouth_x = (landmark[0,8] - x_left) / float(width)
181 | offset_right_mouth_y = (landmark[0,9] - y_top) / float(height)
182 |
183 |
184 |
185 |                 # save positive (landmark) crops and write labels
186 | if np.max(Iou) >= 0.65:
187 | save_file = os.path.join(landmark_save_dir, "%s.jpg" % p_idx)
188 |
189 | f.write(save_file + ' -2 %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f \n' % \
190 | (offset_x1, offset_y1, offset_x2, offset_y2, \
191 | offset_left_eye_x, offset_left_eye_y, offset_right_eye_x, offset_right_eye_y,
192 | offset_nose_x, offset_nose_y, offset_left_mouth_x, offset_left_mouth_y,
193 | offset_right_mouth_x, offset_right_mouth_y))
194 |
195 | cv2.imwrite(save_file, resized_im)
196 | p_idx += 1
197 |
198 | f.close()
199 |
200 |
201 |
202 | def model_store_path():
203 | return os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))+"/model_store"
204 |
205 |
206 |
207 | def parse_args():
208 | parser = argparse.ArgumentParser(description='Test mtcnn',
209 | formatter_class=argparse.ArgumentDefaultsHelpFormatter)
210 |
211 | parser.add_argument('--dataset_path', dest='dataset_path', help='dataset folder',
212 | default='../data/wider/', type=str)
213 |     parser.add_argument('--anno_file', dest='annotation_file', help='dataset original annotation file',
214 | default='../data/wider/anno.txt', type=str)
215 | parser.add_argument('--pmodel_file', dest='pnet_model_file', help='PNet model file path',
216 | default='/idata/workspace/mtcnn/model_store/pnet_epoch_5best.pt', type=str)
217 | parser.add_argument('--rmodel_file', dest='rnet_model_file', help='RNet model file path',
218 | default='/idata/workspace/mtcnn/model_store/rnet_epoch_1.pt', type=str)
219 | parser.add_argument('--gpu', dest='use_cuda', help='with gpu',
220 | default=config.USE_CUDA, type=bool)
221 | parser.add_argument('--prefix_path', dest='prefix_path', help='image prefix root path',
222 | default='', type=str)
223 |
224 | args = parser.parse_args()
225 | return args
226 |
227 |
228 |
229 | if __name__ == '__main__':
230 | args = parse_args()
231 | gen_landmark48_data(args.dataset_path, args.annotation_file, args.pnet_model_file, args.rnet_model_file, args.prefix_path, args.use_cuda)
232 |
233 |
234 |
235 |
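236 | 
237 | # --- Illustrative sketch (not part of the original script) ---
238 | # gen_landmark48_data() pickles one detection array per input image into
239 | # detections_<timestamp>.pkl, and gen_sample_data() later reloads that file
240 | # and pairs each entry with the ground truth of the same image. A minimal
241 | # way to inspect such a file (reusing this module's cPickle import) is:
242 | def inspect_detections(det_boxs_file):
243 |     with open(det_boxs_file, 'rb') as handle:
244 |         det_boxes = cPickle.load(handle)
245 |     non_empty = sum(1 for boxes in det_boxes if len(boxes) > 0)
246 |     print("%d images, %d with at least one detection" % (len(det_boxes), non_empty))
247 |     return det_boxes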
--------------------------------------------------------------------------------
/dface/prepare_data/widerface_annotation_gen/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/dface/prepare_data/widerface_annotation_gen/__init__.py
--------------------------------------------------------------------------------
/dface/prepare_data/widerface_annotation_gen/transform.py:
--------------------------------------------------------------------------------
1 | from dface.prepare_data.widerface_annotation_gen.wider_loader import WIDER
2 | import cv2
3 | import time
4 |
5 | #wider face original images path
6 | path_to_image = '/idata/data/wider_face/WIDER_train/images'
7 |
8 | #matlab file path
9 | file_to_label = './wider_face_train.mat'
10 |
11 | #target file path
12 | target_file = './anno.txt'
13 |
14 | wider = WIDER(file_to_label, path_to_image)
15 |
16 |
17 | line_count = 0
18 | box_count = 0
19 |
20 | print('start transforming....')
21 | t = time.time()
22 |
23 | with open(target_file, 'w+') as f:
24 | # press ctrl-C to stop the process
25 | for data in wider.next():
26 | line = []
27 | line.append(str(data.image_name))
28 | line_count += 1
29 | for i,box in enumerate(data.bboxes):
30 | box_count += 1
31 | for j,bvalue in enumerate(box):
32 | line.append(str(bvalue))
33 |
34 | line.append('\n')
35 |
36 | line_str = ' '.join(line)
37 | f.write(line_str)
38 |
39 | st = time.time()-t
40 | print('end transforming')
41 |
42 | print('time spent: %.2fs' % st)
43 | print('total line(images):%d'%line_count)
44 | print('total boxes(faces):%d'%box_count)
45 |
46 |
47 |
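48 | 
49 | # --- Illustrative note (not part of the original script) ---
50 | # Each line written to anno.txt above is the image path followed by the four
51 | # corner values (xmin ymin xmax ymax) of every face box in that image, all
52 | # space-separated. A minimal reader for that layout, under that assumption,
53 | # could be:
54 | def read_anno_line(line):
55 |     fields = line.strip().split(' ')
56 |     image_path = fields[0]
57 |     values = [float(v) for v in fields[1:]]
58 |     boxes = [values[i:i + 4] for i in range(0, len(values), 4)]
59 |     return image_path, boxes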
--------------------------------------------------------------------------------
/dface/prepare_data/widerface_annotation_gen/wider_face_train.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/dface/prepare_data/widerface_annotation_gen/wider_face_train.mat
--------------------------------------------------------------------------------
/dface/prepare_data/widerface_annotation_gen/wider_loader.py:
--------------------------------------------------------------------------------
1 | import os
2 | from scipy.io import loadmat
3 |
4 | class DATA:
5 | def __init__(self, image_name, bboxes):
6 | self.image_name = image_name
7 | self.bboxes = bboxes
8 |
9 |
10 | class WIDER(object):
11 | def __init__(self, file_to_label, path_to_image=None):
12 | self.file_to_label = file_to_label
13 | self.path_to_image = path_to_image
14 |
15 | self.f = loadmat(file_to_label)
16 | self.event_list = self.f['event_list']
17 | self.file_list = self.f['file_list']
18 | self.face_bbx_list = self.f['face_bbx_list']
19 |
20 | def next(self):
21 | for event_idx, event in enumerate(self.event_list):
22 | e = event[0][0].encode('utf-8')
23 | for file, bbx in zip(self.file_list[event_idx][0],
24 | self.face_bbx_list[event_idx][0]):
25 | f = file[0][0].encode('utf-8')
26 | path_of_image = os.path.join(self.path_to_image, e, f) + ".jpg"
27 |
28 | bboxes = []
29 | bbx0 = bbx[0]
30 | for i in range(bbx0.shape[0]):
31 | xmin, ymin, xmax, ymax = bbx0[i]
32 | bboxes.append((int(xmin), int(ymin), int(xmax), int(ymax)))
33 | yield DATA(path_of_image, bboxes)
34 |
35 |
--------------------------------------------------------------------------------
/dface/train_net/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/dface/train_net/__init__.py
--------------------------------------------------------------------------------
/dface/train_net/train.py:
--------------------------------------------------------------------------------
1 | from dface.core.image_reader import TrainImageReader
2 | import datetime
3 | import os
4 | from dface.core.models import PNet,RNet,ONet,LossFn
5 | import torch
6 | from torch.autograd import Variable
7 | import dface.core.image_tools as image_tools
8 |
9 |
10 |
11 |
12 |
13 | def compute_accuracy(prob_cls, gt_cls):
14 | prob_cls = torch.squeeze(prob_cls)
15 | gt_cls = torch.squeeze(gt_cls)
16 |
17 |     # we only need detections whose label is >= 0
18 | mask = torch.ge(gt_cls,0)
19 |     # get valid elements
20 | valid_gt_cls = torch.masked_select(gt_cls,mask)
21 | valid_prob_cls = torch.masked_select(prob_cls,mask)
22 | size = min(valid_gt_cls.size()[0], valid_prob_cls.size()[0])
23 | prob_ones = torch.ge(valid_prob_cls,0.6).float()
24 | right_ones = torch.eq(prob_ones,valid_gt_cls).float()
25 |
26 | return torch.div(torch.mul(torch.sum(right_ones),float(1.0)),float(size))
27 |
28 |
29 | def train_pnet(model_store_path, end_epoch,imdb,
30 | batch_size,frequent=50,base_lr=0.01,use_cuda=True):
31 |
32 | if not os.path.exists(model_store_path):
33 | os.makedirs(model_store_path)
34 |
35 | lossfn = LossFn()
36 | net = PNet(is_train=True, use_cuda=use_cuda)
37 | net.train()
38 | if use_cuda:
39 | net.cuda()
40 |
41 | optimizer = torch.optim.Adam(net.parameters(), lr=base_lr)
42 |
43 | train_data=TrainImageReader(imdb,12,batch_size,shuffle=True)
44 |
45 |
46 | for cur_epoch in range(1,end_epoch+1):
47 | train_data.reset()
48 | accuracy_list=[]
49 | cls_loss_list=[]
50 | bbox_loss_list=[]
51 | # landmark_loss_list=[]
52 |
53 | for batch_idx,(image,(gt_label,gt_bbox,gt_landmark))in enumerate(train_data):
54 |
55 | im_tensor = [ image_tools.convert_image_to_tensor(image[i,:,:,:]) for i in range(image.shape[0]) ]
56 | im_tensor = torch.stack(im_tensor)
57 |
58 | im_tensor = Variable(im_tensor)
59 | gt_label = Variable(torch.from_numpy(gt_label).float())
60 |
61 | gt_bbox = Variable(torch.from_numpy(gt_bbox).float())
62 | # gt_landmark = Variable(torch.from_numpy(gt_landmark).float())
63 |
64 | if use_cuda:
65 | im_tensor = im_tensor.cuda()
66 | gt_label = gt_label.cuda()
67 | gt_bbox = gt_bbox.cuda()
68 | # gt_landmark = gt_landmark.cuda()
69 |
70 | cls_pred, box_offset_pred = net(im_tensor)
71 | # all_loss, cls_loss, offset_loss = lossfn.loss(gt_label=label_y,gt_offset=bbox_y, pred_label=cls_pred, pred_offset=box_offset_pred)
72 |
73 | cls_loss = lossfn.cls_loss(gt_label,cls_pred)
74 | box_offset_loss = lossfn.box_loss(gt_label,gt_bbox,box_offset_pred)
75 | # landmark_loss = lossfn.landmark_loss(gt_label,gt_landmark,landmark_offset_pred)
76 |
77 | all_loss = cls_loss*1.0+box_offset_loss*0.5
78 |
79 | if batch_idx%frequent==0:
80 | accuracy=compute_accuracy(cls_pred,gt_label)
81 |
82 | show1 = accuracy.data.tolist()[0]
83 | show2 = cls_loss.data.tolist()[0]
84 | show3 = box_offset_loss.data.tolist()[0]
85 | show5 = all_loss.data.tolist()[0]
86 |
87 | print("%s : Epoch: %d, Step: %d, accuracy: %s, det loss: %s, bbox loss: %s, all_loss: %s, lr:%s "%(datetime.datetime.now(),cur_epoch,batch_idx, show1,show2,show3,show5,base_lr))
88 | accuracy_list.append(accuracy)
89 | cls_loss_list.append(cls_loss)
90 | bbox_loss_list.append(box_offset_loss)
91 |
92 | optimizer.zero_grad()
93 | all_loss.backward()
94 | optimizer.step()
95 |
96 |
97 | accuracy_avg = torch.mean(torch.cat(accuracy_list))
98 | cls_loss_avg = torch.mean(torch.cat(cls_loss_list))
99 | bbox_loss_avg = torch.mean(torch.cat(bbox_loss_list))
100 | # landmark_loss_avg = torch.mean(torch.cat(landmark_loss_list))
101 |
102 | show6 = accuracy_avg.data.tolist()[0]
103 | show7 = cls_loss_avg.data.tolist()[0]
104 | show8 = bbox_loss_avg.data.tolist()[0]
105 |
106 | print("Epoch: %d, accuracy: %s, cls loss: %s, bbox loss: %s" % (cur_epoch, show6, show7, show8))
107 | torch.save(net.state_dict(), os.path.join(model_store_path,"pnet_epoch_%d.pt" % cur_epoch))
108 | torch.save(net, os.path.join(model_store_path,"pnet_epoch_model_%d.pkl" % cur_epoch))
109 |
110 |
111 |
112 |
113 | def train_rnet(model_store_path, end_epoch,imdb,
114 | batch_size,frequent=50,base_lr=0.01,use_cuda=True):
115 |
116 | if not os.path.exists(model_store_path):
117 | os.makedirs(model_store_path)
118 |
119 | lossfn = LossFn()
120 | net = RNet(is_train=True, use_cuda=use_cuda)
121 | net.train()
122 | if use_cuda:
123 | net.cuda()
124 |
125 | optimizer = torch.optim.Adam(net.parameters(), lr=base_lr)
126 |
127 | train_data=TrainImageReader(imdb,24,batch_size,shuffle=True)
128 |
129 |
130 | for cur_epoch in range(1,end_epoch+1):
131 | train_data.reset()
132 | accuracy_list=[]
133 | cls_loss_list=[]
134 | bbox_loss_list=[]
135 | landmark_loss_list=[]
136 |
137 | for batch_idx,(image,(gt_label,gt_bbox,gt_landmark))in enumerate(train_data):
138 |
139 | im_tensor = [ image_tools.convert_image_to_tensor(image[i,:,:,:]) for i in range(image.shape[0]) ]
140 | im_tensor = torch.stack(im_tensor)
141 |
142 | im_tensor = Variable(im_tensor)
143 | gt_label = Variable(torch.from_numpy(gt_label).float())
144 |
145 | gt_bbox = Variable(torch.from_numpy(gt_bbox).float())
146 | gt_landmark = Variable(torch.from_numpy(gt_landmark).float())
147 |
148 | if use_cuda:
149 | im_tensor = im_tensor.cuda()
150 | gt_label = gt_label.cuda()
151 | gt_bbox = gt_bbox.cuda()
152 | gt_landmark = gt_landmark.cuda()
153 |
154 | cls_pred, box_offset_pred = net(im_tensor)
155 | # all_loss, cls_loss, offset_loss = lossfn.loss(gt_label=label_y,gt_offset=bbox_y, pred_label=cls_pred, pred_offset=box_offset_pred)
156 |
157 | cls_loss = lossfn.cls_loss(gt_label,cls_pred)
158 | box_offset_loss = lossfn.box_loss(gt_label,gt_bbox,box_offset_pred)
159 | # landmark_loss = lossfn.landmark_loss(gt_label,gt_landmark,landmark_offset_pred)
160 |
161 | all_loss = cls_loss*1.0+box_offset_loss*0.5
162 |
163 | if batch_idx%frequent==0:
164 | accuracy=compute_accuracy(cls_pred,gt_label)
165 |
166 | show1 = accuracy.data.tolist()[0]
167 | show2 = cls_loss.data.tolist()[0]
168 | show3 = box_offset_loss.data.tolist()[0]
169 | # show4 = landmark_loss.data.tolist()[0]
170 | show5 = all_loss.data.tolist()[0]
171 |
172 | print("%s : Epoch: %d, Step: %d, accuracy: %s, det loss: %s, bbox loss: %s, all_loss: %s, lr:%s "%(datetime.datetime.now(), cur_epoch, batch_idx, show1, show2, show3, show5, base_lr))
173 | accuracy_list.append(accuracy)
174 | cls_loss_list.append(cls_loss)
175 | bbox_loss_list.append(box_offset_loss)
176 | # landmark_loss_list.append(landmark_loss)
177 |
178 | optimizer.zero_grad()
179 | all_loss.backward()
180 | optimizer.step()
181 |
182 |
183 | accuracy_avg = torch.mean(torch.cat(accuracy_list))
184 | cls_loss_avg = torch.mean(torch.cat(cls_loss_list))
185 | bbox_loss_avg = torch.mean(torch.cat(bbox_loss_list))
186 | # landmark_loss_avg = torch.mean(torch.cat(landmark_loss_list))
187 |
188 | show6 = accuracy_avg.data.tolist()[0]
189 | show7 = cls_loss_avg.data.tolist()[0]
190 | show8 = bbox_loss_avg.data.tolist()[0]
191 | # show9 = landmark_loss_avg.data.tolist()[0]
192 |
193 | print("Epoch: %d, accuracy: %s, cls loss: %s, bbox loss: %s" % (cur_epoch, show6, show7, show8))
194 | torch.save(net.state_dict(), os.path.join(model_store_path,"rnet_epoch_%d.pt" % cur_epoch))
195 | torch.save(net, os.path.join(model_store_path,"rnet_epoch_model_%d.pkl" % cur_epoch))
196 |
197 |
198 | def train_onet(model_store_path, end_epoch,imdb,
199 | batch_size,frequent=50,base_lr=0.01,use_cuda=True):
200 |
201 | if not os.path.exists(model_store_path):
202 | os.makedirs(model_store_path)
203 |
204 | lossfn = LossFn()
205 | net = ONet(is_train=True)
206 | net.train()
207 | if use_cuda:
208 | net.cuda()
209 |
210 | optimizer = torch.optim.Adam(net.parameters(), lr=base_lr)
211 |
212 | train_data=TrainImageReader(imdb,48,batch_size,shuffle=True)
213 |
214 |
215 | for cur_epoch in range(1,end_epoch+1):
216 | train_data.reset()
217 | accuracy_list=[]
218 | cls_loss_list=[]
219 | bbox_loss_list=[]
220 | landmark_loss_list=[]
221 |
222 | for batch_idx,(image,(gt_label,gt_bbox,gt_landmark))in enumerate(train_data):
223 |
224 | im_tensor = [ image_tools.convert_image_to_tensor(image[i,:,:,:]) for i in range(image.shape[0]) ]
225 | im_tensor = torch.stack(im_tensor)
226 |
227 | im_tensor = Variable(im_tensor)
228 | gt_label = Variable(torch.from_numpy(gt_label).float())
229 |
230 | gt_bbox = Variable(torch.from_numpy(gt_bbox).float())
231 | gt_landmark = Variable(torch.from_numpy(gt_landmark).float())
232 |
233 | if use_cuda:
234 | im_tensor = im_tensor.cuda()
235 | gt_label = gt_label.cuda()
236 | gt_bbox = gt_bbox.cuda()
237 | gt_landmark = gt_landmark.cuda()
238 |
239 | cls_pred, box_offset_pred, landmark_offset_pred = net(im_tensor)
240 | # all_loss, cls_loss, offset_loss = lossfn.loss(gt_label=label_y,gt_offset=bbox_y, pred_label=cls_pred, pred_offset=box_offset_pred)
241 |
242 | cls_loss = lossfn.cls_loss(gt_label,cls_pred)
243 | box_offset_loss = lossfn.box_loss(gt_label,gt_bbox,box_offset_pred)
244 | landmark_loss = lossfn.landmark_loss(gt_label,gt_landmark,landmark_offset_pred)
245 |
246 | all_loss = cls_loss*0.8+box_offset_loss*0.6+landmark_loss*1.5
247 |
248 | if batch_idx%frequent==0:
249 | accuracy=compute_accuracy(cls_pred,gt_label)
250 |
251 | show1 = accuracy.data.tolist()[0]
252 | show2 = cls_loss.data.tolist()[0]
253 | show3 = box_offset_loss.data.tolist()[0]
254 | show4 = landmark_loss.data.tolist()[0]
255 | show5 = all_loss.data.tolist()[0]
256 |
257 | print("%s : Epoch: %d, Step: %d, accuracy: %s, det loss: %s, bbox loss: %s, landmark loss: %s, all_loss: %s, lr:%s "%(datetime.datetime.now(),cur_epoch,batch_idx, show1,show2,show3,show4,show5,base_lr))
258 | accuracy_list.append(accuracy)
259 | cls_loss_list.append(cls_loss)
260 | bbox_loss_list.append(box_offset_loss)
261 | landmark_loss_list.append(landmark_loss)
262 |
263 | optimizer.zero_grad()
264 | all_loss.backward()
265 | optimizer.step()
266 |
267 |
268 | accuracy_avg = torch.mean(torch.cat(accuracy_list))
269 | cls_loss_avg = torch.mean(torch.cat(cls_loss_list))
270 | bbox_loss_avg = torch.mean(torch.cat(bbox_loss_list))
271 | landmark_loss_avg = torch.mean(torch.cat(landmark_loss_list))
272 |
273 | show6 = accuracy_avg.data.tolist()[0]
274 | show7 = cls_loss_avg.data.tolist()[0]
275 | show8 = bbox_loss_avg.data.tolist()[0]
276 | show9 = landmark_loss_avg.data.tolist()[0]
277 |
278 | print("Epoch: %d, accuracy: %s, cls loss: %s, bbox loss: %s, landmark loss: %s " % (cur_epoch, show6, show7, show8, show9))
279 | torch.save(net.state_dict(), os.path.join(model_store_path,"onet_epoch_%d.pt" % cur_epoch))
280 | torch.save(net, os.path.join(model_store_path,"onet_epoch_model_%d.pkl" % cur_epoch))
281 |
282 |
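283 | 
284 | # --- Illustrative sketch (not part of the original module) ---
285 | # The trainers above combine their losses with fixed weights: train_pnet and
286 | # train_rnet use cls_loss*1.0 + box_offset_loss*0.5, while train_onet uses
287 | # cls_loss*0.8 + box_offset_loss*0.6 + landmark_loss*1.5. A hypothetical
288 | # helper that makes this weighting explicit:
289 | def weighted_total_loss(cls_loss, box_loss, landmark_loss=None,
290 |                         cls_weight=1.0, box_weight=0.5, landmark_weight=1.5):
291 |     total = cls_loss * cls_weight + box_loss * box_weight
292 |     if landmark_loss is not None:
293 |         total = total + landmark_loss * landmark_weight
294 |     return total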
--------------------------------------------------------------------------------
/dface/train_net/train_o_net.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 | from dface.core.imagedb import ImageDB
4 | import dface.train_net.train as train
5 | import dface.config as config
6 | import os
7 |
8 |
9 |
10 | def train_net(annotation_file, model_store_path,
11 | end_epoch=16, frequent=200, lr=0.01, batch_size=128, use_cuda=False):
12 |
13 | imagedb = ImageDB(annotation_file)
14 | gt_imdb = imagedb.load_imdb()
15 | gt_imdb = imagedb.append_flipped_images(gt_imdb)
16 |
17 | train.train_onet(model_store_path=model_store_path, end_epoch=end_epoch, imdb=gt_imdb, batch_size=batch_size, frequent=frequent, base_lr=lr, use_cuda=use_cuda)
18 |
19 | def parse_args():
20 | parser = argparse.ArgumentParser(description='Train ONet',
21 | formatter_class=argparse.ArgumentDefaultsHelpFormatter)
22 |
23 |
24 | parser.add_argument('--anno_file', dest='annotation_file',
25 | default=os.path.join(config.ANNO_STORE_DIR,config.ONET_TRAIN_IMGLIST_FILENAME), help='training data annotation file', type=str)
26 | parser.add_argument('--model_path', dest='model_store_path', help='training model store directory',
27 | default=config.MODEL_STORE_DIR, type=str)
28 | parser.add_argument('--end_epoch', dest='end_epoch', help='end epoch of training',
29 | default=config.END_EPOCH, type=int)
30 | parser.add_argument('--frequent', dest='frequent', help='frequency of logging',
31 | default=200, type=int)
32 | parser.add_argument('--lr', dest='lr', help='learning rate',
33 | default=0.002, type=float)
34 | parser.add_argument('--batch_size', dest='batch_size', help='train batch size',
35 | default=1000, type=int)
36 | parser.add_argument('--gpu', dest='use_cuda', help='train with gpu',
37 | default=config.USE_CUDA, type=bool)
38 |     parser.add_argument('--prefix_path', dest='prefix_path', help='training data annotation images prefix root path', type=str)
39 |
40 | args = parser.parse_args()
41 | return args
42 |
43 | if __name__ == '__main__':
44 | args = parse_args()
45 |     print('train ONet arguments:')
46 | print(args)
47 |
48 |
49 | train_net(annotation_file=args.annotation_file, model_store_path=args.model_store_path,
50 | end_epoch=args.end_epoch, frequent=args.frequent, lr=args.lr, batch_size=args.batch_size, use_cuda=args.use_cuda)
51 |
--------------------------------------------------------------------------------
/dface/train_net/train_p_net.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 | from dface.core.imagedb import ImageDB
4 | from dface.train_net.train import train_pnet
5 | import dface.config as config
6 | import os
7 |
8 |
9 |
10 | def train_net(annotation_file, model_store_path,
11 | end_epoch=16, frequent=200, lr=0.01, batch_size=128, use_cuda=False):
12 |
13 | imagedb = ImageDB(annotation_file)
14 | gt_imdb = imagedb.load_imdb()
15 | gt_imdb = imagedb.append_flipped_images(gt_imdb)
16 |
17 | train_pnet(model_store_path=model_store_path, end_epoch=end_epoch, imdb=gt_imdb, batch_size=batch_size, frequent=frequent, base_lr=lr, use_cuda=use_cuda)
18 |
19 | def parse_args():
20 | parser = argparse.ArgumentParser(description='Train PNet',
21 | formatter_class=argparse.ArgumentDefaultsHelpFormatter)
22 |
23 |
24 | parser.add_argument('--anno_file', dest='annotation_file',
25 | default=os.path.join(config.ANNO_STORE_DIR,config.PNET_TRAIN_IMGLIST_FILENAME), help='training data annotation file', type=str)
26 | parser.add_argument('--model_path', dest='model_store_path', help='training model store directory',
27 | default=config.MODEL_STORE_DIR, type=str)
28 | parser.add_argument('--end_epoch', dest='end_epoch', help='end epoch of training',
29 | default=config.END_EPOCH, type=int)
30 | parser.add_argument('--frequent', dest='frequent', help='frequency of logging',
31 | default=200, type=int)
32 | parser.add_argument('--lr', dest='lr', help='learning rate',
33 | default=config.TRAIN_LR, type=float)
34 | parser.add_argument('--batch_size', dest='batch_size', help='train batch size',
35 | default=config.TRAIN_BATCH_SIZE, type=int)
36 | parser.add_argument('--gpu', dest='use_cuda', help='train with gpu',
37 | default=config.USE_CUDA, type=bool)
38 |     parser.add_argument('--prefix_path', dest='prefix_path', help='training data annotation images prefix root path', type=str)
39 |
40 | args = parser.parse_args()
41 | return args
42 |
43 | if __name__ == '__main__':
44 | args = parse_args()
45 |     print('train PNet arguments:')
46 | print(args)
47 |
48 | train_net(annotation_file=args.annotation_file, model_store_path=args.model_store_path,
49 | end_epoch=args.end_epoch, frequent=args.frequent, lr=args.lr, batch_size=args.batch_size, use_cuda=args.use_cuda)
50 |
--------------------------------------------------------------------------------
/dface/train_net/train_r_net.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 | from dface.core.imagedb import ImageDB
4 | import dface.train_net.train as train
5 | import dface.config as config
6 | import os
7 |
8 |
9 |
10 | def train_net(annotation_file, model_store_path,
11 | end_epoch=16, frequent=200, lr=0.01, batch_size=128, use_cuda=False):
12 |
13 | imagedb = ImageDB(annotation_file)
14 | gt_imdb = imagedb.load_imdb()
15 | gt_imdb = imagedb.append_flipped_images(gt_imdb)
16 |
17 | train.train_rnet(model_store_path=model_store_path, end_epoch=end_epoch, imdb=gt_imdb, batch_size=batch_size, frequent=frequent, base_lr=lr, use_cuda=use_cuda)
18 |
19 | def parse_args():
20 | parser = argparse.ArgumentParser(description='Train RNet',
21 | formatter_class=argparse.ArgumentDefaultsHelpFormatter)
22 |
23 |
24 | parser.add_argument('--anno_file', dest='annotation_file',
25 | default=os.path.join(config.ANNO_STORE_DIR,config.RNET_TRAIN_IMGLIST_FILENAME), help='training data annotation file', type=str)
26 | parser.add_argument('--model_path', dest='model_store_path', help='training model store directory',
27 | default=config.MODEL_STORE_DIR, type=str)
28 | parser.add_argument('--end_epoch', dest='end_epoch', help='end epoch of training',
29 | default=config.END_EPOCH, type=int)
30 | parser.add_argument('--frequent', dest='frequent', help='frequency of logging',
31 | default=200, type=int)
32 | parser.add_argument('--lr', dest='lr', help='learning rate',
33 | default=config.TRAIN_LR, type=float)
34 | parser.add_argument('--batch_size', dest='batch_size', help='train batch size',
35 | default=config.TRAIN_BATCH_SIZE, type=int)
36 | parser.add_argument('--gpu', dest='use_cuda', help='train with gpu',
37 | default=config.USE_CUDA, type=bool)
38 |     parser.add_argument('--prefix_path', dest='prefix_path', help='training data annotation images prefix root path', type=str)
39 |
40 | args = parser.parse_args()
41 | return args
42 |
43 | if __name__ == '__main__':
44 | args = parse_args()
45 |     print('train RNet arguments:')
46 | print(args)
47 |
48 |
49 | train_net(annotation_file=args.annotation_file, model_store_path=args.model_store_path,
50 | end_epoch=args.end_epoch, frequent=args.frequent, lr=args.lr, batch_size=args.batch_size, use_cuda=args.use_cuda)
51 |
--------------------------------------------------------------------------------
/environment-win64.yml:
--------------------------------------------------------------------------------
1 | name: ai_gpu
2 | channels:
3 | - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free
4 | - peterjc123
5 | - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/
6 | - defaults
7 | dependencies:
8 | - _ipyw_jlab_nb_ext_conf=0.1.0=py36he6757f0_0
9 | - alabaster=0.7.10=py36hcd07829_0
10 | - anaconda-client=1.6.5=py36hd36550c_0
11 | - anaconda-navigator=1.6.10=py36h51c3d4f_0
12 | - anaconda-project=0.8.0=py36h8b3bf89_0
13 | - asn1crypto=0.22.0=py36h8e79faa_1
14 | - astroid=1.5.3=py36h9d85297_0
15 | - astropy=2.0.2=py36h06391c4_4
16 | - babel=2.5.0=py36h35444c1_0
17 | - backports=1.0=py36h81696a8_1
18 | - backports.shutil_get_terminal_size=1.0.0=py36h79ab834_2
19 | - beautifulsoup4=4.6.0=py36hd4cc5e8_1
20 | - bitarray=0.8.1=py36h6af124b_0
21 | - bkcharts=0.2=py36h7e685f7_0
22 | - blaze=0.11.3=py36h8a29ca5_0
23 | - bleach=2.0.0=py36h0a7e3d6_0
24 | - bokeh=0.12.10=py36h0be3b39_0
25 | - boto=2.48.0=py36h1a776d2_1
26 | - bottleneck=1.2.1=py36hd119dfa_0
27 | - bzip2=1.0.6=vc14hdec8e7a_1
28 | - ca-certificates=2017.08.26=h94faf87_0
29 | - cachecontrol=0.12.3=py36hfe50d7b_0
30 | - certifi=2017.7.27.1=py36h043bc9e_0
31 | - chardet=3.0.4=py36h420ce6e_1
32 | - click=6.7=py36hec8c647_0
33 | - cloudpickle=0.4.0=py36h639d8dc_0
34 | - clyent=1.2.2=py36hb10d595_1
35 | - colorama=0.3.9=py36h029ae33_0
36 | - comtypes=1.1.2=py36heb9b3d1_0
37 | - conda-verify=2.0.0=py36h065de53_0
38 | - console_shortcut=0.1.1=h6bb2dd7_3
39 | - contextlib2=0.5.5=py36he5d52c0_0
40 | - cryptography=2.0.3=py36h123decb_1
41 | - curl=7.55.1=vc14hdaba4a4_3
42 | - cycler=0.10.0=py36h009560c_0
43 | - cython=0.26.1=py36h18049ac_0
44 | - cytoolz=0.8.2=py36h547e66e_0
45 | - dask=0.15.3=py36h396fcb9_0
46 | - dask-core=0.15.3=py36hd651449_0
47 | - datashape=0.5.4=py36h5770b85_0
48 | - decorator=4.1.2=py36he63a57b_0
49 | - distlib=0.2.5=py36h51371be_0
50 | - distributed=1.19.1=py36h8504682_0
51 | - docutils=0.14=py36h6012d8f_0
52 | - entrypoints=0.2.3=py36hfd66bb0_2
53 | - et_xmlfile=1.0.1=py36h3d2d736_0
54 | - fastcache=1.0.2=py36hffdae1b_0
55 | - filelock=2.0.12=py36hd7ddd41_0
56 | - flask=0.12.2=py36h98b5e8f_0
57 | - flask-cors=3.0.3=py36h8a3855d_0
58 | - freetype=2.8=vc14h17c9bdf_0
59 | - get_terminal_size=1.0.0=h38e98db_0
60 | - gevent=1.2.2=py36h342a76c_0
61 | - glob2=0.5=py36h11cc1bd_1
62 | - greenlet=0.4.12=py36ha00ad21_0
63 | - h5py=2.7.0=py36hfbe0a52_1
64 | - hdf5=1.10.1=vc14hb361328_0
65 | - heapdict=1.0.0=py36h21fa5f4_0
66 | - html5lib=0.999999999=py36ha09b1f3_0
67 | - icc_rt=2017.0.4=h97af966_0
68 | - icu=58.2=vc14hc45fdbb_0
69 | - idna=2.6=py36h148d497_1
70 | - imageio=2.2.0=py36had6c2d2_0
71 | - imagesize=0.7.1=py36he29f638_0
72 | - intel-openmp=2018.0.0=hcd89f80_7
73 | - ipykernel=4.6.1=py36hbb77b34_0
74 | - ipython=6.1.0=py36h236ecc8_1
75 | - ipython_genutils=0.2.0=py36h3c5d0ee_0
76 | - ipywidgets=7.0.0=py36h2e74ada_0
77 | - isort=4.2.15=py36h6198cc5_0
78 | - itsdangerous=0.24=py36hb6c5a24_1
79 | - jdcal=1.3=py36h64a5255_0
80 | - jedi=0.10.2=py36hed927a0_0
81 | - jinja2=2.9.6=py36h10aa3a0_1
82 | - jpeg=9b=vc14h4d7706e_1
83 | - jsonschema=2.6.0=py36h7636477_0
84 | - jupyter=1.0.0=py36h422fd7e_2
85 | - jupyter_client=5.1.0=py36h9902a9a_0
86 | - jupyter_console=5.2.0=py36h6d89b47_1
87 | - jupyter_core=4.3.0=py36h511e818_0
88 | - jupyterlab=0.27.0=py36h34cc53b_2
89 | - jupyterlab_launcher=0.4.0=py36h22c3ccf_0
90 | - lazy-object-proxy=1.3.1=py36hd1c21d2_0
91 | - libiconv=1.15=vc14h29686d3_5
92 | - libpng=1.6.32=vc14h5163883_3
93 | - libssh2=1.8.0=vc14hcf584a9_2
94 | - libtiff=4.0.9=h0f13578_0
95 | - libxml2=2.9.4=vc14h8fd0f11_5
96 | - libxslt=1.1.29=vc14hf85b8d4_5
97 | - llvmlite=0.20.0=py36_0
98 | - locket=0.2.0=py36hfed976d_1
99 | - lockfile=0.12.2=py36h0468280_0
100 | - lxml=4.1.0=py36h0dcd83c_0
101 | - lzo=2.10=vc14h0a64fa6_1
102 | - markupsafe=1.0=py36h0e26971_1
103 | - matplotlib=2.1.0=py36h11b4b9c_0
104 | - mccabe=0.6.1=py36hb41005a_1
105 | - menuinst=1.4.10=py36h42196fb_0
106 | - mistune=0.7.4=py36h4874169_0
107 | - mpmath=0.19=py36he326802_2
108 | - msgpack-python=0.4.8=py36h58b1e9d_0
109 | - multipledispatch=0.4.9=py36he44c36e_0
110 | - navigator-updater=0.1.0=py36h8a7b86b_0
111 | - nbconvert=5.3.1=py36h8dc0fde_0
112 | - nbformat=4.4.0=py36h3a5bc1b_0
113 | - networkx=2.0=py36hff991e3_0
114 | - nltk=3.2.4=py36hd0e0a39_0
115 | - nose=1.3.7=py36h1c3779e_2
116 | - notebook=5.0.0=py36hd9fbf6f_2
117 | - numba=0.35.0=np113py36_10
118 | - numpydoc=0.7.0=py36ha25429e_0
119 | - odo=0.5.1=py36h7560279_0
120 | - olefile=0.44=py36h0a7bdd2_0
121 | - opencv=3.3.1=py36h20b85fd_1
122 | - openpyxl=2.4.8=py36hf3b77f6_1
123 | - openssl=1.0.2l=vc14hcac20b0_2
124 | - packaging=16.8=py36ha0986f6_1
125 | - pandas=0.20.3=py36hce827b7_2
126 | - pandoc=1.19.2.1=hb2460c7_1
127 | - pandocfilters=1.4.2=py36h3ef6317_1
128 | - partd=0.3.8=py36hc8e763b_0
129 | - path.py=10.3.1=py36h3dd8b46_0
130 | - pathlib2=2.3.0=py36h7bfb78b_0
131 | - patsy=0.4.1=py36h42cefec_0
132 | - pep8=1.7.0=py36h0f3d67a_0
133 | - pickleshare=0.7.4=py36h9de030f_0
134 | - pillow=4.2.1=py36hdb25ab2_0
135 | - pip=9.0.1=py36hadba87b_3
136 | - pkginfo=1.4.1=py36hb0f9cfa_1
137 | - ply=3.10=py36h1211beb_0
138 | - progress=1.3=py36hbeca8d3_0
139 | - prompt_toolkit=1.0.15=py36h60b8f86_0
140 | - psutil=5.4.0=py36h4e662fb_0
141 | - py=1.4.34=py36ha4aca3a_1
142 | - pycodestyle=2.3.1=py36h7cc55cd_0
143 | - pycosat=0.6.2=py36hf17546d_1
144 | - pycparser=2.18=py36hd053e01_1
145 | - pycrypto=2.6.1=py36he68e6e2_1
146 | - pycurl=7.43.0=py36h086bf4c_3
147 | - pyflakes=1.6.0=py36h0b975d6_0
148 | - pygments=2.2.0=py36hb010967_0
149 | - pylint=1.7.4=py36ha4e6ded_0
150 | - pyodbc=4.0.17=py36h0006bc2_0
151 | - pyopenssl=17.2.0=py36h15ca2fc_0
152 | - pyparsing=2.2.0=py36h785a196_1
153 | - pyqt=5.6.0=py36hb5ed885_5
154 | - pysocks=1.6.7=py36h698d350_1
155 | - pytables=3.4.2=py36h71138e3_2
156 | - pytest=3.2.1=py36h753b05e_1
157 | - python=3.6.3=h9e2ca53_1
158 | - python-dateutil=2.6.1=py36h509ddcb_1
159 | - pytz=2017.2=py36h05d413f_1
160 | - pywavelets=0.5.2=py36hc649158_0
161 | - pywin32=221=py36h9c10281_0
162 | - pyyaml=3.12=py36h1d1928f_1
163 | - pyzmq=16.0.2=py36h38c27d9_2
164 | - qt=5.6.2=vc14h6f8c307_12
165 | - qtawesome=0.4.4=py36h5aa48f6_0
166 | - qtconsole=4.3.1=py36h99a29a9_0
167 | - qtpy=1.3.1=py36hb8717c5_0
168 | - requests=2.18.4=py36h4371aae_1
169 | - rope=0.10.5=py36hcaf5641_0
170 | - ruamel_yaml=0.11.14=py36h9b16331_2
171 | - scikit-image=0.13.0=py36h6dffa3f_1
172 | - seaborn=0.8.0=py36h62cb67c_0
173 | - setuptools=36.5.0=py36h65f9e6e_0
174 | - simplegeneric=0.8.1=py36heab741f_0
175 | - singledispatch=3.4.0.3=py36h17d0c80_0
176 | - sip=4.18.1=py36h9c25514_2
177 | - six=1.11.0=py36h4db2310_1
178 | - snowballstemmer=1.2.1=py36h763602f_0
179 | - sortedcollections=0.5.3=py36hbefa0ab_0
180 | - sortedcontainers=1.5.7=py36ha90ac20_0
181 | - sphinx=1.6.3=py36h9bb690b_0
182 | - sphinxcontrib=1.0=py36hbbac3d2_1
183 | - sphinxcontrib-websupport=1.0.1=py36hb5e5916_1
184 | - spyder=3.2.4=py36h8845eaa_0
185 | - sqlalchemy=1.1.13=py36h5948d12_0
186 | - sqlite=3.20.1=vc14h7ce8c62_1
187 | - statsmodels=0.8.0=py36h6189b4c_0
188 | - sympy=1.1.1=py36h96708e0_0
189 | - tblib=1.3.2=py36h30f5020_0
190 | - testpath=0.3.1=py36h2698cfe_0
191 | - tk=8.6.7=vc14hb68737d_1
192 | - toolz=0.8.2=py36he152a52_0
193 | - tornado=4.5.2=py36h57f6048_0
194 | - traitlets=4.3.2=py36h096827d_0
195 | - typing=3.6.2=py36hb035bda_0
196 | - unicodecsv=0.14.1=py36h6450c06_0
197 | - urllib3=1.22=py36h276f60a_0
198 | - vs2015_runtime=14.0.25123=hd4c4e62_2
199 | - wcwidth=0.1.7=py36h3d5aa90_0
200 | - webencodings=0.5.1=py36h67c50ae_1
201 | - werkzeug=0.12.2=py36h866a736_0
202 | - wheel=0.29.0=py36h6ce6cde_1
203 | - widgetsnbextension=3.0.2=py36h364476f_1
204 | - win_inet_pton=1.0.1=py36he67d7fd_1
205 | - win_unicode_console=0.5=py36hcdbd4b5_0
206 | - wincertstore=0.2=py36h7fe50ca_0
207 | - wrapt=1.10.11=py36he5f5981_0
208 | - xlrd=1.1.0=py36h1cb58dc_1
209 | - xlsxwriter=1.0.2=py36hf723b7d_0
210 | - xlwings=0.11.4=py36hd3cf94d_0
211 | - xlwt=1.3.0=py36h1a4751e_0
212 | - yaml=0.1.7=vc14hb31d195_1
213 | - zict=0.1.3=py36h2d8e73e_0
214 | - zlib=1.2.11=vc14h1cdd9ab_1
215 | - anaconda=custom=py36_0
216 | - cffi=1.10.0=py36_0
217 | - mkl=2017.0.3=0
218 | - mkl-service=1.1.2=py36_3
219 | - numexpr=2.6.2=np113py36_0
220 | - numpy=1.13.1=py36_0
221 | - scikit-learn=0.19.0=np113py36_0
222 | - scipy=0.19.1=np113py36_0
223 | - cuda80=1.0=h205658b_0
224 | - pytorch=0.3.0=py36_0.3.0cu80
225 | - vc=14.1=h21ff451_0
226 | - vs2017_runtime=15.4.27004.2010=0
227 | - pip:
228 | - backports.shutil-get-terminal-size==1.0.0
229 | - et-xmlfile==1.0.1
230 | - ipython-genutils==0.2.0
231 | - jupyter-client==5.1.0
232 | - jupyter-console==5.2.0
233 | - jupyter-core==4.3.0
234 | - jupyterlab-launcher==0.4.0
235 | - prompt-toolkit==1.0.15
236 | - ruamel-yaml==0.11.14
237 | - tables==3.4.2
238 | - torch==0.3.0b0+591e73e
239 | - torchvision==0.2.0
240 | - win-inet-pton==1.0.1
241 | - win-unicode-console==0.5
242 | prefix: D:\Tools\Anaconda3\envs\ai_gpu
243 |
244 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: pytorch
2 | channels:
3 | - soumith
4 | - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free
5 | - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/
6 | - defaults
7 | dependencies:
8 | - cairo=1.14.8=0
9 | - certifi=2016.2.28=py27_0
10 | - cffi=1.10.0=py27_0
11 | - fontconfig=2.12.1=3
12 | - freetype=2.5.5=2
13 | - glib=2.50.2=1
14 | - harfbuzz=0.9.39=2
15 | - hdf5=1.8.17=2
16 | - jbig=2.1=0
17 | - jpeg=8d=2
18 | - libffi=3.2.1=1
19 | - libgcc=5.2.0=0
20 | - libiconv=1.14=0
21 | - libpng=1.6.30=1
22 | - libtiff=4.0.6=2
23 | - libxml2=2.9.4=0
24 | - mkl=2017.0.3=0
25 | - numpy=1.12.1=py27_0
26 | - olefile=0.44=py27_0
27 | - opencv=3.1.0=np112py27_1
28 | - openssl=1.0.2l=0
29 | - pcre=8.39=1
30 | - pillow=3.4.2=py27_0
31 | - pip=9.0.1=py27_1
32 | - pixman=0.34.0=0
33 | - pycparser=2.18=py27_0
34 | - python=2.7.13=0
35 | - readline=6.2=2
36 | - setuptools=36.4.0=py27_1
37 | - six=1.10.0=py27_0
38 | - sqlite=3.13.0=0
39 | - tk=8.5.18=0
40 | - wheel=0.29.0=py27_0
41 | - xz=5.2.3=0
42 | - zlib=1.2.11=0
43 | - cycler=0.10.0=py27_0
44 | - dbus=1.10.20=0
45 | - expat=2.1.0=0
46 | - functools32=3.2.3.2=py27_0
47 | - gst-plugins-base=1.8.0=0
48 | - gstreamer=1.8.0=0
49 | - icu=54.1=0
50 | - libxcb=1.12=1
51 | - matplotlib=2.0.2=np112py27_0
52 | - pycairo=1.10.0=py27_0
53 | - pyparsing=2.2.0=py27_0
54 | - pyqt=5.6.0=py27_2
55 | - python-dateutil=2.6.1=py27_0
56 | - pytz=2017.2=py27_0
57 | - qt=5.6.2=2
58 | - sip=4.18=py27_0
59 | - subprocess32=3.2.7=py27_0
60 | - cuda80=1.0=0
61 | - pytorch=0.2.0=py27hc03bea1_4cu80
62 | - torchvision=0.1.9=py27hdb88a65_1
63 | - pip:
64 | - torch==0.2.0.post4
65 | prefix: /home/asy/.conda/envs/pytorch
66 |
67 |
--------------------------------------------------------------------------------
/environment_osx.yaml:
--------------------------------------------------------------------------------
1 | name: dface
2 | channels:
3 | - pytorch
4 | - defaults
5 | dependencies:
6 | - backports=1.0=py27hb4f9756_1
7 | - backports.functools_lru_cache=1.4=py27h2aca819_1
8 | - backports_abc=0.5=py27h6972548_0
9 | - bzip2=1.0.6=h649919c_2
10 | - ca-certificates=2017.08.26=ha1e5d58_0
11 | - cairo=1.14.10=h913ea44_6
12 | - certifi=2017.11.5=py27hfa9a1c4_0
13 | - cffi=1.11.4=py27h342bebf_0
14 | - cycler=0.10.0=py27hfc73c78_0
15 | - ffmpeg=3.4=h766ddd1_0
16 | - fontconfig=2.12.4=hffb9db1_2
17 | - freetype=2.8=h12048fb_1
18 | - functools32=3.2.3.2=py27h8ceab06_1
19 | - gettext=0.19.8.1=h15daf44_3
20 | - glib=2.53.6=h33f6a65_2
21 | - graphite2=1.3.10=h233cf8b_0
22 | - harfbuzz=1.5.0=h6db888e_0
23 | - hdf5=1.10.1=ha036c08_1
24 | - icu=58.2=h4b95b61_1
25 | - intel-openmp=2018.0.0=h8158457_8
26 | - jasper=1.900.1=h1f36771_4
27 | - jpeg=9b=he5867d9_2
28 | - libcxx=4.0.1=h579ed51_0
29 | - libcxxabi=4.0.1=hebd6815_0
30 | - libedit=3.1=hb4e282d_0
31 | - libffi=3.2.1=h475c297_4
32 | - libgfortran=3.0.1=h93005f0_2
33 | - libiconv=1.15=hdd342a3_7
34 | - libopus=1.2.1=h169cedb_0
35 | - libpng=1.6.32=hd1e8b91_4
36 | - libprotobuf=3.4.1=h326466f_0
37 | - libtiff=4.0.9=h0dac147_0
38 | - libvpx=1.6.1=h057a404_0
39 | - libxml2=2.9.4=hf05c021_6
40 | - matplotlib=2.1.1=py27hb768455_0
41 | - mkl=2018.0.1=hfbd8650_4
42 | - ncurses=6.0=hd04f020_2
43 | - numpy=1.14.0=py27h8a80b8c_0
44 | - olefile=0.44=py27h73ba740_0
45 | - opencv=3.3.1=py27h60a5f38_1
46 | - openssl=1.0.2n=hdbc3d79_0
47 | - pcre=8.41=hfb6ab37_1
48 | - pillow=5.0.0=py27hfcce615_0
49 | - pip=9.0.1=py27h1567d89_4
50 | - pixman=0.34.0=hca0a616_3
51 | - pycparser=2.18=py27h0d28d88_1
52 | - pyparsing=2.2.0=py27h5bb6aaf_0
53 | - python=2.7.14=hde5916a_29
54 | - python-dateutil=2.6.1=py27hd56c96b_1
55 | - pytz=2017.3=py27h803c07a_0
56 | - readline=7.0=hc1231fa_4
57 | - setuptools=38.4.0=py27_0
58 | - singledispatch=3.4.0.3=py27he22c18d_0
59 | - six=1.11.0=py27h7252ba3_1
60 | - sqlite=3.20.1=h7e4c145_2
61 | - ssl_match_hostname=3.5.0.1=py27h8780752_2
62 | - subprocess32=3.2.7=py27h24b2887_0
63 | - tk=8.6.7=h35a86e2_3
64 | - tornado=4.5.3=py27_0
65 | - wheel=0.30.0=py27h677a027_1
66 | - xz=5.2.3=h0278029_2
67 | - zlib=1.2.11=hf3cbc9b_2
68 | - pytorch=0.3.0=py27_cuda0.0_cudnn0.0he480db7_4
69 | - torchvision=0.2.0=py27hfc0307a_1
70 | - pip:
71 | - backports-abc==0.5
72 | - backports.functools-lru-cache==1.4
73 | - backports.ssl-match-hostname==3.5.0.1
74 | - torch==0.3.0.post4
75 | prefix: /Users/hfu/anaconda2/envs/dface
76 |
77 |
--------------------------------------------------------------------------------
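Note: the three environment files above target fairly old toolchains (Python 2.7 or 3.6, PyTorch 0.2.0/0.3.0, torchvision 0.1.9/0.2.0). After creating one of the environments with conda, it is worth confirming that the solver actually installed the pinned versions before running the detector. A minimal sanity-check sketch (illustrative only, not part of the repository; the expected versions are simply the ones pinned in the YAML files):

    # check_env.py -- illustrative only; prints the packages the env files pin
    import cv2
    import numpy
    import torch
    import torchvision

    print("opencv:     ", cv2.__version__)
    print("numpy:      ", numpy.__version__)
    print("torch:      ", torch.__version__)         # 0.2.0 in environment.yml, 0.3.0 in the win64/osx files
    print("torchvision:", torchvision.__version__)
    print("cuda ok:    ", torch.cuda.is_available())  # the osx env is CPU-only; the other two expect CUDA 8.0
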
/log/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/log/__init__.py
--------------------------------------------------------------------------------
/log/info:
--------------------------------------------------------------------------------
1 | log dir
--------------------------------------------------------------------------------
/model_store/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/model_store/__init__.py
--------------------------------------------------------------------------------
/model_store/info:
--------------------------------------------------------------------------------
1 | This directory stores the trained model parameters and network structure
--------------------------------------------------------------------------------
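The three .pt files below are the pretrained weights for the P-Net, R-Net and O-Net stages; in this repo they are consumed through create_mtcnn_net (see test_image.py at the end of this listing), which handles loading internally. For reference, a rough sketch of how such a checkpoint is usually restored by hand -- the PNet class name, its constructor arguments and the checkpoint layout are assumptions here; the authoritative code lives in dface/core/models.py and dface/core/detect.py:

    # illustrative only: manually restoring one of the *_epoch.pt checkpoints.
    # Class name and checkpoint layout are assumptions; the repo's own loading
    # path is create_mtcnn_net() in dface/core/detect.py.
    import torch
    from dface.core.models import PNet   # assumed to be defined in models.py

    net = PNet()                          # constructor arguments are an assumption
    state = torch.load("./model_store/pnet_epoch.pt",
                       map_location=lambda storage, loc: storage)  # keep tensors on CPU
    if isinstance(state, dict):           # state_dict-style checkpoint
        net.load_state_dict(state)
    else:                                 # full-module pickle
        net = state
    net.eval()
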
/model_store/onet_epoch.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/model_store/onet_epoch.pt
--------------------------------------------------------------------------------
/model_store/pnet_epoch.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/model_store/pnet_epoch.pt
--------------------------------------------------------------------------------
/model_store/rnet_epoch.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/model_store/rnet_epoch.pt
--------------------------------------------------------------------------------
/test.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuaikuaikim/dface/cb777c313d1ba4e03bc265871fa32bbdbe2765ce/test.jpg
--------------------------------------------------------------------------------
/test_image.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | from dface.core.detect import create_mtcnn_net, MtcnnDetector
3 | import dface.core.vision as vision
4 |
5 |
6 | if __name__ == '__main__':
7 |
8 |     # Load the pretrained P-Net, R-Net and O-Net weights and build the cascade detector.
9 |     pnet, rnet, onet = create_mtcnn_net(p_model_path="./model_store/pnet_epoch.pt", r_model_path="./model_store/rnet_epoch.pt", o_model_path="./model_store/onet_epoch.pt", use_cuda=False)
10 |     mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, onet=onet, min_face_size=24)
11 |
12 |     # OpenCV loads images in BGR order; keep the BGR image for detection and an RGB copy for display.
13 |     img = cv2.imread("./test.jpg")
14 |     img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
15 |
16 |     # Run the three-stage cascade: returns face bounding boxes and facial landmarks.
17 |     bboxs, landmarks = mtcnn_detector.detect_face(img)
18 |
19 |     # Visualize the detections on the RGB copy.
20 |     vision.vis_face(img_rgb, bboxs, landmarks)
21 |
--------------------------------------------------------------------------------
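test_image.py hands the detections straight to vision.vis_face for display. If the raw output is needed instead (for example, to save an annotated copy of test.jpg without opening a window), the returned arrays can be drawn with plain OpenCV. A hedged sketch, assuming each row of bboxs is [x1, y1, x2, y2, score] and each row of landmarks packs five x coordinates followed by five y coordinates -- the usual MTCNN layout, but not verified against dface/core/detect.py here:

    # illustrative follow-up to test_image.py: draw detections with OpenCV and
    # save to disk instead of calling vision.vis_face. Assumed layouts:
    #   bboxs row     -> [x1, y1, x2, y2, score]
    #   landmarks row -> [x1..x5, y1..y5]
    import cv2

    def save_annotated(img_bgr, bboxs, landmarks, out_path="./result.jpg"):
        out = img_bgr.copy()
        for box in bboxs:
            x1, y1, x2, y2 = [int(v) for v in box[:4]]
            cv2.rectangle(out, (x1, y1), (x2, y2), (0, 255, 0), 2)
        for pts in landmarks:
            for i in range(5):
                cv2.circle(out, (int(pts[i]), int(pts[i + 5])), 2, (0, 0, 255), -1)
        cv2.imwrite(out_path, out)

    # usage, after mtcnn_detector.detect_face(img) in test_image.py:
    # save_annotated(img, bboxs, landmarks)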