├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── README.md
├── data
    ├── 2007_007763.jpg
    ├── 2007_007763_result.jpg
    ├── Aaron_Peirsol_0003.jpg
    ├── dogs.jpg
    └── models
    │   ├── det1.caffemodel
    │   ├── det1.prototxt
    │   ├── det2.caffemodel
    │   ├── det2.prototxt
    │   ├── det3.caffemodel
    │   └── det3.prototxt
├── lib
    ├── CMakeLists.txt
    ├── include
    │   └── mtcnn
    │   │   ├── detector.h
    │   │   ├── face.h
    │   │   ├── helpers.h
    │   │   ├── onet.h
    │   │   ├── pnet.h
    │   │   └── rnet.h
    └── src
    │   ├── detector.cc
    │   ├── onet.cc
    │   ├── pnet.cc
    │   └── rnet.cc
└── sample
    ├── CMakeLists.txt
    └── src
        └── main.cc


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Prerequisites
 2 | *.d
 3 | 
 4 | # Compiled Object files
 5 | *.slo
 6 | *.lo
 7 | *.o
 8 | *.obj
 9 | 
10 | # Precompiled Headers
11 | *.gch
12 | *.pch
13 | 
14 | # Compiled Dynamic libraries
15 | *.so
16 | *.dylib
17 | *.dll
18 | 
19 | # Fortran module files
20 | *.mod
21 | *.smod
22 | 
23 | # Compiled Static libraries
24 | *.lai
25 | *.la
26 | *.a
27 | *.lib
28 | 
29 | # Executables
30 | *.exe
31 | *.out
32 | *.app
33 | 
34 | build
35 | _debug
36 | _release
37 | _xdebug
38 | .vscode


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.2)
 2 | 
 3 | project (MTCNN C CXX)
 4 | 
 5 | set_property (GLOBAL PROPERTY USE_FOLDERS ON)
 6 | 
 7 | set (CMAKE_CXX_STANDARD 11)
 8 | set (CMAKE_CXX_STANDARD_REQUIRED ON)
 9 | 
10 | add_subdirectory (lib)
11 | add_subdirectory (sample)
12 | 
13 | enable_testing()


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # opencv-mtcnn
 2 | 
 3 | This is an inference implementation of MTCNN (Multi-task Cascaded Convolutional Network) to perform Face Detection and Alignment using OpenCV's DNN module.
 4 | 
 5 | ## MTCNN
 6 | 
 7 | [ZHANG2016] Zhang, K., Zhang, Z., Li, Z., and Qiao, Y. (2016). Joint face detection and alignment using multitask cascaded convolutional networks. IEEE Signal Processing Letters, 23(10):1499–1503.
 8 | 
 9 | https://kpzhang93.github.io/MTCNN_face_detection_alignment/paper/spl.pdf
10 | 
11 | ## OpenCV's DNN module
12 | 
13 | Since OpenCV 3.1 there is a module called DNN that provides the inference support. The module is capable of taking models & weights from various popular frameworks such as Caffe, tensorflow, darknet etc.
14 | 
15 | You can read more about it here - https://github.com/opencv/opencv/wiki/Deep-Learning-in-OpenCV
16 | 
17 | Note that at present there is no support to perform training in OpenCV's DNN module and if I understood correctly there is no intention either.
18 | 
19 | ## Compile / Run
20 | 
21 | ### Requirements
22 | 
23 | * OpenCV 3.4+
24 | * Boost FileSystem (1.58+)  [only required for the sample application]
25 | * CMake 3.2+
26 | 
27 | I am using CMake as the build tool. Here are the steps to try the implementation -
28 | 
29 | ```bash
30 | # compiling the library and the sample application
31 | git clone https://github.com/ksachdeva/opencv-mtcnn
32 | cd opencv-mtcnn
33 | mkdir build
34 | cd build
35 | cmake ..
36 | cmake --build .
37 | ```
38 | 
39 | ```bash
40 | # running the sample application
41 | cd build
42 | ./sample/app <path_to_models_dir> <path_to_test_image>
43 | 
44 | # here are some example cmd lines to run with the model and images in the test repository
45 | 
46 | # An image with 0 human faces (have picture of 4 dogs)
47 | ./sample/app ../data/models ../data/dogs.jpg
48 | 
49 | # An image with 1 face
50 | ./sample/app ../data/models ../data/Aaron_Peirsol_0003.jpg
51 | 
52 | # An image with 7 faces
53 | ./sample/app ../data/models ../data/2007_007763.jpg
54 | ```
55 | 
56 | ## Result
57 | 
58 | Here is an example of how the execution of the sample application looks like
59 | 
60 | ![Result](data/2007_007763_result.jpg)
61 | 
62 | ## Acknowledgments
63 | 
64 | Most of the implementations of MTCNN are based on either Caffe or Tensorflow. I wanted to play with OpenCV's DNN implementation and understand the paper bit better. While implementing it, I looked at various other C++ implementations (again all of them use Caffe) and more specifically borrowed utilities from https://github.com/golunovas/mtcnn-cpp. IMHO, I found his implementation (in C++) that is based on Caffe to be the cleanest amongst many others.
65 | 
66 | The model files are taken from https://github.com/kpzhang93/MTCNN_face_detection_alignment/tree/master/code
67 | 
68 | The image file "Aaron_Peirsol_0003.jpg" is from the LFW database (http://vis-www.cs.umass.edu/lfw/)
69 | 
70 | The image files "dog.jpg" & "2007_007763.jpg" are from dlib's github repository (https://github.com/davisking/dlib/blob/master/examples/faces)


--------------------------------------------------------------------------------
/data/2007_007763.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ksachdeva/opencv-mtcnn/ea26c4f872008f505685f853b2cb3ef897f691aa/data/2007_007763.jpg


--------------------------------------------------------------------------------
/data/2007_007763_result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ksachdeva/opencv-mtcnn/ea26c4f872008f505685f853b2cb3ef897f691aa/data/2007_007763_result.jpg


--------------------------------------------------------------------------------
/data/Aaron_Peirsol_0003.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ksachdeva/opencv-mtcnn/ea26c4f872008f505685f853b2cb3ef897f691aa/data/Aaron_Peirsol_0003.jpg


--------------------------------------------------------------------------------
/data/dogs.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ksachdeva/opencv-mtcnn/ea26c4f872008f505685f853b2cb3ef897f691aa/data/dogs.jpg


--------------------------------------------------------------------------------
/data/models/det1.caffemodel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ksachdeva/opencv-mtcnn/ea26c4f872008f505685f853b2cb3ef897f691aa/data/models/det1.caffemodel


--------------------------------------------------------------------------------
/data/models/det1.prototxt:
--------------------------------------------------------------------------------
  1 | name: "PNet"
  2 | input: "data"
  3 | input_dim: 1
  4 | input_dim: 3
  5 | input_dim: 12
  6 | input_dim: 12
  7 | 
  8 | layer {
  9 |   name: "conv1"
 10 |   type: "Convolution"
 11 |   bottom: "data"
 12 |   top: "conv1"
 13 |   param {
 14 |     lr_mult: 1
 15 |     decay_mult: 1
 16 |   }
 17 |   param {
 18 |     lr_mult: 2
 19 |     decay_mult: 0
 20 |   }
 21 |   convolution_param {
 22 |     num_output: 10
 23 |     kernel_size: 3
 24 |     stride: 1
 25 |     weight_filler {
 26 |       type: "xavier"
 27 |     }
 28 |     bias_filler {
 29 |       type: "constant"
 30 |       value: 0
 31 |     }
 32 |   }
 33 | }
 34 | layer {
 35 |   name: "PReLU1"
 36 |   type: "PReLU"
 37 |   bottom: "conv1"
 38 |   top: "conv1"
 39 | }
 40 | layer {
 41 |   name: "pool1"
 42 |   type: "Pooling"
 43 |   bottom: "conv1"
 44 |   top: "pool1"
 45 |   pooling_param {
 46 |     pool: MAX
 47 |     kernel_size: 2
 48 |     stride: 2
 49 |   }
 50 | }
 51 | 
 52 | layer {
 53 |   name: "conv2"
 54 |   type: "Convolution"
 55 |   bottom: "pool1"
 56 |   top: "conv2"
 57 |   param {
 58 |     lr_mult: 1
 59 |     decay_mult: 1
 60 |   }
 61 |   param {
 62 |     lr_mult: 2
 63 |     decay_mult: 0
 64 |   }
 65 |   convolution_param {
 66 |     num_output: 16
 67 |     kernel_size: 3
 68 |     stride: 1
 69 |      weight_filler {
 70 |       type: "xavier"
 71 |     }
 72 |     bias_filler {
 73 |       type: "constant"
 74 |       value: 0
 75 |     }
 76 |   }
 77 | }
 78 | layer {
 79 |   name: "PReLU2"
 80 |   type: "PReLU"
 81 |   bottom: "conv2"
 82 |   top: "conv2"
 83 | }
 84 | 
 85 | layer {
 86 |   name: "conv3"
 87 |   type: "Convolution"
 88 |   bottom: "conv2"
 89 |   top: "conv3"
 90 |   param {
 91 |     lr_mult: 1
 92 |     decay_mult: 1
 93 |   }
 94 |   param {
 95 |     lr_mult: 2
 96 |     decay_mult: 0
 97 |   }
 98 |   convolution_param {
 99 |     num_output: 32
100 |     kernel_size: 3
101 |     stride: 1
102 |      weight_filler {
103 |       type: "xavier"
104 |     }
105 |     bias_filler {
106 | 	  type: "constant"
107 |       value: 0
108 |     }
109 |   }
110 | }
111 | layer {
112 |   name: "PReLU3"
113 |   type: "PReLU"
114 |   bottom: "conv3"
115 |   top: "conv3"
116 | }
117 | 
118 | 
119 | layer {
120 |   name: "conv4-1"
121 |   type: "Convolution"
122 |   bottom: "conv3"
123 |   top: "conv4-1"
124 |   param {
125 |     lr_mult: 1
126 |     decay_mult: 1
127 |   }
128 |   param {
129 |     lr_mult: 2
130 |     decay_mult: 0
131 |   }
132 |   convolution_param {
133 |     num_output: 2
134 |     kernel_size: 1
135 |     stride: 1
136 |      weight_filler {
137 |       type: "xavier"
138 |     }
139 |     bias_filler {
140 |       type: "constant"
141 |       value: 0
142 |     }
143 |   }
144 | }
145 | 
146 | layer {
147 |   name: "conv4-2"
148 |   type: "Convolution"
149 |   bottom: "conv3"
150 |   top: "conv4-2"
151 |   param {
152 |     lr_mult: 1
153 |     decay_mult: 1
154 |   }
155 |   param {
156 |     lr_mult: 2
157 |     decay_mult: 0
158 |   }
159 |   convolution_param {
160 |     num_output: 4
161 |     kernel_size: 1
162 |     stride: 1
163 |      weight_filler {
164 |       type: "xavier"
165 | 	}
166 |     bias_filler {
167 |       type: "constant"
168 |       value: 0
169 |     }
170 |   }
171 | }
172 | layer {
173 |   name: "prob1"
174 |   type: "Softmax"
175 |   bottom: "conv4-1"
176 |   top: "prob1"
177 | }
178 | 


--------------------------------------------------------------------------------
/data/models/det2.caffemodel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ksachdeva/opencv-mtcnn/ea26c4f872008f505685f853b2cb3ef897f691aa/data/models/det2.caffemodel


--------------------------------------------------------------------------------
/data/models/det2.prototxt:
--------------------------------------------------------------------------------
  1 | name: "RNet"
  2 | input: "data"
  3 | input_dim: 1
  4 | input_dim: 3
  5 | input_dim: 24
  6 | input_dim: 24
  7 | 
  8 | 
  9 | ##########################
 10 | ######################
 11 | layer {
 12 |   name: "conv1"
 13 |   type: "Convolution"
 14 |   bottom: "data"
 15 |   top: "conv1"
 16 |   param {
 17 |     lr_mult: 0
 18 |     decay_mult: 0
 19 |   }
 20 |   param {
 21 |     lr_mult: 0
 22 |     decay_mult: 0
 23 |   }
 24 |   convolution_param {
 25 |     num_output: 28
 26 |     kernel_size: 3
 27 |     stride: 1
 28 |      weight_filler {
 29 |       type: "xavier"
 30 | 	}
 31 |     bias_filler {
 32 |       type: "constant"
 33 |       value: 0
 34 |     }
 35 |   }
 36 | }
 37 | layer {
 38 |   name: "prelu1"
 39 |   type: "PReLU"
 40 |   bottom: "conv1"
 41 |   top: "conv1"
 42 |   propagate_down: true
 43 | }
 44 | layer {
 45 |   name: "pool1"
 46 |   type: "Pooling"
 47 |   bottom: "conv1"
 48 |   top: "pool1"
 49 |   pooling_param {
 50 |     pool: MAX
 51 |     kernel_size: 3
 52 |     stride: 2
 53 |   }
 54 | }
 55 | 
 56 | layer {
 57 |   name: "conv2"
 58 |   type: "Convolution"
 59 |   bottom: "pool1"
 60 |   top: "conv2"
 61 |   param {
 62 |     lr_mult: 0
 63 |     decay_mult: 0
 64 |   }
 65 |   param {
 66 |     lr_mult: 0
 67 |     decay_mult: 0
 68 |   }
 69 |   convolution_param {
 70 |     num_output: 48
 71 |     kernel_size: 3
 72 |     stride: 1
 73 |     weight_filler {
 74 |       type: "xavier"
 75 | 	}
 76 |     bias_filler {
 77 |       type: "constant"
 78 |       value: 0
 79 |     }
 80 |   }
 81 | }
 82 | layer {
 83 |   name: "prelu2"
 84 |   type: "PReLU"
 85 |   bottom: "conv2"
 86 |   top: "conv2"
 87 |   propagate_down: true
 88 | }
 89 | layer {
 90 |   name: "pool2"
 91 |   type: "Pooling"
 92 |   bottom: "conv2"
 93 |   top: "pool2"
 94 |   pooling_param {
 95 |     pool: MAX
 96 |     kernel_size: 3
 97 |     stride: 2
 98 |   }
 99 | }
100 | ####################################
101 | 
102 | ##################################
103 | layer {
104 |   name: "conv3"
105 |   type: "Convolution"
106 |   bottom: "pool2"
107 |   top: "conv3"
108 |   param {
109 |     lr_mult: 0
110 |     decay_mult: 0
111 |   }
112 |   param {
113 |     lr_mult: 0
114 |     decay_mult: 0
115 |   }
116 |   convolution_param {
117 |     num_output: 64
118 |     kernel_size: 2
119 |     stride: 1
120 |     weight_filler {
121 |       type: "xavier"
122 | 	}
123 |     bias_filler {
124 |       type: "constant"
125 |       value: 0
126 |     }
127 |   }
128 | }
129 | layer {
130 |   name: "prelu3"
131 |   type: "PReLU"
132 |   bottom: "conv3"
133 |   top: "conv3"
134 |   propagate_down: true
135 | }
136 | ###############################
137 | 
138 | ###############################
139 | 
140 | layer {
141 |   name: "conv4"
142 |   type: "InnerProduct"
143 |   bottom: "conv3"
144 |   top: "conv4"
145 |   param {
146 |     lr_mult: 0
147 |     decay_mult: 0
148 |   }
149 |   param {
150 |     lr_mult: 0
151 |     decay_mult: 0
152 |   }
153 |   inner_product_param {
154 |     num_output: 128
155 |     weight_filler {
156 |       type: "xavier"
157 | 	}
158 |     bias_filler {
159 |       type: "constant"
160 |       value: 0
161 |     }
162 |   }
163 | }
164 | layer {
165 |   name: "prelu4"
166 |   type: "PReLU"
167 |   bottom: "conv4"
168 |   top: "conv4"
169 | }
170 | 
171 | layer {
172 |   name: "conv5-1"
173 |   type: "InnerProduct"
174 |   bottom: "conv4"
175 |   top: "conv5-1"
176 |   param {
177 |     lr_mult: 0
178 |     decay_mult: 0
179 |   }
180 |   param {
181 |     lr_mult: 0
182 |     decay_mult: 0
183 |   }
184 |   inner_product_param {
185 |     num_output: 2
186 |     #kernel_size: 1
187 |     #stride: 1
188 |     weight_filler {
189 |       type: "xavier"
190 | 	}
191 |     bias_filler {
192 |       type: "constant"
193 |       value: 0
194 |     }
195 |   }
196 | }
197 | layer {
198 |   name: "conv5-2"
199 |   type: "InnerProduct"
200 |   bottom: "conv4"
201 |   top: "conv5-2"
202 |   param {
203 |     lr_mult: 1
204 |     decay_mult: 1
205 |   }
206 |   param {
207 |     lr_mult: 2
208 |     decay_mult: 1
209 |   }
210 |   inner_product_param {
211 |     num_output: 4
212 |     #kernel_size: 1
213 |     #stride: 1
214 |      weight_filler {
215 |       type: "xavier"
216 | 	}
217 |     bias_filler {
218 |       type: "constant"
219 |       value: 0
220 |     }
221 |   }
222 | }
223 | layer {
224 |   name: "prob1"
225 |   type: "Softmax"
226 |   bottom: "conv5-1"
227 |   top: "prob1"
228 | }


--------------------------------------------------------------------------------
/data/models/det3.caffemodel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ksachdeva/opencv-mtcnn/ea26c4f872008f505685f853b2cb3ef897f691aa/data/models/det3.caffemodel


--------------------------------------------------------------------------------
/data/models/det3.prototxt:
--------------------------------------------------------------------------------
  1 | name: "ONet"
  2 | input: "data"
  3 | input_dim: 1
  4 | input_dim: 3
  5 | input_dim: 48
  6 | input_dim: 48
  7 | ##################################
  8 | layer {
  9 |   name: "conv1"
 10 |   type: "Convolution"
 11 |   bottom: "data"
 12 |   top: "conv1"
 13 |   param {
 14 |     lr_mult: 1
 15 |     decay_mult: 1
 16 |   }
 17 |   param {
 18 |     lr_mult: 2
 19 |     decay_mult: 1
 20 |   }
 21 |   convolution_param {
 22 |     num_output: 32
 23 |     kernel_size: 3
 24 |     stride: 1
 25 |      weight_filler {
 26 |       type: "xavier"
 27 | 	}
 28 |     bias_filler {
 29 |       type: "constant"
 30 |       value: 0
 31 |     }
 32 |   }
 33 | }
 34 | layer {
 35 |   name: "prelu1"
 36 |   type: "PReLU"
 37 |   bottom: "conv1"
 38 |   top: "conv1"
 39 | }
 40 | layer {
 41 |   name: "pool1"
 42 |   type: "Pooling"
 43 |   bottom: "conv1"
 44 |   top: "pool1"
 45 |   pooling_param {
 46 |     pool: MAX
 47 |     kernel_size: 3
 48 |     stride: 2
 49 |   }
 50 | }
 51 | layer {
 52 |   name: "conv2"
 53 |   type: "Convolution"
 54 |   bottom: "pool1"
 55 |   top: "conv2"
 56 |   param {
 57 |     lr_mult: 1
 58 |     decay_mult: 1
 59 |   }
 60 |   param {
 61 |     lr_mult: 2
 62 |     decay_mult: 1
 63 |   }
 64 |   convolution_param {
 65 |     num_output: 64
 66 |     kernel_size: 3
 67 |     stride: 1
 68 |      weight_filler {
 69 |       type: "xavier"
 70 | 	}
 71 |     bias_filler {
 72 |       type: "constant"
 73 |       value: 0
 74 |     }
 75 |   }
 76 | }
 77 | 
 78 | layer {
 79 |   name: "prelu2"
 80 |   type: "PReLU"
 81 |   bottom: "conv2"
 82 |   top: "conv2"
 83 | }
 84 | layer {
 85 |   name: "pool2"
 86 |   type: "Pooling"
 87 |   bottom: "conv2"
 88 |   top: "pool2"
 89 |   pooling_param {
 90 |     pool: MAX
 91 |     kernel_size: 3
 92 |     stride: 2
 93 |   }
 94 | }
 95 | 
 96 | layer {
 97 |   name: "conv3"
 98 |   type: "Convolution"
 99 |   bottom: "pool2"
100 |   top: "conv3"
101 |   param {
102 |     lr_mult: 1
103 |     decay_mult: 1
104 |   }
105 |   param {
106 |     lr_mult: 2
107 |     decay_mult: 1
108 |   }
109 |   convolution_param {
110 | 	num_output: 64
111 | 	kernel_size: 3
112 |     weight_filler {
113 |       type: "xavier"
114 | 	}
115 |     bias_filler {
116 |       type: "constant"
117 |       value: 0
118 |     }
119 |   }
120 | }
121 | layer {
122 |   name: "prelu3"
123 |   type: "PReLU"
124 |   bottom: "conv3"
125 |   top: "conv3"
126 | }
127 | layer {
128 |   name: "pool3"
129 |   type: "Pooling"
130 |   bottom: "conv3"
131 |   top: "pool3"
132 |   pooling_param {
133 |     pool: MAX
134 |     kernel_size: 2
135 |     stride: 2
136 |   }
137 | }
138 | layer {
139 |   name: "conv4"
140 |   type: "Convolution"
141 |   bottom: "pool3"
142 |   top: "conv4"
143 |   param {
144 |     lr_mult: 1
145 |     decay_mult: 1
146 |   }
147 |   param {
148 |     lr_mult: 2
149 |     decay_mult: 1
150 |   }
151 |   convolution_param {
152 | 	num_output: 128
153 | 	kernel_size: 2
154 |     weight_filler {
155 |       type: "xavier"
156 | 	}
157 |     bias_filler {
158 |       type: "constant"
159 |       value: 0
160 |     }
161 |   }
162 | }
163 | layer {
164 |   name: "prelu4"
165 |   type: "PReLU"
166 |   bottom: "conv4"
167 |   top: "conv4"
168 | }
169 | 
170 | 
171 | layer {
172 |   name: "conv5"
173 |   type: "InnerProduct"
174 |   bottom: "conv4"
175 |   top: "conv5"
176 |   param {
177 |     lr_mult: 1
178 |     decay_mult: 1
179 |   }
180 |   param {
181 |     lr_mult: 2
182 |     decay_mult: 1
183 |   }
184 |   inner_product_param {
185 | 	#kernel_size: 3
186 | 	num_output: 256
187 |     weight_filler {
188 |       type: "xavier"
189 | 	}
190 |     bias_filler {
191 |       type: "constant"
192 |       value: 0
193 |     }
194 |   }
195 | }
196 | 
197 | layer {
198 |   name: "drop5"
199 |   type: "Dropout"
200 |   bottom: "conv5"
201 |   top: "conv5"
202 |   dropout_param {
203 |     dropout_ratio: 0.25
204 |   }
205 | }
206 | layer {
207 |   name: "prelu5"
208 |   type: "PReLU"
209 |   bottom: "conv5"
210 |   top: "conv5"
211 | }
212 | 
213 | 
214 | layer {
215 |   name: "conv6-1"
216 |   type: "InnerProduct"
217 |   bottom: "conv5"
218 |   top: "conv6-1"
219 |   param {
220 |     lr_mult: 1
221 |     decay_mult: 1
222 |   }
223 |   param {
224 |     lr_mult: 2
225 |     decay_mult: 1
226 |   }
227 |   inner_product_param {
228 |     #kernel_size: 1
229 | 	num_output: 2
230 |     weight_filler {
231 |       type: "xavier"
232 | 	}
233 |     bias_filler {
234 |       type: "constant"
235 |       value: 0
236 |     }
237 |   }
238 | }
239 | layer {
240 |   name: "conv6-2"
241 |   type: "InnerProduct"
242 |   bottom: "conv5"
243 |   top: "conv6-2"
244 |   param {
245 |     lr_mult: 1
246 |     decay_mult: 1
247 |   }
248 |   param {
249 |     lr_mult: 2
250 |     decay_mult: 1
251 |   }
252 |   inner_product_param {
253 |   	#kernel_size: 1
254 | 	num_output: 4
255 |     weight_filler {
256 |       type: "xavier"
257 | 	}
258 |     bias_filler {
259 |       type: "constant"
260 |       value: 0
261 |     }
262 |   }
263 | }
264 | layer {
265 |   name: "conv6-3"
266 |   type: "InnerProduct"
267 |   bottom: "conv5"
268 |   top: "conv6-3"
269 |   param {
270 |     lr_mult: 1
271 |     decay_mult: 1
272 |   }
273 |   param {
274 |     lr_mult: 2
275 |     decay_mult: 1
276 |   }
277 |   inner_product_param {
278 |   	#kernel_size: 1
279 | 	num_output: 10
280 |     weight_filler {
281 |       type: "xavier"
282 | 	}
283 |     bias_filler {
284 |       type: "constant"
285 |       value: 0
286 |     }
287 |   }
288 | }
289 | layer {
290 |   name: "prob1"
291 |   type: "Softmax"
292 |   bottom: "conv6-1"
293 |   top: "prob1"
294 | }
295 | 


--------------------------------------------------------------------------------
/lib/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | #configure variables
 2 | set (MTCNN_LIB_NAME "mtcnn")
 3 | 
 4 | #configure directories
 5 | set (MTCNN_LIB_PATH "${PROJECT_SOURCE_DIR}/lib")
 6 | set (MTCNN_LIB_SRC_PATH  "${MTCNN_LIB_PATH}/src" )
 7 | set (MTCNN_LIB_INCLUDE_PATH  "${MTCNN_LIB_PATH}/include")
 8 | 
 9 | find_package(OpenCV REQUIRED)
10 | 
11 | #set includes
12 | include_directories (
13 |     ${MTCNN_LIB_INCLUDE_PATH}
14 |     ${OpenCV_INCLUDE_DIRS}   
15 | )
16 | 
17 | #set sources
18 | file (GLOB MTCNN_LIB_HEADER_FILES "${MTCNN_LIB_INCLUDE_PATH}/**/*.h" "${MTCNN_LIB_INCLUDE_PATH}/**/**/*.h")
19 | file (GLOB MTCNN_LIB_SOURCE_FILES "${MTCNN_LIB_SRC_PATH}/*.cc" "${MTCNN_LIB_SRC_PATH}/**/*.cc")
20 | 
21 | #set library
22 | add_library (${MTCNN_LIB_NAME} STATIC ${MTCNN_LIB_SOURCE_FILES} ${MTCNN_LIB_HEADER_FILES})
23 | 
24 | #export vars
25 | set (MTCNN_LIB_INCLUDE_PATH  ${MTCNN_LIB_INCLUDE_PATH} PARENT_SCOPE)
26 | set (MTCNN_LIB_NAME ${MTCNN_LIB_NAME} PARENT_SCOPE)
27 | 
28 | #test
29 | enable_testing ()
30 | 


--------------------------------------------------------------------------------
/lib/include/mtcnn/detector.h:
--------------------------------------------------------------------------------
 1 | #ifndef _include_opencv_mtcnn_detector_h_
 2 | #define _include_opencv_mtcnn_detector_h_
 3 | 
 4 | #include "face.h"
 5 | #include "onet.h"
 6 | #include "pnet.h"
 7 | #include "rnet.h"
 8 | 
 9 | class MTCNNDetector {
10 | private:
11 |   std::unique_ptr<ProposalNetwork> _pnet;
12 |   std::unique_ptr<RefineNetwork> _rnet;
13 |   std::unique_ptr<OutputNetwork> _onet;
14 | 
15 | public:
16 |   MTCNNDetector(const ProposalNetwork::Config &pConfig,
17 |                 const RefineNetwork::Config &rConfig,
18 |                 const OutputNetwork::Config &oConfig);
19 |   std::vector<Face> detect(const cv::Mat &img, const float minFaceSize,
20 |                            const float scaleFactor);
21 | };
22 | 
23 | #endif
24 | 


--------------------------------------------------------------------------------
/lib/include/mtcnn/face.h:
--------------------------------------------------------------------------------
  1 | #ifndef _include_opencv_mtcnn_face_h_
  2 | #define _include_opencv_mtcnn_face_h_
  3 | 
  4 | #include <opencv2/opencv.hpp>
  5 | 
  6 | #define NUM_REGRESSIONS 4
  7 | #define NUM_PTS 5
  8 | 
  9 | struct BBox {
 10 |   float x1;
 11 |   float y1;
 12 |   float x2;
 13 |   float y2;
 14 | 
 15 |   cv::Rect getRect() const { return cv::Rect(x1, y1, x2 - x1, y2 - y1); }
 16 | 
 17 |   BBox getSquare() const {
 18 |     BBox bbox;
 19 |     float bboxWidth = x2 - x1;
 20 |     float bboxHeight = y2 - y1;
 21 |     float side = std::max(bboxWidth, bboxHeight);
 22 |     bbox.x1 = static_cast<int>(x1 + (bboxWidth - side) * 0.5f);
 23 |     bbox.y1 = static_cast<int>(y1 + (bboxHeight - side) * 0.5f);
 24 |     bbox.x2 = static_cast<int>(bbox.x1 + side);
 25 |     bbox.y2 = static_cast<int>(bbox.y1 + side);
 26 |     return bbox;
 27 |   }
 28 | };
 29 | 
 30 | struct Face {
 31 |   BBox bbox;
 32 |   float score;
 33 |   float regression[NUM_REGRESSIONS];
 34 |   float ptsCoords[2 * NUM_PTS];
 35 | 
 36 |   static void applyRegression(std::vector<Face> &faces, bool addOne = false) {
 37 |     for (size_t i = 0; i < faces.size(); ++i) {
 38 |       float bboxWidth =
 39 |           faces[i].bbox.x2 - faces[i].bbox.x1 + static_cast<float>(addOne);
 40 |       float bboxHeight =
 41 |           faces[i].bbox.y2 - faces[i].bbox.y1 + static_cast<float>(addOne);
 42 |       faces[i].bbox.x1 = faces[i].bbox.x1 + faces[i].regression[1] * bboxWidth;
 43 |       faces[i].bbox.y1 = faces[i].bbox.y1 + faces[i].regression[0] * bboxHeight;
 44 |       faces[i].bbox.x2 = faces[i].bbox.x2 + faces[i].regression[3] * bboxWidth;
 45 |       faces[i].bbox.y2 = faces[i].bbox.y2 + faces[i].regression[2] * bboxHeight;
 46 |     }
 47 |   }
 48 | 
 49 |   static void bboxes2Squares(std::vector<Face> &faces) {
 50 |     for (size_t i = 0; i < faces.size(); ++i) {
 51 |       faces[i].bbox = faces[i].bbox.getSquare();
 52 |     }
 53 |   }
 54 | 
 55 |   static std::vector<Face> runNMS(std::vector<Face> &faces, float threshold,
 56 |                                   bool useMin = false) {
 57 |     std::vector<Face> facesNMS;
 58 |     if (faces.empty()) {
 59 |       return facesNMS;
 60 |     }
 61 | 
 62 |     std::sort(faces.begin(), faces.end(), [](const Face &f1, const Face &f2) {
 63 |       return f1.score > f2.score;
 64 |     });
 65 | 
 66 |     std::vector<int> indices(faces.size());
 67 |     for (size_t i = 0; i < indices.size(); ++i) {
 68 |       indices[i] = i;
 69 |     }
 70 | 
 71 |     while (indices.size() > 0) {
 72 |       int idx = indices[0];
 73 |       facesNMS.push_back(faces[idx]);
 74 |       std::vector<int> tmpIndices = indices;
 75 |       indices.clear();
 76 |       for (size_t i = 1; i < tmpIndices.size(); ++i) {
 77 |         int tmpIdx = tmpIndices[i];
 78 |         float interX1 = std::max(faces[idx].bbox.x1, faces[tmpIdx].bbox.x1);
 79 |         float interY1 = std::max(faces[idx].bbox.y1, faces[tmpIdx].bbox.y1);
 80 |         float interX2 = std::min(faces[idx].bbox.x2, faces[tmpIdx].bbox.x2);
 81 |         float interY2 = std::min(faces[idx].bbox.y2, faces[tmpIdx].bbox.y2);
 82 | 
 83 |         float bboxWidth = std::max(0.f, (interX2 - interX1 + 1));
 84 |         float bboxHeight = std::max(0.f, (interY2 - interY1 + 1));
 85 | 
 86 |         float interArea = bboxWidth * bboxHeight;
 87 |         // TODO: compute outside the loop
 88 |         float area1 = (faces[idx].bbox.x2 - faces[idx].bbox.x1 + 1) *
 89 |                       (faces[idx].bbox.y2 - faces[idx].bbox.y1 + 1);
 90 |         float area2 = (faces[tmpIdx].bbox.x2 - faces[tmpIdx].bbox.x1 + 1) *
 91 |                       (faces[tmpIdx].bbox.y2 - faces[tmpIdx].bbox.y1 + 1);
 92 |         float o = 0.f;
 93 |         if (useMin) {
 94 |           o = interArea / std::min(area1, area2);
 95 |         } else {
 96 |           o = interArea / (area1 + area2 - interArea);
 97 |         }
 98 |         if (o <= threshold) {
 99 |           indices.push_back(tmpIdx);
100 |         }
101 |       }
102 |     }
103 |     return facesNMS;
104 |   }
105 | };
106 | 
107 | #endif
108 | 


--------------------------------------------------------------------------------
/lib/include/mtcnn/helpers.h:
--------------------------------------------------------------------------------
 1 | #ifndef _include_opencv_helpers_h_
 2 | #define _include_opencv_helpers_h_
 3 | 
 4 | #include <opencv2/core.hpp>
 5 | 
 6 | inline cv::Mat cropImage(const cv::Mat &img, cv::Rect r) {
 7 |   cv::Mat m = cv::Mat::zeros(r.height, r.width, img.type());
 8 |   int dx = std::abs(std::min(0, r.x));
 9 |   if (dx > 0) {
10 |     r.x = 0;
11 |   }
12 |   r.width -= dx;
13 |   int dy = std::abs(std::min(0, r.y));
14 |   if (dy > 0) {
15 |     r.y = 0;
16 |   }
17 |   r.height -= dy;
18 |   int dw = std::abs(std::min(0, img.cols - 1 - (r.x + r.width)));
19 |   r.width -= dw;
20 |   int dh = std::abs(std::min(0, img.rows - 1 - (r.y + r.height)));
21 |   r.height -= dh;
22 |   if (r.width > 0 && r.height > 0) {
23 |     img(r).copyTo(m(cv::Range(dy, dy + r.height), cv::Range(dx, dx + r.width)));
24 |   }
25 |   return m;
26 | }
27 | 
28 | #endif


--------------------------------------------------------------------------------
/lib/include/mtcnn/onet.h:
--------------------------------------------------------------------------------
 1 | #ifndef _include_opencv_onet_h_
 2 | #define _include_opencv_onet_h_
 3 | 
 4 | #include "face.h"
 5 | #include <opencv2/dnn.hpp>
 6 | 
 7 | class OutputNetwork {
 8 | public:
 9 |   struct Config {
10 |   public:
11 |     std::string protoText;
12 |     std::string caffeModel;
13 |     float threshold;
14 |   };
15 | 
16 | private:
17 |   cv::dnn::Net _net;
18 |   float _threshold;
19 | 
20 | public:
21 |   OutputNetwork(const OutputNetwork::Config &config);
22 |   OutputNetwork();
23 | 
24 | private:
25 |   OutputNetwork(const OutputNetwork &rhs) = delete;
26 |   OutputNetwork &operator=(const OutputNetwork &rhs) = delete;
27 | 
28 | public:
29 |   std::vector<Face> run(const cv::Mat &img, const std::vector<Face> &faces);
30 | };
31 | 
32 | #endif
33 | 


--------------------------------------------------------------------------------
/lib/include/mtcnn/pnet.h:
--------------------------------------------------------------------------------
 1 | #ifndef _include_opencv_pnet_h_
 2 | #define _include_opencv_pnet_h_
 3 | 
 4 | #include "face.h"
 5 | #include <opencv2/dnn.hpp>
 6 | 
 7 | class ProposalNetwork {
 8 | public:
 9 |   struct Config {
10 |   public:
11 |     std::string protoText;
12 |     std::string caffeModel;
13 |     float threshold;
14 |     float nmsThreshold;
15 |   };
16 | 
17 | private:
18 |   cv::dnn::Net _net;
19 |   float _threshold;
20 | 
21 | private:
22 |   std::vector<Face> buildFaces(const cv::Mat &scores,
23 |                                const cv::Mat &regressions,
24 |                                const float scaleFactor, const float threshold);
25 | 
26 | public:
27 |   ProposalNetwork(const ProposalNetwork::Config &config);
28 |   ~ProposalNetwork();
29 | 
30 | private:
31 |   ProposalNetwork(const ProposalNetwork &rhs) = delete;
32 |   ProposalNetwork &operator=(const ProposalNetwork &rhs) = delete;
33 | 
34 | public:
35 |   std::vector<Face> run(const cv::Mat &img, const float minFaceSize,
36 |                         const float scaleFactor);
37 | };
38 | 
39 | #endif
40 | 


--------------------------------------------------------------------------------
/lib/include/mtcnn/rnet.h:
--------------------------------------------------------------------------------
 1 | #ifndef _include_opencv_rnet_h_
 2 | #define _include_opencv_rnet_h_
 3 | 
 4 | #include "face.h"
 5 | #include <opencv2/dnn.hpp>
 6 | 
 7 | class RefineNetwork {
 8 | public:
 9 |   struct Config {
10 |   public:
11 |     std::string protoText;
12 |     std::string caffeModel;
13 |     float threshold;
14 |   };
15 | 
16 | private:
17 |   cv::dnn::Net _net;
18 |   float _threshold;
19 | 
20 | public:
21 |   RefineNetwork(const RefineNetwork::Config &config);
22 |   ~RefineNetwork();
23 | 
24 | private:
25 |   RefineNetwork(const RefineNetwork &rhs) = delete;
26 |   RefineNetwork &operator=(const RefineNetwork &rhs) = delete;
27 | 
28 | public:
29 |   std::vector<Face> run(const cv::Mat &img, const std::vector<Face> &faces);
30 | };
31 | 
32 | #endif
33 | 


--------------------------------------------------------------------------------
/lib/src/detector.cc:
--------------------------------------------------------------------------------
 1 | #include "mtcnn/detector.h"
 2 | 
 3 | MTCNNDetector::MTCNNDetector(const ProposalNetwork::Config &pConfig,
 4 |                              const RefineNetwork::Config &rConfig,
 5 |                              const OutputNetwork::Config &oConfig) {
 6 |   _pnet = std::unique_ptr<ProposalNetwork>(new ProposalNetwork(pConfig));
 7 |   _rnet = std::unique_ptr<RefineNetwork>(new RefineNetwork(rConfig));
 8 |   _onet = std::unique_ptr<OutputNetwork>(new OutputNetwork(oConfig));
 9 | }
10 | 
11 | std::vector<Face> MTCNNDetector::detect(const cv::Mat &img,
12 |                                         const float minFaceSize,
13 |                                         const float scaleFactor) {
14 | 
15 |   cv::Mat rgbImg;
16 |   if (img.channels() == 3) {
17 |     cv::cvtColor(img, rgbImg, CV_BGR2RGB);
18 |   } else if (img.channels() == 4) {
19 |     cv::cvtColor(img, rgbImg, CV_BGRA2RGB);
20 |   }
21 |   if (rgbImg.empty()) {
22 |     return std::vector<Face>();
23 |   }
24 |   rgbImg.convertTo(rgbImg, CV_32FC3);
25 |   rgbImg = rgbImg.t();
26 | 
27 |   // Run Proposal Network to find the initial set of faces
28 |   std::vector<Face> faces = _pnet->run(rgbImg, minFaceSize, scaleFactor);
29 | 
30 |   // Early exit if we do not have any faces
31 |   if (faces.empty()) {
32 |     return faces;
33 |   }
34 | 
35 |   // Run Refine network on the output of the Proposal network
36 |   faces = _rnet->run(rgbImg, faces);
37 | 
38 |   // Early exit if we do not have any faces
39 |   if (faces.empty()) {
40 |     return faces;
41 |   }
42 | 
43 |   // Run Output network on the output of the Refine network
44 |   faces = _onet->run(rgbImg, faces);
45 | 
46 |   for (size_t i = 0; i < faces.size(); ++i) {
47 |     std::swap(faces[i].bbox.x1, faces[i].bbox.y1);
48 |     std::swap(faces[i].bbox.x2, faces[i].bbox.y2);
49 |     for (int p = 0; p < NUM_PTS; ++p) {
50 |       std::swap(faces[i].ptsCoords[2 * p], faces[i].ptsCoords[2 * p + 1]);
51 |     }
52 |   }
53 | 
54 |   return faces;
55 | }
56 | 


--------------------------------------------------------------------------------
/lib/src/onet.cc:
--------------------------------------------------------------------------------
 1 | #include "mtcnn/onet.h"
 2 | #include "mtcnn/helpers.h"
 3 | 
 4 | const int INPUT_DATA_WIDTH = 48;
 5 | const int INPUT_DATA_HEIGHT = 48;
 6 | 
 7 | const float IMG_MEAN = 127.5f;
 8 | const float IMG_INV_STDDEV = 1.f / 128.f;
 9 | 
10 | OutputNetwork::OutputNetwork(const OutputNetwork::Config &config) {
11 |   _net = cv::dnn::readNetFromCaffe(config.protoText, config.caffeModel);
12 |   if (_net.empty()) {
13 |     throw std::invalid_argument("invalid protoText or caffeModel");
14 |   }
15 |   _threshold = config.threshold;
16 | }
17 | 
18 | OutputNetwork::OutputNetwork() {}
19 | 
20 | std::vector<Face> OutputNetwork::run(const cv::Mat &img,
21 |                                      const std::vector<Face> &faces) {
22 |   cv::Size windowSize = cv::Size(INPUT_DATA_WIDTH, INPUT_DATA_HEIGHT);
23 | 
24 |   std::vector<Face> totalFaces;
25 | 
26 |   for (auto &f : faces) {
27 |     cv::Mat roi = cropImage(img, f.bbox.getRect());
28 |     cv::resize(roi, roi, windowSize, 0, 0, cv::INTER_AREA);
29 | 
30 |     // we will run the ONet on each face
31 |     // TODO : see how this can be optimized such that we run
32 |     // it only 1 time
33 | 
34 |     // build blob images from the inputs
35 |     auto blobInput =
36 |         cv::dnn::blobFromImage(roi, IMG_INV_STDDEV, cv::Size(),
37 |                                cv::Scalar(IMG_MEAN, IMG_MEAN, IMG_MEAN), false);
38 | 
39 |     _net.setInput(blobInput, "data");
40 | 
41 |     const std::vector<cv::String> outBlobNames{"conv6-2", "conv6-3", "prob1"};
42 |     std::vector<cv::Mat> outputBlobs;
43 | 
44 |     _net.forward(outputBlobs, outBlobNames);
45 | 
46 |     cv::Mat regressionsBlob = outputBlobs[0];
47 |     cv::Mat landMarkBlob = outputBlobs[1];
48 |     cv::Mat scoresBlob = outputBlobs[2];
49 | 
50 |     const float *scores_data = (float *)scoresBlob.data;
51 |     const float *landmark_data = (float *)landMarkBlob.data;
52 |     const float *reg_data = (float *)regressionsBlob.data;
53 | 
54 |     if (scores_data[1] >= _threshold) {
55 |       Face info = f;
56 |       info.score = scores_data[1];
57 |       for (int i = 0; i < 4; ++i) {
58 |         info.regression[i] = reg_data[i];
59 |       }
60 | 
61 |       float w = info.bbox.x2 - info.bbox.x1 + 1.f;
62 |       float h = info.bbox.y2 - info.bbox.y1 + 1.f;
63 | 
64 |       for (int p = 0; p < NUM_PTS; ++p) {
65 |         info.ptsCoords[2 * p] =
66 |             info.bbox.x1 + landmark_data[NUM_PTS + p] * w - 1;
67 |         info.ptsCoords[2 * p + 1] = info.bbox.y1 + landmark_data[p] * h - 1;
68 |       }
69 | 
70 |       totalFaces.push_back(info);
71 |     }
72 |   }
73 | 
74 |   Face::applyRegression(totalFaces, true);
75 |   totalFaces = Face::runNMS(totalFaces, 0.7f, true);
76 | 
77 |   return totalFaces;
78 | }
79 | 


--------------------------------------------------------------------------------
/lib/src/pnet.cc:
--------------------------------------------------------------------------------
  1 | #include "mtcnn/pnet.h"
  2 | 
  3 | const float P_NET_WINDOW_SIZE = 12.f;
  4 | const int P_NET_STRIDE = 2;
  5 | 
  6 | const float IMG_MEAN = 127.5f;
  7 | const float IMG_INV_STDDEV = 1.f / 128.f;
  8 | 
  9 | ProposalNetwork::ProposalNetwork(const ProposalNetwork::Config &config) {
 10 |   _net = cv::dnn::readNetFromCaffe(config.protoText, config.caffeModel);
 11 |   if (_net.empty()) {
 12 |     throw std::invalid_argument("invalid protoText or caffeModel");
 13 |   }
 14 |   _threshold = config.threshold;
 15 | }
 16 | 
 17 | ProposalNetwork::~ProposalNetwork() {}
 18 | 
 19 | std::vector<Face> ProposalNetwork::buildFaces(const cv::Mat &scores,
 20 |                                               const cv::Mat &regressions,
 21 |                                               const float scaleFactor,
 22 |                                               const float threshold) {
 23 | 
 24 |   auto w = scores.size[3];
 25 |   auto h = scores.size[2];
 26 |   auto size = w * h;
 27 | 
 28 |   const float *scores_data = (float *)(scores.data);
 29 |   scores_data += size;
 30 | 
 31 |   const float *reg_data = (float *)(regressions.data);
 32 | 
 33 |   std::vector<Face> boxes;
 34 | 
 35 |   for (int i = 0; i < size; i++) {
 36 |     if (scores_data[i] >= (threshold)) {
 37 |       int y = i / w;
 38 |       int x = i - w * y;
 39 | 
 40 |       Face faceInfo;
 41 |       BBox &faceBox = faceInfo.bbox;
 42 | 
 43 |       faceBox.x1 = (float)(x * P_NET_STRIDE) / scaleFactor;
 44 |       faceBox.y1 = (float)(y * P_NET_STRIDE) / scaleFactor;
 45 |       faceBox.x2 =
 46 |           (float)(x * P_NET_STRIDE + P_NET_WINDOW_SIZE - 1.f) / scaleFactor;
 47 |       faceBox.y2 =
 48 |           (float)(y * P_NET_STRIDE + P_NET_WINDOW_SIZE - 1.f) / scaleFactor;
 49 |       faceInfo.regression[0] = reg_data[i];
 50 |       faceInfo.regression[1] = reg_data[i + size];
 51 |       faceInfo.regression[2] = reg_data[i + 2 * size];
 52 |       faceInfo.regression[3] = reg_data[i + 3 * size];
 53 |       faceInfo.score = scores_data[i];
 54 |       boxes.push_back(faceInfo);
 55 |     }
 56 |   }
 57 | 
 58 |   return boxes;
 59 | }
 60 | 
 61 | std::vector<Face> ProposalNetwork::run(const cv::Mat &img,
 62 |                                        const float minFaceSize,
 63 |                                        const float scaleFactor) {
 64 | 
 65 |   std::vector<Face> finalFaces;
 66 |   float maxFaceSize = static_cast<float>(std::min(img.rows, img.cols));
 67 |   float faceSize = minFaceSize;
 68 | 
 69 |   while (faceSize <= maxFaceSize) {
 70 |     float currentScale = (P_NET_WINDOW_SIZE) / faceSize;
 71 |     int imgHeight = std::ceil(img.rows * currentScale);
 72 |     int imgWidth = std::ceil(img.cols * currentScale);
 73 |     cv::Mat resizedImg;
 74 |     cv::resize(img, resizedImg, cv::Size(imgWidth, imgHeight), 0, 0,
 75 |                cv::INTER_AREA);
 76 | 
 77 |     // feed it to the proposal network
 78 |     cv::Mat inputBlob =
 79 |         cv::dnn::blobFromImage(resizedImg, IMG_INV_STDDEV, cv::Size(),
 80 |                                cv::Scalar(IMG_MEAN, IMG_MEAN, IMG_MEAN), false);
 81 | 
 82 |     _net.setInput(inputBlob, "data");
 83 | 
 84 |     const std::vector<cv::String> outBlobNames{"conv4-2", "prob1"};
 85 |     std::vector<cv::Mat> outputBlobs;
 86 | 
 87 |     _net.forward(outputBlobs, outBlobNames);
 88 | 
 89 |     cv::Mat regressionsBlob = outputBlobs[0];
 90 |     cv::Mat scoresBlob = outputBlobs[1];
 91 | 
 92 |     auto faces =
 93 |         buildFaces(scoresBlob, regressionsBlob, currentScale, _threshold);
 94 | 
 95 |     if (!faces.empty()) {
 96 |       faces = Face::runNMS(faces, 0.5f);
 97 |     }
 98 | 
 99 |     if (!faces.empty()) {
100 |       finalFaces.insert(finalFaces.end(), faces.begin(), faces.end());
101 |     }
102 | 
103 |     faceSize /= scaleFactor;
104 |   }
105 | 
106 |   if (!finalFaces.empty()) {
107 |     finalFaces = Face::runNMS(finalFaces, 0.7f);
108 |     if (!finalFaces.empty()) {
109 |       Face::applyRegression(finalFaces, false);
110 |       Face::bboxes2Squares(finalFaces);
111 |     }
112 |   }
113 | 
114 |   return finalFaces;
115 | }
116 | 


--------------------------------------------------------------------------------
/lib/src/rnet.cc:
--------------------------------------------------------------------------------
 1 | #include "mtcnn/rnet.h"
 2 | #include "mtcnn/helpers.h"
 3 | 
 4 | const int INPUT_DATA_WIDTH = 24;
 5 | const int INPUT_DATA_HEIGHT = 24;
 6 | 
 7 | const float IMG_MEAN = 127.5f;
 8 | const float IMG_INV_STDDEV = 1.f / 128.f;
 9 | 
10 | RefineNetwork::RefineNetwork(const RefineNetwork::Config &config) {
11 |   _net = cv::dnn::readNetFromCaffe(config.protoText, config.caffeModel);
12 |   if (_net.empty()) {
13 |     throw std::invalid_argument("invalid protoText or caffeModel");
14 |   }
15 |   _threshold = config.threshold;
16 | }
17 | 
18 | RefineNetwork::~RefineNetwork() {}
19 | 
20 | std::vector<Face> RefineNetwork::run(const cv::Mat &img,
21 |                                      const std::vector<Face> &faces) {
22 |   cv::Size windowSize = cv::Size(INPUT_DATA_WIDTH, INPUT_DATA_HEIGHT);
23 | 
24 |   std::vector<cv::Mat> inputs;
25 |   for (auto &f : faces) {
26 |     cv::Mat roi = cropImage(img, f.bbox.getRect());
27 |     cv::resize(roi, roi, windowSize, 0, 0, cv::INTER_AREA);
28 |     inputs.push_back(roi);
29 |   }
30 | 
31 |   // build blob images from the inputs
32 |   auto blobInputs =
33 |       cv::dnn::blobFromImages(inputs, IMG_INV_STDDEV, cv::Size(),
34 |                               cv::Scalar(IMG_MEAN, IMG_MEAN, IMG_MEAN), false);
35 | 
36 |   _net.setInput(blobInputs, "data");
37 | 
38 |   const std::vector<cv::String> outBlobNames{"conv5-2", "prob1"};
39 |   std::vector<cv::Mat> outputBlobs;
40 | 
41 |   _net.forward(outputBlobs, outBlobNames);
42 | 
43 |   cv::Mat regressionsBlob = outputBlobs[0];
44 |   cv::Mat scoresBlob = outputBlobs[1];
45 | 
46 |   std::vector<Face> totalFaces;
47 | 
48 |   const float *scores_data = (float *)scoresBlob.data;
49 |   const float *reg_data = (float *)regressionsBlob.data;
50 | 
51 |   for (int k = 0; k < faces.size(); ++k) {
52 |     if (scores_data[2 * k + 1] >= _threshold) {
53 |       Face info = faces[k];
54 |       info.score = scores_data[2 * k + 1];
55 |       for (int i = 0; i < 4; ++i) {
56 |         info.regression[i] = reg_data[4 * k + i];
57 |       }
58 |       totalFaces.push_back(info);
59 |     }
60 |   }
61 | 
62 |   // nms and regression
63 |   totalFaces = Face::runNMS(totalFaces, 0.7f);
64 |   Face::applyRegression(totalFaces, true);
65 |   Face::bboxes2Squares(totalFaces);
66 | 
67 |   return totalFaces;
68 | }
69 | 


--------------------------------------------------------------------------------
/sample/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | #configure variables
 2 | set (SAMPLE_NAME "app")
 3 | 
 4 | #configure directories
 5 | set (SAMPLE_PATH "${PROJECT_SOURCE_DIR}/sample")
 6 | set (SAMPLE_SRC_PATH  "${SAMPLE_PATH}/src" )
 7 | set (SAMPLE_INCLUDE_PATH "${SAMPLE_PATH}/include")
 8 | 
 9 | find_package(OpenCV REQUIRED)
10 | 
11 | set(Boost_USE_STATIC_LIBS   ON)
12 | FIND_PACKAGE(Boost 1.58 COMPONENTS filesystem timer REQUIRED)
13 | 
14 | #set includes
15 | include_directories (
16 |     ${SAMPLE_INCLUDE_PATH}
17 |     ${MTCNN_LIB_INCLUDE_PATH}
18 | )
19 | 
20 | #set sources
21 | file (GLOB SAMPLE_SOURCE_FILES "${SAMPLE_SRC_PATH}/*.cc")
22 | file (GLOB SAMPLE_HEADER_FILES "${SAMPLE_INCLUDE_PATH}/*.h")
23 | 
24 | #set target executable
25 | add_executable (${SAMPLE_NAME} ${SAMPLE_SOURCE_FILES} ${SAMPLE_HEADER_FILES})
26 | 
27 | if (WIN32)
28 | set(LINK_OPTIONS shlwapi.lib)
29 | endif()
30 | 
31 | if ("${CMAKE_SYSTEM_NAME}" MATCHES "Linux")
32 | set(LINK_OPTIONS -pthread -ldl -static-libgcc -static-libstdc++)
33 | endif()
34 | 
35 | if (Darwin)
36 | set(LINK_OPTIONS -pthread)
37 | endif()
38 | 
39 | #add the library
40 | target_link_libraries (
41 |     ${SAMPLE_NAME}
42 |     ${OpenCV_LIBRARIES}
43 |     ${Boost_LIBRARIES}
44 |     ${MTCNN_LIB_NAME}
45 |     ${LINK_OPTIONS})
46 | 
47 | #test
48 | enable_testing ()
49 | 


--------------------------------------------------------------------------------
/sample/src/main.cc:
--------------------------------------------------------------------------------
 1 | #include <boost/filesystem.hpp>
 2 | #include <boost/filesystem/path.hpp>
 3 | #include <boost/timer/timer.hpp>
 4 | 
 5 | #include <opencv2/dnn.hpp>
 6 | #include <opencv2/highgui.hpp>
 7 | #include <opencv2/imgproc.hpp>
 8 | 
 9 | #include <mtcnn/detector.h>
10 | 
11 | namespace fs = boost::filesystem;
12 | 
13 | using rectPoints = std::pair<cv::Rect, std::vector<cv::Point>>;
14 | 
15 | static cv::Mat drawRectsAndPoints(const cv::Mat &img,
16 |                                   const std::vector<rectPoints> data) {
17 |   cv::Mat outImg;
18 |   img.convertTo(outImg, CV_8UC3);
19 | 
20 |   for (auto &d : data) {
21 |     cv::rectangle(outImg, d.first, cv::Scalar(0, 0, 255));
22 |     auto pts = d.second;
23 |     for (size_t i = 0; i < pts.size(); ++i) {
24 |       cv::circle(outImg, pts[i], 3, cv::Scalar(0, 0, 255));
25 |     }
26 |   }
27 |   return outImg;
28 | }
29 | 
30 | int main(int argc, char **argv) {
31 | 
32 |   if (argc < 3) {
33 |     std::cerr << "Usage " << argv[0] << ": "
34 |               << "<model-dir> "
35 |               << " "
36 |               << "<test-image>\n";
37 |     return -1;
38 |   }
39 | 
40 |   fs::path modelDir = fs::path(argv[1]);
41 | 
42 |   ProposalNetwork::Config pConfig;
43 |   pConfig.caffeModel = (modelDir / "det1.caffemodel").string();
44 |   pConfig.protoText = (modelDir / "det1.prototxt").string();
45 |   pConfig.threshold = 0.6f;
46 | 
47 |   RefineNetwork::Config rConfig;
48 |   rConfig.caffeModel = (modelDir / "det2.caffemodel").string();
49 |   rConfig.protoText = (modelDir / "det2.prototxt").string();
50 |   rConfig.threshold = 0.7f;
51 | 
52 |   OutputNetwork::Config oConfig;
53 |   oConfig.caffeModel = (modelDir / "det3.caffemodel").string();
54 |   oConfig.protoText = (modelDir / "det3.prototxt").string();
55 |   oConfig.threshold = 0.7f;
56 | 
57 |   MTCNNDetector detector(pConfig, rConfig, oConfig);
58 |   cv::Mat img = cv::imread(argv[2]);
59 | 
60 |   std::vector<Face> faces;
61 | 
62 |   {
63 |     boost::timer::auto_cpu_timer t(3, "%w seconds\n");
64 |     faces = detector.detect(img, 20.f, 0.709f);
65 |   }
66 | 
67 |   std::cout << "Number of faces found in the supplied image - " << faces.size()
68 |             << std::endl;
69 | 
70 |   std::vector<rectPoints> data;
71 | 
72 |   // show the image with faces in it
73 |   for (size_t i = 0; i < faces.size(); ++i) {
74 |     std::vector<cv::Point> pts;
75 |     for (int p = 0; p < NUM_PTS; ++p) {
76 |       pts.push_back(
77 |           cv::Point(faces[i].ptsCoords[2 * p], faces[i].ptsCoords[2 * p + 1]));
78 |     }
79 | 
80 |     auto rect = faces[i].bbox.getRect();
81 |     auto d = std::make_pair(rect, pts);
82 |     data.push_back(d);
83 |   }
84 | 
85 |   auto resultImg = drawRectsAndPoints(img, data);
86 |   cv::imshow("test-oc", resultImg);
87 |   cv::waitKey(0);
88 | 
89 |   return 0;
90 | }
91 | 


--------------------------------------------------------------------------------