├── 3065-1min.mp4
├── 3065-slides.pdf
├── Bias-FQA
├── adience_age.png
├── colorferet_ethnics.png
├── colorferet_pose.png
├── info
├── quality_distribution_SER-FIQ_adience_arcface_age.png
├── quality_distribution_SER-FIQ_colorferet_arcface_ethnic.png
├── quality_distribution_SER-FIQ_colorferet_arcface_pose.png
├── stack_SER-FIQ_adience_arcface_age.png
├── stack_SER-FIQ_adience_arcface_as_percent=True_v1.png
├── stack_SER-FIQ_colorferet_arcface_as_percent=True_v1.png
├── stack_SER-FIQ_colorferet_arcface_ethnic.png
└── stack_SER-FIQ_colorferet_arcface_pose.png
├── CVPR_2020_teaser_1200x1200.gif
├── FQA-Results
├── 001FMR_adience_arcface.png
├── 001FMR_lfw_arcface.png
└── info
├── README.md
├── Supplementary
└── info
├── data
├── img_src.txt
├── test_img.jpeg
└── test_img2.jpeg
├── face_image_quality.py
├── insightface
├── license.txt
├── model
│ └── download_link_license.txt
├── mtcnn-model
│ ├── det1-0001.params
│ ├── det1-symbol.json
│ ├── det1.caffemodel
│ ├── det1.prototxt
│ ├── det2-0001.params
│ ├── det2-symbol.json
│ ├── det2.caffemodel
│ ├── det2.prototxt
│ ├── det3-0001.params
│ ├── det3-symbol.json
│ ├── det3.caffemodel
│ ├── det3.prototxt
│ ├── det4-0001.params
│ ├── det4-symbol.json
│ ├── det4.caffemodel
│ └── det4.prototxt
└── src
│ ├── face_preprocess.py
│ ├── helper.py
│ └── mtcnn_detector.py
├── requirements.txt
└── serfiq_example.py
/3065-1min.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/3065-1min.mp4
--------------------------------------------------------------------------------
/3065-slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/3065-slides.pdf
--------------------------------------------------------------------------------
/Bias-FQA/adience_age.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/Bias-FQA/adience_age.png
--------------------------------------------------------------------------------
/Bias-FQA/colorferet_ethnics.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/Bias-FQA/colorferet_ethnics.png
--------------------------------------------------------------------------------
/Bias-FQA/colorferet_pose.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/Bias-FQA/colorferet_pose.png
--------------------------------------------------------------------------------
/Bias-FQA/info:
--------------------------------------------------------------------------------
1 | Some results on bias in face quality assessments
2 |
--------------------------------------------------------------------------------
/Bias-FQA/quality_distribution_SER-FIQ_adience_arcface_age.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/Bias-FQA/quality_distribution_SER-FIQ_adience_arcface_age.png
--------------------------------------------------------------------------------
/Bias-FQA/quality_distribution_SER-FIQ_colorferet_arcface_ethnic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/Bias-FQA/quality_distribution_SER-FIQ_colorferet_arcface_ethnic.png
--------------------------------------------------------------------------------
/Bias-FQA/quality_distribution_SER-FIQ_colorferet_arcface_pose.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/Bias-FQA/quality_distribution_SER-FIQ_colorferet_arcface_pose.png
--------------------------------------------------------------------------------
/Bias-FQA/stack_SER-FIQ_adience_arcface_age.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/Bias-FQA/stack_SER-FIQ_adience_arcface_age.png
--------------------------------------------------------------------------------
/Bias-FQA/stack_SER-FIQ_adience_arcface_as_percent=True_v1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/Bias-FQA/stack_SER-FIQ_adience_arcface_as_percent=True_v1.png
--------------------------------------------------------------------------------
/Bias-FQA/stack_SER-FIQ_colorferet_arcface_as_percent=True_v1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/Bias-FQA/stack_SER-FIQ_colorferet_arcface_as_percent=True_v1.png
--------------------------------------------------------------------------------
/Bias-FQA/stack_SER-FIQ_colorferet_arcface_ethnic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/Bias-FQA/stack_SER-FIQ_colorferet_arcface_ethnic.png
--------------------------------------------------------------------------------
/Bias-FQA/stack_SER-FIQ_colorferet_arcface_pose.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/Bias-FQA/stack_SER-FIQ_colorferet_arcface_pose.png
--------------------------------------------------------------------------------
/CVPR_2020_teaser_1200x1200.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/CVPR_2020_teaser_1200x1200.gif
--------------------------------------------------------------------------------
/FQA-Results/001FMR_adience_arcface.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/FQA-Results/001FMR_adience_arcface.png
--------------------------------------------------------------------------------
/FQA-Results/001FMR_lfw_arcface.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/FQA-Results/001FMR_lfw_arcface.png
--------------------------------------------------------------------------------
/FQA-Results/info:
--------------------------------------------------------------------------------
1 | Some results from the CVPR 2020 paper
2 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Face Image Quality Assessment
2 |
3 | ***15.05.2020*** _SER-FIQ (CVPR2020) was added._
4 |
5 | ***18.05.2020*** _Bias in FIQ (IJCB2020) was added._
6 |
7 | ***13.08.2021*** _The implementation now outputs normalized quality values._
8 |
9 | ***30.11.2021*** _Related works section was added_
10 |
11 |
12 | ## SER-FIQ: Unsupervised Estimation of Face Image Quality Based on Stochastic Embedding Robustness
13 |
14 |
15 |
16 | IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) 2020
17 |
18 | * [Research Paper](https://arxiv.org/abs/2003.09373)
19 | * [Implementation on ArcFace](face_image_quality.py)
20 | * [Video](https://www.youtube.com/watch?v=soW_Gg4NElc)
21 |
22 |
23 | ## Table of Contents
24 |
25 | - [Abstract](#abstract)
26 | - [Key Points](#key-points)
27 | - [Results](#results)
28 | - [Installation](#installation)
29 | - [Bias in Face Quality Assessment](#bias-in-face-quality-assessment)
30 | - [Related Works](#related-works)
31 | - [Citing](#citing)
32 | - [Acknowledgement](#acknowledgement)
33 | - [License](#license)
34 |
35 | ## Abstract
36 |
37 |
38 |
39 | Face image quality is an important factor to enable high-performance face recognition systems. Face quality assessment aims at estimating the suitability of a face image for recognition. Previous works proposed supervised solutions that require artificially or human labelled quality values. However, both labelling mechanisms are error-prone as they do not rely on a clear definition of quality and may not know the best characteristics for the utilized face recognition system. Avoiding the use of inaccurate quality labels, we proposed a novel concept to measure face quality based on an arbitrary face recognition model. By determining the embedding variations generated from random subnetworks of a face model, the robustness of a sample representation and thus, its quality is estimated. The experiments are conducted in a cross-database evaluation setting on three publicly available databases. We compare our proposed solution on two face embeddings against six state-of-the-art approaches from academia and industry. The results show that our unsupervised solution outperforms all other approaches in the majority of the investigated scenarios. In contrast to previous works, the proposed solution shows a stable performance over all scenarios. Utilizing the deployed face recognition model for our face quality assessment methodology avoids the training phase completely and further outperforms all baseline approaches by a large margin. Our solution can be easily integrated into current face recognition systems and can be modified to other tasks beyond face recognition.
40 |
41 | ## Key Points
42 |
43 | - Quality assessment with SER-FIQ is most effective when the quality measure is based on the deployed face recognition network, meaning that **the quality estimation and the recognition should be performed on the same network**. This way the quality estimation captures the same decision patterns as the face recognition system. If you use this model from this GitHub for your research, please make sure to label it as "SER-FIQ (on ArcFace)" since this is the underlying recognition model.
44 | - To get accurate quality estimations, the underlying face recognition network for SER-FIQ should be **trained with dropout**. This is suggested since our solution utilizes the robustness against dropout variations as a quality indicator.
45 | - The provided code is only a demonstration on how SER-FIQ can be utilized. The main contribution of SER-FIQ is the novel concept of measuring face image quality.
46 | - If the last layer contains dropout, it is sufficient to repeat the stochastic forward passes only on this layer. This significantly reduces the computation time to a time span of a face template generation. On ResNet-100, it takes 24.2 GFLOPS for creating an embedding and only 26.8 GFLOPS (+10%) for estimating the quality.
47 |
48 | ## Results
49 |
50 | Face image quality assessment results are shown below on LFW (left) and Adience (right). SER-FIQ (same model) is based on ArcFace and shown in red. The plots show the FNMR at 0.1% FMR as recommended by the [best practice guidelines](https://op.europa.eu/en/publication-detail/-/publication/e81d082d-20a8-11e6-86d0-01aa75ed71a1) of the European Border Guard Agency Frontex. For more details and results, please take a look at the paper.
51 |
52 |
53 |
54 | ## Installation
55 |
56 | We recommend using a virtual environment to install the required packages. Python 3.7 or 3.8 is recommended.
57 | To install them execute
58 |
59 | ```shell
60 | pip install -r requirements.txt
61 | ```
62 |
63 | or you can install them manually with the following command:
64 |
65 | ```shell
66 | pip install mxnet-cuXYZ scikit-image scikit-learn opencv-python
67 | ```
68 |
69 | Please replace mxnet-cuXYZ with your CUDA version.
70 | After the required packages have been installed, [download the model files](https://drive.google.com/file/d/17fEWczMzTUDzRTv9qN3hFwVbkqRD7HE7/view?usp=sharing) and place them in the
71 |
72 | ```
73 | insightface/model
74 | ```
75 |
76 | folder.
77 |
78 | After extracting the model files verify that your installation is working by executing **serfiq_example.py**. The score of both images should be printed.
79 |
80 |
81 | The implementation for SER-FIQ based on ArcFace can be found here: [Implementation](face_image_quality.py).
82 | In the [Paper](https://arxiv.org/abs/2003.09373), this is referred to as _SER-FIQ (same model) based on ArcFace_.
83 |
84 |
85 |
86 | ## Bias in Face Quality Assessment
87 |
88 | The best face quality assessment performance is achieved when the quality assessment solutions build on the templates of the deployed face recognition system.
89 | In our work on ([Face Quality Estimation and Its Correlation to Demographic and Non-Demographic Bias in Face Recognition](https://arxiv.org/abs/2004.01019)), we showed that this led to a bias transfer from the face recognition system to the quality assessment solution.
90 | On all investigated quality assessment approaches, we observed performance differences based on demographics and non-demographics of the face images.
91 |
92 |
93 |
94 |
95 |
96 |
97 | ## Related Works
98 |
99 | You might be also interested in some of our follow-up works:
100 |
101 | - [Pixel-Level Face Image Quality Assessment for Explainable Face Recognition](https://github.com/pterhoer/ExplainableFaceImageQuality) - The concept of face image quality assessment is transferred to the level of single pixels with the goal to make the face recognition process understandable for humans.
102 | - [QMagFace: Simple and Accurate Quality-Aware Face Recognition](https://github.com/pterhoer/QMagFace) - Face image quality information is included in the recognition process of a face recognition model trained with a magnitude-aware angular margin with the result of reaching SOTA performance on several unconstrained face recognition benchmarks.
103 |
104 | ## Citing
105 |
106 | If you use this code, please cite the following papers.
107 |
108 |
109 | ```
110 | @inproceedings{DBLP:conf/cvpr/TerhorstKDKK20,
111 | author = {Philipp Terh{\"{o}}rst and
112 | Jan Niklas Kolf and
113 | Naser Damer and
114 | Florian Kirchbuchner and
115 | Arjan Kuijper},
116 | title = {{SER-FIQ:} Unsupervised Estimation of Face Image Quality Based on
117 | Stochastic Embedding Robustness},
118 | booktitle = {2020 {IEEE/CVF} Conference on Computer Vision and Pattern Recognition,
119 | {CVPR} 2020, Seattle, WA, USA, June 13-19, 2020},
120 | pages = {5650--5659},
121 | publisher = {{IEEE}},
122 | year = {2020},
123 | url = {https://doi.org/10.1109/CVPR42600.2020.00569},
124 | doi = {10.1109/CVPR42600.2020.00569},
125 | timestamp = {Tue, 11 Aug 2020 16:59:49 +0200},
126 | biburl = {https://dblp.org/rec/conf/cvpr/TerhorstKDKK20.bib},
127 | bibsource = {dblp computer science bibliography, https://dblp.org}
128 | }
129 | ```
130 |
131 | ```
132 | @inproceedings{DBLP:conf/icb/TerhorstKDKK20,
133 | author = {Philipp Terh{\"{o}}rst and
134 | Jan Niklas Kolf and
135 | Naser Damer and
136 | Florian Kirchbuchner and
137 | Arjan Kuijper},
138 | title = {Face Quality Estimation and Its Correlation to Demographic and Non-Demographic
139 | Bias in Face Recognition},
140 | booktitle = {2020 {IEEE} International Joint Conference on Biometrics, {IJCB} 2020,
141 | Houston, TX, USA, September 28 - October 1, 2020},
142 | pages = {1--11},
143 | publisher = {{IEEE}},
144 | year = {2020},
145 | url = {https://doi.org/10.1109/IJCB48548.2020.9304865},
146 | doi = {10.1109/IJCB48548.2020.9304865},
147 | timestamp = {Thu, 14 Jan 2021 15:14:18 +0100},
148 | biburl = {https://dblp.org/rec/conf/icb/TerhorstKDKK20.bib},
149 | bibsource = {dblp computer science bibliography, https://dblp.org}
150 | }
151 |
152 |
153 | ```
154 |
155 | If you make use of our SER-FIQ implementation based on ArcFace, please additionally cite the original ArcFace paper.
156 |
157 | ## Acknowledgement
158 |
159 | This research work has been funded by the German Federal Ministry of Education and Research and the Hessen State Ministry for Higher Education, Research and the Arts within their joint support of the National Research Center for Applied Cybersecurity ATHENE.
160 |
161 | ## License
162 |
163 | This project is licensed under the terms of the Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0) license.
164 | Copyright (c) 2020 Fraunhofer Institute for Computer Graphics Research IGD Darmstadt
165 |
--------------------------------------------------------------------------------
/Supplementary/info:
--------------------------------------------------------------------------------
1 | Files for illustrating the methodology
2 |
--------------------------------------------------------------------------------
/data/img_src.txt:
--------------------------------------------------------------------------------
1 | test_img.jpeg:
2 | Image source:
3 | https://www.pexels.com/photo/adult-beard-boy-casual-220453/
4 | Linked license:
5 | https://www.pexels.com/creative-commons-images/
6 |
7 | test_img2.jpeg:
8 | Image Source:
9 | https://www.pexels.com/photo/man-in-zip-up-hoodie-wearing-blue-mask-3959642/
10 | Linked license:
11 | https://www.pexels.com/photo-license/
--------------------------------------------------------------------------------
/data/test_img.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/data/test_img.jpeg
--------------------------------------------------------------------------------
/data/test_img2.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/data/test_img2.jpeg
--------------------------------------------------------------------------------
/face_image_quality.py:
--------------------------------------------------------------------------------
1 | """
2 | Authors: Jan Niklas Kolf, Philipp Terhörst
3 |
4 | This code is licensed under the terms of the
5 | Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0) license.
6 | https://creativecommons.org/licenses/by-nc-sa/4.0/
7 |
8 |
9 | Copyright (c) 2020 Fraunhofer Institute for Computer Graphics Research IGD Darmstadt
10 |
11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17 | SOFTWARE.
18 |
19 | """
20 | # Installed modules imports
21 | import numpy as np
22 | import mxnet as mx
23 | from mxnet import gluon
24 | import cv2
25 |
26 | from sklearn.preprocessing import normalize
27 | from sklearn.metrics.pairwise import euclidean_distances
28 |
29 | # Insightface imports
30 | from insightface.src import mtcnn_detector
31 | from insightface.src import face_preprocess
32 |
33 |
34 | class SER_FIQ:
35 | 
36 |     def __init__(self,
37 |                  gpu:int=0,         # Which gpu should be used -> gpu id
38 |                  det:int=0,         # Mtcnn option, 1= Use R+O, 0=Detect from beginning
39 |                  ):
40 |         """
41 |         Reimplementing Insightface's FaceModel class.
42 |         Now the dropout output and the network output are returned after a forward pass.
43 | 
44 |         Parameters
45 |         ----------
46 |         gpu : int, optional
47 |             The GPU to be used by Mxnet. The default is 0.
48 |             If set to None, CPU is used instead.
49 |         det : int, optional
50 |             Mtcnn option, 1= Use R+O, 0= Detect from beginning. The default is 0.
51 | 
52 |         Returns
53 |         -------
54 |         None.
55 | 
56 |         """
57 | 
58 |         if gpu is None:
59 |             self.device = mx.cpu()
60 |         else:
61 |             self.device = mx.gpu(gpu)
62 | 
63 |         self.insightface = gluon.nn.SymbolBlock.imports(
64 |                                 "./insightface/model/insightface-symbol.json",
65 |                                 ['data'],
66 |                                 "./insightface/model/insightface-0000.params",
67 |                                 ctx=self.device
68 |                            )
69 | 
70 | 
71 |         self.det_minsize = 50
72 |         self.det_threshold = [0.6,0.7,0.8]
73 |         self.det = det
74 | 
75 |         self.preprocess = face_preprocess.preprocess
76 | 
77 |         thrs = self.det_threshold if det==0 else [0.0,0.0,0.2]
78 | 
79 |         self.detector = mtcnn_detector.MtcnnDetector(model_folder="./insightface/mtcnn-model/",
80 |                                                      ctx=self.device,
81 |                                                      num_worker=1,
82 |                                                      accurate_landmark = True,
83 |                                                      threshold=thrs
84 |                                                      )
85 | 
86 |     def apply_mtcnn(self, face_image : np.ndarray):
87 |         """
88 |         Applies MTCNN Detector on the given face image and returns
89 |         the cropped image.
90 | 
91 |         If no face could be detected None is returned.
92 | 
93 |         Parameters
94 |         ----------
95 |         face_image : np.ndarray
96 |             Face image loaded via OpenCV.
97 | 
98 |         Returns
99 |         -------
100 |         Face Image : np.ndarray, shape (3,112,112).
101 |         None, if no face could be detected
102 | 
103 |         """
104 |         detected = self.detector.detect_face(face_image, det_type=self.det)
105 | 
106 |         if detected is None:
107 |             return None
108 | 
109 |         bbox, points = detected
110 | 
111 |         if bbox.shape[0] == 0:
112 |             return None
113 | 
114 |         points = points[0, :].reshape((2,5)).T
115 | 
116 |         image = self.preprocess(face_image, bbox, points, image_size="112,112")
117 |         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
118 | 
119 |         return np.transpose(image, (2,0,1))
120 | 
121 | 
122 | 
123 |     def get_score(self, aligned_img : np.ndarray,
124 |                   T : int = 100,
125 |                   alpha : float = 130.0,
126 |                   r : float = 0.88):
127 |         """
128 |         Calculates the SER-FIQ score for a given aligned image using T passes.
129 | 
130 | 
131 |         Parameters
132 |         ----------
133 |         aligned_img : np.ndarray, shape (3, h, w)
134 |             Aligned face image, in RGB format.
135 |         T : int, optional
136 |             Number of forward passes to use. The default is 100.
137 |         alpha : float, optional
138 |             Stretching factor, can be chosen to scale the score values
139 |         r : float, optional
140 |             Score displacement
141 | 
142 |         Returns
143 |         -------
144 |         SER-FIQ score : float.
145 | 
146 |         """
147 |         # Color Channel is not the first dimension, swap dims.
148 |         if aligned_img.shape[0] != 3:
149 |             aligned_img = np.transpose(aligned_img, (2,0,1))
150 | 
151 |         input_blob = np.expand_dims(aligned_img, axis=0)
152 |         repeated = np.repeat(input_blob, T, axis=0)
153 |         gpu_repeated = mx.nd.array(repeated, ctx=self.device)
154 | 
155 |         X = self.insightface(gpu_repeated).asnumpy()
156 | 
157 |         norm = normalize(X, axis=1)
158 | 
159 |         # Only get the upper triangle of the distance matrix
160 |         eucl_dist = euclidean_distances(norm, norm)[np.triu_indices(T, k=1)]
161 | 
162 |         # Calculate score as given in the paper
163 |         score = 2*(1/(1+np.exp(np.mean(eucl_dist))))
164 |         # Normalize value based on alpha and r
165 |         return 1 / (1+np.exp(-(alpha * (score - r))))
166 | 
--------------------------------------------------------------------------------
/insightface/license.txt:
--------------------------------------------------------------------------------
1 | The source code and files contained in this folder are from deepinsight's
2 | insightface repository [https://github.com/deepinsight/insightface],
3 | developed by Jiankang Deng and Jia Guo.
4 |
5 | The files in this directory and its subdirectories are licensed under the MIT license.
6 | The MIT license applies exclusively to these files.
7 | The following license must be included with these files:
8 |
9 | MIT License
10 |
11 | Copyright (c) 2018 Jiankang Deng and Jia Guo
12 |
13 | Permission is hereby granted, free of charge, to any person obtaining a copy
14 | of this software and associated documentation files (the "Software"), to deal
15 | in the Software without restriction, including without limitation the rights
16 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 | copies of the Software, and to permit persons to whom the Software is
18 | furnished to do so, subject to the following conditions:
19 |
20 | The above copyright notice and this permission notice shall be included in all
21 | copies or substantial portions of the Software.
22 |
23 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 | SOFTWARE.
--------------------------------------------------------------------------------
/insightface/model/download_link_license.txt:
--------------------------------------------------------------------------------
1 | The model files are taken from the insightface repository [https://github.com/deepinsight/insightface],
2 | developed by Jiankang Deng and Jia Guo.
3 |
4 | The model files are licensed under the MIT License.
5 |
6 | You can download the Arcface model files with Dropout enabled here:
7 | https://drive.google.com/file/d/17fEWczMzTUDzRTv9qN3hFwVbkqRD7HE7/view?usp=sharing
8 |
9 | MIT License
10 |
11 | Copyright (c) 2018 Jiankang Deng and Jia Guo
12 |
13 | Permission is hereby granted, free of charge, to any person obtaining a copy
14 | of this software and associated documentation files (the "Software"), to deal
15 | in the Software without restriction, including without limitation the rights
16 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 | copies of the Software, and to permit persons to whom the Software is
18 | furnished to do so, subject to the following conditions:
19 |
20 | The above copyright notice and this permission notice shall be included in all
21 | copies or substantial portions of the Software.
22 |
23 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 | SOFTWARE.
--------------------------------------------------------------------------------
/insightface/mtcnn-model/det1-0001.params:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/insightface/mtcnn-model/det1-0001.params
--------------------------------------------------------------------------------
/insightface/mtcnn-model/det1-symbol.json:
--------------------------------------------------------------------------------
1 | {
2 | "nodes": [
3 | {
4 | "op": "null",
5 | "param": {},
6 | "name": "data",
7 | "inputs": [],
8 | "backward_source_id": -1
9 | },
10 | {
11 | "op": "null",
12 | "param": {},
13 | "name": "conv1_weight",
14 | "inputs": [],
15 | "backward_source_id": -1
16 | },
17 | {
18 | "op": "null",
19 | "param": {},
20 | "name": "conv1_bias",
21 | "inputs": [],
22 | "backward_source_id": -1
23 | },
24 | {
25 | "op": "Convolution",
26 | "param": {
27 | "cudnn_off": "False",
28 | "cudnn_tune": "off",
29 | "dilate": "(1,1)",
30 | "kernel": "(3,3)",
31 | "no_bias": "False",
32 | "num_filter": "10",
33 | "num_group": "1",
34 | "pad": "(0,0)",
35 | "stride": "(1,1)",
36 | "workspace": "1024"
37 | },
38 | "name": "conv1",
39 | "inputs": [[0, 0], [1, 0], [2, 0]],
40 | "backward_source_id": -1
41 | },
42 | {
43 | "op": "null",
44 | "param": {},
45 | "name": "prelu1_gamma",
46 | "inputs": [],
47 | "backward_source_id": -1
48 | },
49 | {
50 | "op": "LeakyReLU",
51 | "param": {
52 | "act_type": "prelu",
53 | "lower_bound": "0.125",
54 | "slope": "0.25",
55 | "upper_bound": "0.334"
56 | },
57 | "name": "prelu1",
58 | "inputs": [[3, 0], [4, 0]],
59 | "backward_source_id": -1
60 | },
61 | {
62 | "op": "Pooling",
63 | "param": {
64 | "global_pool": "False",
65 | "kernel": "(2,2)",
66 | "pad": "(0,0)",
67 | "pool_type": "max",
68 | "pooling_convention": "full",
69 | "stride": "(2,2)"
70 | },
71 | "name": "pool1",
72 | "inputs": [[5, 0]],
73 | "backward_source_id": -1
74 | },
75 | {
76 | "op": "null",
77 | "param": {},
78 | "name": "conv2_weight",
79 | "inputs": [],
80 | "backward_source_id": -1
81 | },
82 | {
83 | "op": "null",
84 | "param": {},
85 | "name": "conv2_bias",
86 | "inputs": [],
87 | "backward_source_id": -1
88 | },
89 | {
90 | "op": "Convolution",
91 | "param": {
92 | "cudnn_off": "False",
93 | "cudnn_tune": "off",
94 | "dilate": "(1,1)",
95 | "kernel": "(3,3)",
96 | "no_bias": "False",
97 | "num_filter": "16",
98 | "num_group": "1",
99 | "pad": "(0,0)",
100 | "stride": "(1,1)",
101 | "workspace": "1024"
102 | },
103 | "name": "conv2",
104 | "inputs": [[6, 0], [7, 0], [8, 0]],
105 | "backward_source_id": -1
106 | },
107 | {
108 | "op": "null",
109 | "param": {},
110 | "name": "prelu2_gamma",
111 | "inputs": [],
112 | "backward_source_id": -1
113 | },
114 | {
115 | "op": "LeakyReLU",
116 | "param": {
117 | "act_type": "prelu",
118 | "lower_bound": "0.125",
119 | "slope": "0.25",
120 | "upper_bound": "0.334"
121 | },
122 | "name": "prelu2",
123 | "inputs": [[9, 0], [10, 0]],
124 | "backward_source_id": -1
125 | },
126 | {
127 | "op": "null",
128 | "param": {},
129 | "name": "conv3_weight",
130 | "inputs": [],
131 | "backward_source_id": -1
132 | },
133 | {
134 | "op": "null",
135 | "param": {},
136 | "name": "conv3_bias",
137 | "inputs": [],
138 | "backward_source_id": -1
139 | },
140 | {
141 | "op": "Convolution",
142 | "param": {
143 | "cudnn_off": "False",
144 | "cudnn_tune": "off",
145 | "dilate": "(1,1)",
146 | "kernel": "(3,3)",
147 | "no_bias": "False",
148 | "num_filter": "32",
149 | "num_group": "1",
150 | "pad": "(0,0)",
151 | "stride": "(1,1)",
152 | "workspace": "1024"
153 | },
154 | "name": "conv3",
155 | "inputs": [[11, 0], [12, 0], [13, 0]],
156 | "backward_source_id": -1
157 | },
158 | {
159 | "op": "null",
160 | "param": {},
161 | "name": "prelu3_gamma",
162 | "inputs": [],
163 | "backward_source_id": -1
164 | },
165 | {
166 | "op": "LeakyReLU",
167 | "param": {
168 | "act_type": "prelu",
169 | "lower_bound": "0.125",
170 | "slope": "0.25",
171 | "upper_bound": "0.334"
172 | },
173 | "name": "prelu3",
174 | "inputs": [[14, 0], [15, 0]],
175 | "backward_source_id": -1
176 | },
177 | {
178 | "op": "null",
179 | "param": {},
180 | "name": "conv4_2_weight",
181 | "inputs": [],
182 | "backward_source_id": -1
183 | },
184 | {
185 | "op": "null",
186 | "param": {},
187 | "name": "conv4_2_bias",
188 | "inputs": [],
189 | "backward_source_id": -1
190 | },
191 | {
192 | "op": "Convolution",
193 | "param": {
194 | "cudnn_off": "False",
195 | "cudnn_tune": "off",
196 | "dilate": "(1,1)",
197 | "kernel": "(1,1)",
198 | "no_bias": "False",
199 | "num_filter": "4",
200 | "num_group": "1",
201 | "pad": "(0,0)",
202 | "stride": "(1,1)",
203 | "workspace": "1024"
204 | },
205 | "name": "conv4_2",
206 | "inputs": [[16, 0], [17, 0], [18, 0]],
207 | "backward_source_id": -1
208 | },
209 | {
210 | "op": "null",
211 | "param": {},
212 | "name": "conv4_1_weight",
213 | "inputs": [],
214 | "backward_source_id": -1
215 | },
216 | {
217 | "op": "null",
218 | "param": {},
219 | "name": "conv4_1_bias",
220 | "inputs": [],
221 | "backward_source_id": -1
222 | },
223 | {
224 | "op": "Convolution",
225 | "param": {
226 | "cudnn_off": "False",
227 | "cudnn_tune": "off",
228 | "dilate": "(1,1)",
229 | "kernel": "(1,1)",
230 | "no_bias": "False",
231 | "num_filter": "2",
232 | "num_group": "1",
233 | "pad": "(0,0)",
234 | "stride": "(1,1)",
235 | "workspace": "1024"
236 | },
237 | "name": "conv4_1",
238 | "inputs": [[16, 0], [20, 0], [21, 0]],
239 | "backward_source_id": -1
240 | },
241 | {
242 | "op": "SoftmaxActivation",
243 | "param": {"mode": "channel"},
244 | "name": "prob1",
245 | "inputs": [[22, 0]],
246 | "backward_source_id": -1
247 | }
248 | ],
249 | "arg_nodes": [
250 | 0,
251 | 1,
252 | 2,
253 | 4,
254 | 7,
255 | 8,
256 | 10,
257 | 12,
258 | 13,
259 | 15,
260 | 17,
261 | 18,
262 | 20,
263 | 21
264 | ],
265 | "heads": [[19, 0], [23, 0]]
266 | }
--------------------------------------------------------------------------------
/insightface/mtcnn-model/det1.caffemodel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/insightface/mtcnn-model/det1.caffemodel
--------------------------------------------------------------------------------
/insightface/mtcnn-model/det1.prototxt:
--------------------------------------------------------------------------------
1 | name: "PNet"
2 | input: "data"
3 | input_dim: 1
4 | input_dim: 3
5 | input_dim: 12
6 | input_dim: 12
7 |
8 | layer {
9 | name: "conv1"
10 | type: "Convolution"
11 | bottom: "data"
12 | top: "conv1"
13 | param {
14 | lr_mult: 1
15 | decay_mult: 1
16 | }
17 | param {
18 | lr_mult: 2
19 | decay_mult: 0
20 | }
21 | convolution_param {
22 | num_output: 10
23 | kernel_size: 3
24 | stride: 1
25 | weight_filler {
26 | type: "xavier"
27 | }
28 | bias_filler {
29 | type: "constant"
30 | value: 0
31 | }
32 | }
33 | }
34 | layer {
35 | name: "PReLU1"
36 | type: "PReLU"
37 | bottom: "conv1"
38 | top: "conv1"
39 | }
40 | layer {
41 | name: "pool1"
42 | type: "Pooling"
43 | bottom: "conv1"
44 | top: "pool1"
45 | pooling_param {
46 | pool: MAX
47 | kernel_size: 2
48 | stride: 2
49 | }
50 | }
51 |
52 | layer {
53 | name: "conv2"
54 | type: "Convolution"
55 | bottom: "pool1"
56 | top: "conv2"
57 | param {
58 | lr_mult: 1
59 | decay_mult: 1
60 | }
61 | param {
62 | lr_mult: 2
63 | decay_mult: 0
64 | }
65 | convolution_param {
66 | num_output: 16
67 | kernel_size: 3
68 | stride: 1
69 | weight_filler {
70 | type: "xavier"
71 | }
72 | bias_filler {
73 | type: "constant"
74 | value: 0
75 | }
76 | }
77 | }
78 | layer {
79 | name: "PReLU2"
80 | type: "PReLU"
81 | bottom: "conv2"
82 | top: "conv2"
83 | }
84 |
85 | layer {
86 | name: "conv3"
87 | type: "Convolution"
88 | bottom: "conv2"
89 | top: "conv3"
90 | param {
91 | lr_mult: 1
92 | decay_mult: 1
93 | }
94 | param {
95 | lr_mult: 2
96 | decay_mult: 0
97 | }
98 | convolution_param {
99 | num_output: 32
100 | kernel_size: 3
101 | stride: 1
102 | weight_filler {
103 | type: "xavier"
104 | }
105 | bias_filler {
106 | type: "constant"
107 | value: 0
108 | }
109 | }
110 | }
111 | layer {
112 | name: "PReLU3"
113 | type: "PReLU"
114 | bottom: "conv3"
115 | top: "conv3"
116 | }
117 |
118 |
119 | layer {
120 | name: "conv4-1"
121 | type: "Convolution"
122 | bottom: "conv3"
123 | top: "conv4-1"
124 | param {
125 | lr_mult: 1
126 | decay_mult: 1
127 | }
128 | param {
129 | lr_mult: 2
130 | decay_mult: 0
131 | }
132 | convolution_param {
133 | num_output: 2
134 | kernel_size: 1
135 | stride: 1
136 | weight_filler {
137 | type: "xavier"
138 | }
139 | bias_filler {
140 | type: "constant"
141 | value: 0
142 | }
143 | }
144 | }
145 |
146 | layer {
147 | name: "conv4-2"
148 | type: "Convolution"
149 | bottom: "conv3"
150 | top: "conv4-2"
151 | param {
152 | lr_mult: 1
153 | decay_mult: 1
154 | }
155 | param {
156 | lr_mult: 2
157 | decay_mult: 0
158 | }
159 | convolution_param {
160 | num_output: 4
161 | kernel_size: 1
162 | stride: 1
163 | weight_filler {
164 | type: "xavier"
165 | }
166 | bias_filler {
167 | type: "constant"
168 | value: 0
169 | }
170 | }
171 | }
172 | layer {
173 | name: "prob1"
174 | type: "Softmax"
175 | bottom: "conv4-1"
176 | top: "prob1"
177 | }
178 |
--------------------------------------------------------------------------------
/insightface/mtcnn-model/det2-0001.params:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/insightface/mtcnn-model/det2-0001.params
--------------------------------------------------------------------------------
/insightface/mtcnn-model/det2-symbol.json:
--------------------------------------------------------------------------------
1 | {
2 | "nodes": [
3 | {
4 | "op": "null",
5 | "param": {},
6 | "name": "data",
7 | "inputs": [],
8 | "backward_source_id": -1
9 | },
10 | {
11 | "op": "null",
12 | "param": {},
13 | "name": "conv1_weight",
14 | "inputs": [],
15 | "backward_source_id": -1
16 | },
17 | {
18 | "op": "null",
19 | "param": {},
20 | "name": "conv1_bias",
21 | "inputs": [],
22 | "backward_source_id": -1
23 | },
24 | {
25 | "op": "Convolution",
26 | "param": {
27 | "cudnn_off": "False",
28 | "cudnn_tune": "off",
29 | "dilate": "(1,1)",
30 | "kernel": "(3,3)",
31 | "no_bias": "False",
32 | "num_filter": "28",
33 | "num_group": "1",
34 | "pad": "(0,0)",
35 | "stride": "(1,1)",
36 | "workspace": "1024"
37 | },
38 | "name": "conv1",
39 | "inputs": [[0, 0], [1, 0], [2, 0]],
40 | "backward_source_id": -1
41 | },
42 | {
43 | "op": "null",
44 | "param": {},
45 | "name": "prelu1_gamma",
46 | "inputs": [],
47 | "backward_source_id": -1
48 | },
49 | {
50 | "op": "LeakyReLU",
51 | "param": {
52 | "act_type": "prelu",
53 | "lower_bound": "0.125",
54 | "slope": "0.25",
55 | "upper_bound": "0.334"
56 | },
57 | "name": "prelu1",
58 | "inputs": [[3, 0], [4, 0]],
59 | "backward_source_id": -1
60 | },
61 | {
62 | "op": "Pooling",
63 | "param": {
64 | "global_pool": "False",
65 | "kernel": "(3,3)",
66 | "pad": "(0,0)",
67 | "pool_type": "max",
68 | "pooling_convention": "full",
69 | "stride": "(2,2)"
70 | },
71 | "name": "pool1",
72 | "inputs": [[5, 0]],
73 | "backward_source_id": -1
74 | },
75 | {
76 | "op": "null",
77 | "param": {},
78 | "name": "conv2_weight",
79 | "inputs": [],
80 | "backward_source_id": -1
81 | },
82 | {
83 | "op": "null",
84 | "param": {},
85 | "name": "conv2_bias",
86 | "inputs": [],
87 | "backward_source_id": -1
88 | },
89 | {
90 | "op": "Convolution",
91 | "param": {
92 | "cudnn_off": "False",
93 | "cudnn_tune": "off",
94 | "dilate": "(1,1)",
95 | "kernel": "(3,3)",
96 | "no_bias": "False",
97 | "num_filter": "48",
98 | "num_group": "1",
99 | "pad": "(0,0)",
100 | "stride": "(1,1)",
101 | "workspace": "1024"
102 | },
103 | "name": "conv2",
104 | "inputs": [[6, 0], [7, 0], [8, 0]],
105 | "backward_source_id": -1
106 | },
107 | {
108 | "op": "null",
109 | "param": {},
110 | "name": "prelu2_gamma",
111 | "inputs": [],
112 | "backward_source_id": -1
113 | },
114 | {
115 | "op": "LeakyReLU",
116 | "param": {
117 | "act_type": "prelu",
118 | "lower_bound": "0.125",
119 | "slope": "0.25",
120 | "upper_bound": "0.334"
121 | },
122 | "name": "prelu2",
123 | "inputs": [[9, 0], [10, 0]],
124 | "backward_source_id": -1
125 | },
126 | {
127 | "op": "Pooling",
128 | "param": {
129 | "global_pool": "False",
130 | "kernel": "(3,3)",
131 | "pad": "(0,0)",
132 | "pool_type": "max",
133 | "pooling_convention": "full",
134 | "stride": "(2,2)"
135 | },
136 | "name": "pool2",
137 | "inputs": [[11, 0]],
138 | "backward_source_id": -1
139 | },
140 | {
141 | "op": "null",
142 | "param": {},
143 | "name": "conv3_weight",
144 | "inputs": [],
145 | "backward_source_id": -1
146 | },
147 | {
148 | "op": "null",
149 | "param": {},
150 | "name": "conv3_bias",
151 | "inputs": [],
152 | "backward_source_id": -1
153 | },
154 | {
155 | "op": "Convolution",
156 | "param": {
157 | "cudnn_off": "False",
158 | "cudnn_tune": "off",
159 | "dilate": "(1,1)",
160 | "kernel": "(2,2)",
161 | "no_bias": "False",
162 | "num_filter": "64",
163 | "num_group": "1",
164 | "pad": "(0,0)",
165 | "stride": "(1,1)",
166 | "workspace": "1024"
167 | },
168 | "name": "conv3",
169 | "inputs": [[12, 0], [13, 0], [14, 0]],
170 | "backward_source_id": -1
171 | },
172 | {
173 | "op": "null",
174 | "param": {},
175 | "name": "prelu3_gamma",
176 | "inputs": [],
177 | "backward_source_id": -1
178 | },
179 | {
180 | "op": "LeakyReLU",
181 | "param": {
182 | "act_type": "prelu",
183 | "lower_bound": "0.125",
184 | "slope": "0.25",
185 | "upper_bound": "0.334"
186 | },
187 | "name": "prelu3",
188 | "inputs": [[15, 0], [16, 0]],
189 | "backward_source_id": -1
190 | },
191 | {
192 | "op": "null",
193 | "param": {},
194 | "name": "conv4_weight",
195 | "inputs": [],
196 | "backward_source_id": -1
197 | },
198 | {
199 | "op": "null",
200 | "param": {},
201 | "name": "conv4_bias",
202 | "inputs": [],
203 | "backward_source_id": -1
204 | },
205 | {
206 | "op": "FullyConnected",
207 | "param": {
208 | "no_bias": "False",
209 | "num_hidden": "128"
210 | },
211 | "name": "conv4",
212 | "inputs": [[17, 0], [18, 0], [19, 0]],
213 | "backward_source_id": -1
214 | },
215 | {
216 | "op": "null",
217 | "param": {},
218 | "name": "prelu4_gamma",
219 | "inputs": [],
220 | "backward_source_id": -1
221 | },
222 | {
223 | "op": "LeakyReLU",
224 | "param": {
225 | "act_type": "prelu",
226 | "lower_bound": "0.125",
227 | "slope": "0.25",
228 | "upper_bound": "0.334"
229 | },
230 | "name": "prelu4",
231 | "inputs": [[20, 0], [21, 0]],
232 | "backward_source_id": -1
233 | },
234 | {
235 | "op": "null",
236 | "param": {},
237 | "name": "conv5_2_weight",
238 | "inputs": [],
239 | "backward_source_id": -1
240 | },
241 | {
242 | "op": "null",
243 | "param": {},
244 | "name": "conv5_2_bias",
245 | "inputs": [],
246 | "backward_source_id": -1
247 | },
248 | {
249 | "op": "FullyConnected",
250 | "param": {
251 | "no_bias": "False",
252 | "num_hidden": "4"
253 | },
254 | "name": "conv5_2",
255 | "inputs": [[22, 0], [23, 0], [24, 0]],
256 | "backward_source_id": -1
257 | },
258 | {
259 | "op": "null",
260 | "param": {},
261 | "name": "conv5_1_weight",
262 | "inputs": [],
263 | "backward_source_id": -1
264 | },
265 | {
266 | "op": "null",
267 | "param": {},
268 | "name": "conv5_1_bias",
269 | "inputs": [],
270 | "backward_source_id": -1
271 | },
272 | {
273 | "op": "FullyConnected",
274 | "param": {
275 | "no_bias": "False",
276 | "num_hidden": "2"
277 | },
278 | "name": "conv5_1",
279 | "inputs": [[22, 0], [26, 0], [27, 0]],
280 | "backward_source_id": -1
281 | },
282 | {
283 | "op": "null",
284 | "param": {},
285 | "name": "prob1_label",
286 | "inputs": [],
287 | "backward_source_id": -1
288 | },
289 | {
290 | "op": "SoftmaxOutput",
291 | "param": {
292 | "grad_scale": "1",
293 | "ignore_label": "-1",
294 | "multi_output": "False",
295 | "normalization": "null",
296 | "use_ignore": "False"
297 | },
298 | "name": "prob1",
299 | "inputs": [[28, 0], [29, 0]],
300 | "backward_source_id": -1
301 | }
302 | ],
303 | "arg_nodes": [
304 | 0,
305 | 1,
306 | 2,
307 | 4,
308 | 7,
309 | 8,
310 | 10,
311 | 13,
312 | 14,
313 | 16,
314 | 18,
315 | 19,
316 | 21,
317 | 23,
318 | 24,
319 | 26,
320 | 27,
321 | 29
322 | ],
323 | "heads": [[25, 0], [30, 0]]
324 | }
--------------------------------------------------------------------------------
/insightface/mtcnn-model/det2.caffemodel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/insightface/mtcnn-model/det2.caffemodel
--------------------------------------------------------------------------------
/insightface/mtcnn-model/det2.prototxt:
--------------------------------------------------------------------------------
1 | name: "RNet"
2 | input: "data"
3 | input_dim: 1
4 | input_dim: 3
5 | input_dim: 24
6 | input_dim: 24
7 |
8 |
9 | ##########################
10 | ######################
11 | layer {
12 | name: "conv1"
13 | type: "Convolution"
14 | bottom: "data"
15 | top: "conv1"
16 | param {
17 | lr_mult: 0
18 | decay_mult: 0
19 | }
20 | param {
21 | lr_mult: 0
22 | decay_mult: 0
23 | }
24 | convolution_param {
25 | num_output: 28
26 | kernel_size: 3
27 | stride: 1
28 | weight_filler {
29 | type: "xavier"
30 | }
31 | bias_filler {
32 | type: "constant"
33 | value: 0
34 | }
35 | }
36 | }
37 | layer {
38 | name: "prelu1"
39 | type: "PReLU"
40 | bottom: "conv1"
41 | top: "conv1"
42 | propagate_down: true
43 | }
44 | layer {
45 | name: "pool1"
46 | type: "Pooling"
47 | bottom: "conv1"
48 | top: "pool1"
49 | pooling_param {
50 | pool: MAX
51 | kernel_size: 3
52 | stride: 2
53 | }
54 | }
55 |
56 | layer {
57 | name: "conv2"
58 | type: "Convolution"
59 | bottom: "pool1"
60 | top: "conv2"
61 | param {
62 | lr_mult: 0
63 | decay_mult: 0
64 | }
65 | param {
66 | lr_mult: 0
67 | decay_mult: 0
68 | }
69 | convolution_param {
70 | num_output: 48
71 | kernel_size: 3
72 | stride: 1
73 | weight_filler {
74 | type: "xavier"
75 | }
76 | bias_filler {
77 | type: "constant"
78 | value: 0
79 | }
80 | }
81 | }
82 | layer {
83 | name: "prelu2"
84 | type: "PReLU"
85 | bottom: "conv2"
86 | top: "conv2"
87 | propagate_down: true
88 | }
89 | layer {
90 | name: "pool2"
91 | type: "Pooling"
92 | bottom: "conv2"
93 | top: "pool2"
94 | pooling_param {
95 | pool: MAX
96 | kernel_size: 3
97 | stride: 2
98 | }
99 | }
100 | ####################################
101 |
102 | ##################################
103 | layer {
104 | name: "conv3"
105 | type: "Convolution"
106 | bottom: "pool2"
107 | top: "conv3"
108 | param {
109 | lr_mult: 0
110 | decay_mult: 0
111 | }
112 | param {
113 | lr_mult: 0
114 | decay_mult: 0
115 | }
116 | convolution_param {
117 | num_output: 64
118 | kernel_size: 2
119 | stride: 1
120 | weight_filler {
121 | type: "xavier"
122 | }
123 | bias_filler {
124 | type: "constant"
125 | value: 0
126 | }
127 | }
128 | }
129 | layer {
130 | name: "prelu3"
131 | type: "PReLU"
132 | bottom: "conv3"
133 | top: "conv3"
134 | propagate_down: true
135 | }
136 | ###############################
137 |
138 | ###############################
139 |
140 | layer {
141 | name: "conv4"
142 | type: "InnerProduct"
143 | bottom: "conv3"
144 | top: "conv4"
145 | param {
146 | lr_mult: 0
147 | decay_mult: 0
148 | }
149 | param {
150 | lr_mult: 0
151 | decay_mult: 0
152 | }
153 | inner_product_param {
154 | num_output: 128
155 | weight_filler {
156 | type: "xavier"
157 | }
158 | bias_filler {
159 | type: "constant"
160 | value: 0
161 | }
162 | }
163 | }
164 | layer {
165 | name: "prelu4"
166 | type: "PReLU"
167 | bottom: "conv4"
168 | top: "conv4"
169 | }
170 |
171 | layer {
172 | name: "conv5-1"
173 | type: "InnerProduct"
174 | bottom: "conv4"
175 | top: "conv5-1"
176 | param {
177 | lr_mult: 0
178 | decay_mult: 0
179 | }
180 | param {
181 | lr_mult: 0
182 | decay_mult: 0
183 | }
184 | inner_product_param {
185 | num_output: 2
186 | #kernel_size: 1
187 | #stride: 1
188 | weight_filler {
189 | type: "xavier"
190 | }
191 | bias_filler {
192 | type: "constant"
193 | value: 0
194 | }
195 | }
196 | }
197 | layer {
198 | name: "conv5-2"
199 | type: "InnerProduct"
200 | bottom: "conv4"
201 | top: "conv5-2"
202 | param {
203 | lr_mult: 1
204 | decay_mult: 1
205 | }
206 | param {
207 | lr_mult: 2
208 | decay_mult: 1
209 | }
210 | inner_product_param {
211 | num_output: 4
212 | #kernel_size: 1
213 | #stride: 1
214 | weight_filler {
215 | type: "xavier"
216 | }
217 | bias_filler {
218 | type: "constant"
219 | value: 0
220 | }
221 | }
222 | }
223 | layer {
224 | name: "prob1"
225 | type: "Softmax"
226 | bottom: "conv5-1"
227 | top: "prob1"
228 | }
--------------------------------------------------------------------------------
/insightface/mtcnn-model/det3-0001.params:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/insightface/mtcnn-model/det3-0001.params
--------------------------------------------------------------------------------
/insightface/mtcnn-model/det3-symbol.json:
--------------------------------------------------------------------------------
1 | {
2 | "nodes": [
3 | {
4 | "op": "null",
5 | "param": {},
6 | "name": "data",
7 | "inputs": [],
8 | "backward_source_id": -1
9 | },
10 | {
11 | "op": "null",
12 | "param": {},
13 | "name": "conv1_weight",
14 | "inputs": [],
15 | "backward_source_id": -1
16 | },
17 | {
18 | "op": "null",
19 | "param": {},
20 | "name": "conv1_bias",
21 | "inputs": [],
22 | "backward_source_id": -1
23 | },
24 | {
25 | "op": "Convolution",
26 | "param": {
27 | "cudnn_off": "False",
28 | "cudnn_tune": "off",
29 | "dilate": "(1,1)",
30 | "kernel": "(3,3)",
31 | "no_bias": "False",
32 | "num_filter": "32",
33 | "num_group": "1",
34 | "pad": "(0,0)",
35 | "stride": "(1,1)",
36 | "workspace": "1024"
37 | },
38 | "name": "conv1",
39 | "inputs": [[0, 0], [1, 0], [2, 0]],
40 | "backward_source_id": -1
41 | },
42 | {
43 | "op": "null",
44 | "param": {},
45 | "name": "prelu1_gamma",
46 | "inputs": [],
47 | "backward_source_id": -1
48 | },
49 | {
50 | "op": "LeakyReLU",
51 | "param": {
52 | "act_type": "prelu",
53 | "lower_bound": "0.125",
54 | "slope": "0.25",
55 | "upper_bound": "0.334"
56 | },
57 | "name": "prelu1",
58 | "inputs": [[3, 0], [4, 0]],
59 | "backward_source_id": -1
60 | },
61 | {
62 | "op": "Pooling",
63 | "param": {
64 | "global_pool": "False",
65 | "kernel": "(3,3)",
66 | "pad": "(0,0)",
67 | "pool_type": "max",
68 | "pooling_convention": "full",
69 | "stride": "(2,2)"
70 | },
71 | "name": "pool1",
72 | "inputs": [[5, 0]],
73 | "backward_source_id": -1
74 | },
75 | {
76 | "op": "null",
77 | "param": {},
78 | "name": "conv2_weight",
79 | "inputs": [],
80 | "backward_source_id": -1
81 | },
82 | {
83 | "op": "null",
84 | "param": {},
85 | "name": "conv2_bias",
86 | "inputs": [],
87 | "backward_source_id": -1
88 | },
89 | {
90 | "op": "Convolution",
91 | "param": {
92 | "cudnn_off": "False",
93 | "cudnn_tune": "off",
94 | "dilate": "(1,1)",
95 | "kernel": "(3,3)",
96 | "no_bias": "False",
97 | "num_filter": "64",
98 | "num_group": "1",
99 | "pad": "(0,0)",
100 | "stride": "(1,1)",
101 | "workspace": "1024"
102 | },
103 | "name": "conv2",
104 | "inputs": [[6, 0], [7, 0], [8, 0]],
105 | "backward_source_id": -1
106 | },
107 | {
108 | "op": "null",
109 | "param": {},
110 | "name": "prelu2_gamma",
111 | "inputs": [],
112 | "backward_source_id": -1
113 | },
114 | {
115 | "op": "LeakyReLU",
116 | "param": {
117 | "act_type": "prelu",
118 | "lower_bound": "0.125",
119 | "slope": "0.25",
120 | "upper_bound": "0.334"
121 | },
122 | "name": "prelu2",
123 | "inputs": [[9, 0], [10, 0]],
124 | "backward_source_id": -1
125 | },
126 | {
127 | "op": "Pooling",
128 | "param": {
129 | "global_pool": "False",
130 | "kernel": "(3,3)",
131 | "pad": "(0,0)",
132 | "pool_type": "max",
133 | "pooling_convention": "full",
134 | "stride": "(2,2)"
135 | },
136 | "name": "pool2",
137 | "inputs": [[11, 0]],
138 | "backward_source_id": -1
139 | },
140 | {
141 | "op": "null",
142 | "param": {},
143 | "name": "conv3_weight",
144 | "inputs": [],
145 | "backward_source_id": -1
146 | },
147 | {
148 | "op": "null",
149 | "param": {},
150 | "name": "conv3_bias",
151 | "inputs": [],
152 | "backward_source_id": -1
153 | },
154 | {
155 | "op": "Convolution",
156 | "param": {
157 | "cudnn_off": "False",
158 | "cudnn_tune": "off",
159 | "dilate": "(1,1)",
160 | "kernel": "(3,3)",
161 | "no_bias": "False",
162 | "num_filter": "64",
163 | "num_group": "1",
164 | "pad": "(0,0)",
165 | "stride": "(1,1)",
166 | "workspace": "1024"
167 | },
168 | "name": "conv3",
169 | "inputs": [[12, 0], [13, 0], [14, 0]],
170 | "backward_source_id": -1
171 | },
172 | {
173 | "op": "null",
174 | "param": {},
175 | "name": "prelu3_gamma",
176 | "inputs": [],
177 | "backward_source_id": -1
178 | },
179 | {
180 | "op": "LeakyReLU",
181 | "param": {
182 | "act_type": "prelu",
183 | "lower_bound": "0.125",
184 | "slope": "0.25",
185 | "upper_bound": "0.334"
186 | },
187 | "name": "prelu3",
188 | "inputs": [[15, 0], [16, 0]],
189 | "backward_source_id": -1
190 | },
191 | {
192 | "op": "Pooling",
193 | "param": {
194 | "global_pool": "False",
195 | "kernel": "(2,2)",
196 | "pad": "(0,0)",
197 | "pool_type": "max",
198 | "pooling_convention": "full",
199 | "stride": "(2,2)"
200 | },
201 | "name": "pool3",
202 | "inputs": [[17, 0]],
203 | "backward_source_id": -1
204 | },
205 | {
206 | "op": "null",
207 | "param": {},
208 | "name": "conv4_weight",
209 | "inputs": [],
210 | "backward_source_id": -1
211 | },
212 | {
213 | "op": "null",
214 | "param": {},
215 | "name": "conv4_bias",
216 | "inputs": [],
217 | "backward_source_id": -1
218 | },
219 | {
220 | "op": "Convolution",
221 | "param": {
222 | "cudnn_off": "False",
223 | "cudnn_tune": "off",
224 | "dilate": "(1,1)",
225 | "kernel": "(2,2)",
226 | "no_bias": "False",
227 | "num_filter": "128",
228 | "num_group": "1",
229 | "pad": "(0,0)",
230 | "stride": "(1,1)",
231 | "workspace": "1024"
232 | },
233 | "name": "conv4",
234 | "inputs": [[18, 0], [19, 0], [20, 0]],
235 | "backward_source_id": -1
236 | },
237 | {
238 | "op": "null",
239 | "param": {},
240 | "name": "prelu4_gamma",
241 | "inputs": [],
242 | "backward_source_id": -1
243 | },
244 | {
245 | "op": "LeakyReLU",
246 | "param": {
247 | "act_type": "prelu",
248 | "lower_bound": "0.125",
249 | "slope": "0.25",
250 | "upper_bound": "0.334"
251 | },
252 | "name": "prelu4",
253 | "inputs": [[21, 0], [22, 0]],
254 | "backward_source_id": -1
255 | },
256 | {
257 | "op": "null",
258 | "param": {},
259 | "name": "conv5_weight",
260 | "inputs": [],
261 | "backward_source_id": -1
262 | },
263 | {
264 | "op": "null",
265 | "param": {},
266 | "name": "conv5_bias",
267 | "inputs": [],
268 | "backward_source_id": -1
269 | },
270 | {
271 | "op": "FullyConnected",
272 | "param": {
273 | "no_bias": "False",
274 | "num_hidden": "256"
275 | },
276 | "name": "conv5",
277 | "inputs": [[23, 0], [24, 0], [25, 0]],
278 | "backward_source_id": -1
279 | },
280 | {
281 | "op": "null",
282 | "param": {},
283 | "name": "prelu5_gamma",
284 | "inputs": [],
285 | "backward_source_id": -1
286 | },
287 | {
288 | "op": "LeakyReLU",
289 | "param": {
290 | "act_type": "prelu",
291 | "lower_bound": "0.125",
292 | "slope": "0.25",
293 | "upper_bound": "0.334"
294 | },
295 | "name": "prelu5",
296 | "inputs": [[26, 0], [27, 0]],
297 | "backward_source_id": -1
298 | },
299 | {
300 | "op": "null",
301 | "param": {},
302 | "name": "conv6_3_weight",
303 | "inputs": [],
304 | "backward_source_id": -1
305 | },
306 | {
307 | "op": "null",
308 | "param": {},
309 | "name": "conv6_3_bias",
310 | "inputs": [],
311 | "backward_source_id": -1
312 | },
313 | {
314 | "op": "FullyConnected",
315 | "param": {
316 | "no_bias": "False",
317 | "num_hidden": "10"
318 | },
319 | "name": "conv6_3",
320 | "inputs": [[28, 0], [29, 0], [30, 0]],
321 | "backward_source_id": -1
322 | },
323 | {
324 | "op": "null",
325 | "param": {},
326 | "name": "conv6_2_weight",
327 | "inputs": [],
328 | "backward_source_id": -1
329 | },
330 | {
331 | "op": "null",
332 | "param": {},
333 | "name": "conv6_2_bias",
334 | "inputs": [],
335 | "backward_source_id": -1
336 | },
337 | {
338 | "op": "FullyConnected",
339 | "param": {
340 | "no_bias": "False",
341 | "num_hidden": "4"
342 | },
343 | "name": "conv6_2",
344 | "inputs": [[28, 0], [32, 0], [33, 0]],
345 | "backward_source_id": -1
346 | },
347 | {
348 | "op": "null",
349 | "param": {},
350 | "name": "conv6_1_weight",
351 | "inputs": [],
352 | "backward_source_id": -1
353 | },
354 | {
355 | "op": "null",
356 | "param": {},
357 | "name": "conv6_1_bias",
358 | "inputs": [],
359 | "backward_source_id": -1
360 | },
361 | {
362 | "op": "FullyConnected",
363 | "param": {
364 | "no_bias": "False",
365 | "num_hidden": "2"
366 | },
367 | "name": "conv6_1",
368 | "inputs": [[28, 0], [35, 0], [36, 0]],
369 | "backward_source_id": -1
370 | },
371 | {
372 | "op": "null",
373 | "param": {},
374 | "name": "prob1_label",
375 | "inputs": [],
376 | "backward_source_id": -1
377 | },
378 | {
379 | "op": "SoftmaxOutput",
380 | "param": {
381 | "grad_scale": "1",
382 | "ignore_label": "-1",
383 | "multi_output": "False",
384 | "normalization": "null",
385 | "use_ignore": "False"
386 | },
387 | "name": "prob1",
388 | "inputs": [[37, 0], [38, 0]],
389 | "backward_source_id": -1
390 | }
391 | ],
392 | "arg_nodes": [
393 | 0,
394 | 1,
395 | 2,
396 | 4,
397 | 7,
398 | 8,
399 | 10,
400 | 13,
401 | 14,
402 | 16,
403 | 19,
404 | 20,
405 | 22,
406 | 24,
407 | 25,
408 | 27,
409 | 29,
410 | 30,
411 | 32,
412 | 33,
413 | 35,
414 | 36,
415 | 38
416 | ],
417 | "heads": [[31, 0], [34, 0], [39, 0]]
418 | }
--------------------------------------------------------------------------------
/insightface/mtcnn-model/det3.caffemodel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/insightface/mtcnn-model/det3.caffemodel
--------------------------------------------------------------------------------
/insightface/mtcnn-model/det3.prototxt:
--------------------------------------------------------------------------------
1 | name: "ONet"
2 | input: "data"
3 | input_dim: 1
4 | input_dim: 3
5 | input_dim: 48
6 | input_dim: 48
7 | ##################################
8 | layer {
9 | name: "conv1"
10 | type: "Convolution"
11 | bottom: "data"
12 | top: "conv1"
13 | param {
14 | lr_mult: 1
15 | decay_mult: 1
16 | }
17 | param {
18 | lr_mult: 2
19 | decay_mult: 1
20 | }
21 | convolution_param {
22 | num_output: 32
23 | kernel_size: 3
24 | stride: 1
25 | weight_filler {
26 | type: "xavier"
27 | }
28 | bias_filler {
29 | type: "constant"
30 | value: 0
31 | }
32 | }
33 | }
34 | layer {
35 | name: "prelu1"
36 | type: "PReLU"
37 | bottom: "conv1"
38 | top: "conv1"
39 | }
40 | layer {
41 | name: "pool1"
42 | type: "Pooling"
43 | bottom: "conv1"
44 | top: "pool1"
45 | pooling_param {
46 | pool: MAX
47 | kernel_size: 3
48 | stride: 2
49 | }
50 | }
51 | layer {
52 | name: "conv2"
53 | type: "Convolution"
54 | bottom: "pool1"
55 | top: "conv2"
56 | param {
57 | lr_mult: 1
58 | decay_mult: 1
59 | }
60 | param {
61 | lr_mult: 2
62 | decay_mult: 1
63 | }
64 | convolution_param {
65 | num_output: 64
66 | kernel_size: 3
67 | stride: 1
68 | weight_filler {
69 | type: "xavier"
70 | }
71 | bias_filler {
72 | type: "constant"
73 | value: 0
74 | }
75 | }
76 | }
77 |
78 | layer {
79 | name: "prelu2"
80 | type: "PReLU"
81 | bottom: "conv2"
82 | top: "conv2"
83 | }
84 | layer {
85 | name: "pool2"
86 | type: "Pooling"
87 | bottom: "conv2"
88 | top: "pool2"
89 | pooling_param {
90 | pool: MAX
91 | kernel_size: 3
92 | stride: 2
93 | }
94 | }
95 |
96 | layer {
97 | name: "conv3"
98 | type: "Convolution"
99 | bottom: "pool2"
100 | top: "conv3"
101 | param {
102 | lr_mult: 1
103 | decay_mult: 1
104 | }
105 | param {
106 | lr_mult: 2
107 | decay_mult: 1
108 | }
109 | convolution_param {
110 | num_output: 64
111 | kernel_size: 3
112 | weight_filler {
113 | type: "xavier"
114 | }
115 | bias_filler {
116 | type: "constant"
117 | value: 0
118 | }
119 | }
120 | }
121 | layer {
122 | name: "prelu3"
123 | type: "PReLU"
124 | bottom: "conv3"
125 | top: "conv3"
126 | }
127 | layer {
128 | name: "pool3"
129 | type: "Pooling"
130 | bottom: "conv3"
131 | top: "pool3"
132 | pooling_param {
133 | pool: MAX
134 | kernel_size: 2
135 | stride: 2
136 | }
137 | }
138 | layer {
139 | name: "conv4"
140 | type: "Convolution"
141 | bottom: "pool3"
142 | top: "conv4"
143 | param {
144 | lr_mult: 1
145 | decay_mult: 1
146 | }
147 | param {
148 | lr_mult: 2
149 | decay_mult: 1
150 | }
151 | convolution_param {
152 | num_output: 128
153 | kernel_size: 2
154 | weight_filler {
155 | type: "xavier"
156 | }
157 | bias_filler {
158 | type: "constant"
159 | value: 0
160 | }
161 | }
162 | }
163 | layer {
164 | name: "prelu4"
165 | type: "PReLU"
166 | bottom: "conv4"
167 | top: "conv4"
168 | }
169 |
170 |
171 | layer {
172 | name: "conv5"
173 | type: "InnerProduct"
174 | bottom: "conv4"
175 | top: "conv5"
176 | param {
177 | lr_mult: 1
178 | decay_mult: 1
179 | }
180 | param {
181 | lr_mult: 2
182 | decay_mult: 1
183 | }
184 | inner_product_param {
185 | #kernel_size: 3
186 | num_output: 256
187 | weight_filler {
188 | type: "xavier"
189 | }
190 | bias_filler {
191 | type: "constant"
192 | value: 0
193 | }
194 | }
195 | }
196 |
197 | layer {
198 | name: "drop5"
199 | type: "Dropout"
200 | bottom: "conv5"
201 | top: "conv5"
202 | dropout_param {
203 | dropout_ratio: 0.25
204 | }
205 | }
206 | layer {
207 | name: "prelu5"
208 | type: "PReLU"
209 | bottom: "conv5"
210 | top: "conv5"
211 | }
212 |
213 |
214 | layer {
215 | name: "conv6-1"
216 | type: "InnerProduct"
217 | bottom: "conv5"
218 | top: "conv6-1"
219 | param {
220 | lr_mult: 1
221 | decay_mult: 1
222 | }
223 | param {
224 | lr_mult: 2
225 | decay_mult: 1
226 | }
227 | inner_product_param {
228 | #kernel_size: 1
229 | num_output: 2
230 | weight_filler {
231 | type: "xavier"
232 | }
233 | bias_filler {
234 | type: "constant"
235 | value: 0
236 | }
237 | }
238 | }
239 | layer {
240 | name: "conv6-2"
241 | type: "InnerProduct"
242 | bottom: "conv5"
243 | top: "conv6-2"
244 | param {
245 | lr_mult: 1
246 | decay_mult: 1
247 | }
248 | param {
249 | lr_mult: 2
250 | decay_mult: 1
251 | }
252 | inner_product_param {
253 | #kernel_size: 1
254 | num_output: 4
255 | weight_filler {
256 | type: "xavier"
257 | }
258 | bias_filler {
259 | type: "constant"
260 | value: 0
261 | }
262 | }
263 | }
264 | layer {
265 | name: "conv6-3"
266 | type: "InnerProduct"
267 | bottom: "conv5"
268 | top: "conv6-3"
269 | param {
270 | lr_mult: 1
271 | decay_mult: 1
272 | }
273 | param {
274 | lr_mult: 2
275 | decay_mult: 1
276 | }
277 | inner_product_param {
278 | #kernel_size: 1
279 | num_output: 10
280 | weight_filler {
281 | type: "xavier"
282 | }
283 | bias_filler {
284 | type: "constant"
285 | value: 0
286 | }
287 | }
288 | }
289 | layer {
290 | name: "prob1"
291 | type: "Softmax"
292 | bottom: "conv6-1"
293 | top: "prob1"
294 | }
295 |
--------------------------------------------------------------------------------
/insightface/mtcnn-model/det4-0001.params:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/insightface/mtcnn-model/det4-0001.params
--------------------------------------------------------------------------------
/insightface/mtcnn-model/det4-symbol.json:
--------------------------------------------------------------------------------
1 | {
2 | "nodes": [
3 | {
4 | "op": "null",
5 | "param": {},
6 | "name": "data",
7 | "inputs": [],
8 | "backward_source_id": -1
9 | },
10 | {
11 | "op": "SliceChannel",
12 | "param": {
13 | "axis": "1",
14 | "num_outputs": "5",
15 | "squeeze_axis": "False"
16 | },
17 | "name": "slice",
18 | "inputs": [[0, 0]],
19 | "backward_source_id": -1
20 | },
21 | {
22 | "op": "null",
23 | "param": {},
24 | "name": "conv1_1_weight",
25 | "inputs": [],
26 | "backward_source_id": -1
27 | },
28 | {
29 | "op": "null",
30 | "param": {},
31 | "name": "conv1_1_bias",
32 | "inputs": [],
33 | "backward_source_id": -1
34 | },
35 | {
36 | "op": "Convolution",
37 | "param": {
38 | "cudnn_off": "False",
39 | "cudnn_tune": "off",
40 | "dilate": "(1,1)",
41 | "kernel": "(3,3)",
42 | "no_bias": "False",
43 | "num_filter": "28",
44 | "num_group": "1",
45 | "pad": "(0,0)",
46 | "stride": "(1,1)",
47 | "workspace": "1024"
48 | },
49 | "name": "conv1_1",
50 | "inputs": [[1, 0], [2, 0], [3, 0]],
51 | "backward_source_id": -1
52 | },
53 | {
54 | "op": "null",
55 | "param": {},
56 | "name": "prelu1_1_gamma",
57 | "inputs": [],
58 | "backward_source_id": -1
59 | },
60 | {
61 | "op": "LeakyReLU",
62 | "param": {
63 | "act_type": "prelu",
64 | "lower_bound": "0.125",
65 | "slope": "0.25",
66 | "upper_bound": "0.334"
67 | },
68 | "name": "prelu1_1",
69 | "inputs": [[4, 0], [5, 0]],
70 | "backward_source_id": -1
71 | },
72 | {
73 | "op": "Pooling",
74 | "param": {
75 | "global_pool": "False",
76 | "kernel": "(3,3)",
77 | "pad": "(0,0)",
78 | "pool_type": "max",
79 | "pooling_convention": "full",
80 | "stride": "(2,2)"
81 | },
82 | "name": "pool1_1",
83 | "inputs": [[6, 0]],
84 | "backward_source_id": -1
85 | },
86 | {
87 | "op": "null",
88 | "param": {},
89 | "name": "conv2_1_weight",
90 | "inputs": [],
91 | "backward_source_id": -1
92 | },
93 | {
94 | "op": "null",
95 | "param": {},
96 | "name": "conv2_1_bias",
97 | "inputs": [],
98 | "backward_source_id": -1
99 | },
100 | {
101 | "op": "Convolution",
102 | "param": {
103 | "cudnn_off": "False",
104 | "cudnn_tune": "off",
105 | "dilate": "(1,1)",
106 | "kernel": "(3,3)",
107 | "no_bias": "False",
108 | "num_filter": "48",
109 | "num_group": "1",
110 | "pad": "(0,0)",
111 | "stride": "(1,1)",
112 | "workspace": "1024"
113 | },
114 | "name": "conv2_1",
115 | "inputs": [[7, 0], [8, 0], [9, 0]],
116 | "backward_source_id": -1
117 | },
118 | {
119 | "op": "null",
120 | "param": {},
121 | "name": "prelu2_1_gamma",
122 | "inputs": [],
123 | "backward_source_id": -1
124 | },
125 | {
126 | "op": "LeakyReLU",
127 | "param": {
128 | "act_type": "prelu",
129 | "lower_bound": "0.125",
130 | "slope": "0.25",
131 | "upper_bound": "0.334"
132 | },
133 | "name": "prelu2_1",
134 | "inputs": [[10, 0], [11, 0]],
135 | "backward_source_id": -1
136 | },
137 | {
138 | "op": "Pooling",
139 | "param": {
140 | "global_pool": "False",
141 | "kernel": "(3,3)",
142 | "pad": "(0,0)",
143 | "pool_type": "max",
144 | "pooling_convention": "full",
145 | "stride": "(2,2)"
146 | },
147 | "name": "pool2_1",
148 | "inputs": [[12, 0]],
149 | "backward_source_id": -1
150 | },
151 | {
152 | "op": "null",
153 | "param": {},
154 | "name": "conv3_1_weight",
155 | "inputs": [],
156 | "backward_source_id": -1
157 | },
158 | {
159 | "op": "null",
160 | "param": {},
161 | "name": "conv3_1_bias",
162 | "inputs": [],
163 | "backward_source_id": -1
164 | },
165 | {
166 | "op": "Convolution",
167 | "param": {
168 | "cudnn_off": "False",
169 | "cudnn_tune": "off",
170 | "dilate": "(1,1)",
171 | "kernel": "(2,2)",
172 | "no_bias": "False",
173 | "num_filter": "64",
174 | "num_group": "1",
175 | "pad": "(0,0)",
176 | "stride": "(1,1)",
177 | "workspace": "1024"
178 | },
179 | "name": "conv3_1",
180 | "inputs": [[13, 0], [14, 0], [15, 0]],
181 | "backward_source_id": -1
182 | },
183 | {
184 | "op": "null",
185 | "param": {},
186 | "name": "prelu3_1_gamma",
187 | "inputs": [],
188 | "backward_source_id": -1
189 | },
190 | {
191 | "op": "LeakyReLU",
192 | "param": {
193 | "act_type": "prelu",
194 | "lower_bound": "0.125",
195 | "slope": "0.25",
196 | "upper_bound": "0.334"
197 | },
198 | "name": "prelu3_1",
199 | "inputs": [[16, 0], [17, 0]],
200 | "backward_source_id": -1
201 | },
202 | {
203 | "op": "null",
204 | "param": {},
205 | "name": "conv1_2_weight",
206 | "inputs": [],
207 | "backward_source_id": -1
208 | },
209 | {
210 | "op": "null",
211 | "param": {},
212 | "name": "conv1_2_bias",
213 | "inputs": [],
214 | "backward_source_id": -1
215 | },
216 | {
217 | "op": "Convolution",
218 | "param": {
219 | "cudnn_off": "False",
220 | "cudnn_tune": "off",
221 | "dilate": "(1,1)",
222 | "kernel": "(3,3)",
223 | "no_bias": "False",
224 | "num_filter": "28",
225 | "num_group": "1",
226 | "pad": "(0,0)",
227 | "stride": "(1,1)",
228 | "workspace": "1024"
229 | },
230 | "name": "conv1_2",
231 | "inputs": [[1, 1], [19, 0], [20, 0]],
232 | "backward_source_id": -1
233 | },
234 | {
235 | "op": "null",
236 | "param": {},
237 | "name": "prelu1_2_gamma",
238 | "inputs": [],
239 | "backward_source_id": -1
240 | },
241 | {
242 | "op": "LeakyReLU",
243 | "param": {
244 | "act_type": "prelu",
245 | "lower_bound": "0.125",
246 | "slope": "0.25",
247 | "upper_bound": "0.334"
248 | },
249 | "name": "prelu1_2",
250 | "inputs": [[21, 0], [22, 0]],
251 | "backward_source_id": -1
252 | },
253 | {
254 | "op": "Pooling",
255 | "param": {
256 | "global_pool": "False",
257 | "kernel": "(3,3)",
258 | "pad": "(0,0)",
259 | "pool_type": "max",
260 | "pooling_convention": "full",
261 | "stride": "(2,2)"
262 | },
263 | "name": "pool1_2",
264 | "inputs": [[23, 0]],
265 | "backward_source_id": -1
266 | },
267 | {
268 | "op": "null",
269 | "param": {},
270 | "name": "conv2_2_weight",
271 | "inputs": [],
272 | "backward_source_id": -1
273 | },
274 | {
275 | "op": "null",
276 | "param": {},
277 | "name": "conv2_2_bias",
278 | "inputs": [],
279 | "backward_source_id": -1
280 | },
281 | {
282 | "op": "Convolution",
283 | "param": {
284 | "cudnn_off": "False",
285 | "cudnn_tune": "off",
286 | "dilate": "(1,1)",
287 | "kernel": "(3,3)",
288 | "no_bias": "False",
289 | "num_filter": "48",
290 | "num_group": "1",
291 | "pad": "(0,0)",
292 | "stride": "(1,1)",
293 | "workspace": "1024"
294 | },
295 | "name": "conv2_2",
296 | "inputs": [[24, 0], [25, 0], [26, 0]],
297 | "backward_source_id": -1
298 | },
299 | {
300 | "op": "null",
301 | "param": {},
302 | "name": "prelu2_2_gamma",
303 | "inputs": [],
304 | "backward_source_id": -1
305 | },
306 | {
307 | "op": "LeakyReLU",
308 | "param": {
309 | "act_type": "prelu",
310 | "lower_bound": "0.125",
311 | "slope": "0.25",
312 | "upper_bound": "0.334"
313 | },
314 | "name": "prelu2_2",
315 | "inputs": [[27, 0], [28, 0]],
316 | "backward_source_id": -1
317 | },
318 | {
319 | "op": "Pooling",
320 | "param": {
321 | "global_pool": "False",
322 | "kernel": "(3,3)",
323 | "pad": "(0,0)",
324 | "pool_type": "max",
325 | "pooling_convention": "full",
326 | "stride": "(2,2)"
327 | },
328 | "name": "pool2_2",
329 | "inputs": [[29, 0]],
330 | "backward_source_id": -1
331 | },
332 | {
333 | "op": "null",
334 | "param": {},
335 | "name": "conv3_2_weight",
336 | "inputs": [],
337 | "backward_source_id": -1
338 | },
339 | {
340 | "op": "null",
341 | "param": {},
342 | "name": "conv3_2_bias",
343 | "inputs": [],
344 | "backward_source_id": -1
345 | },
346 | {
347 | "op": "Convolution",
348 | "param": {
349 | "cudnn_off": "False",
350 | "cudnn_tune": "off",
351 | "dilate": "(1,1)",
352 | "kernel": "(2,2)",
353 | "no_bias": "False",
354 | "num_filter": "64",
355 | "num_group": "1",
356 | "pad": "(0,0)",
357 | "stride": "(1,1)",
358 | "workspace": "1024"
359 | },
360 | "name": "conv3_2",
361 | "inputs": [[30, 0], [31, 0], [32, 0]],
362 | "backward_source_id": -1
363 | },
364 | {
365 | "op": "null",
366 | "param": {},
367 | "name": "prelu3_2_gamma",
368 | "inputs": [],
369 | "backward_source_id": -1
370 | },
371 | {
372 | "op": "LeakyReLU",
373 | "param": {
374 | "act_type": "prelu",
375 | "lower_bound": "0.125",
376 | "slope": "0.25",
377 | "upper_bound": "0.334"
378 | },
379 | "name": "prelu3_2",
380 | "inputs": [[33, 0], [34, 0]],
381 | "backward_source_id": -1
382 | },
383 | {
384 | "op": "null",
385 | "param": {},
386 | "name": "conv1_3_weight",
387 | "inputs": [],
388 | "backward_source_id": -1
389 | },
390 | {
391 | "op": "null",
392 | "param": {},
393 | "name": "conv1_3_bias",
394 | "inputs": [],
395 | "backward_source_id": -1
396 | },
397 | {
398 | "op": "Convolution",
399 | "param": {
400 | "cudnn_off": "False",
401 | "cudnn_tune": "off",
402 | "dilate": "(1,1)",
403 | "kernel": "(3,3)",
404 | "no_bias": "False",
405 | "num_filter": "28",
406 | "num_group": "1",
407 | "pad": "(0,0)",
408 | "stride": "(1,1)",
409 | "workspace": "1024"
410 | },
411 | "name": "conv1_3",
412 | "inputs": [[1, 2], [36, 0], [37, 0]],
413 | "backward_source_id": -1
414 | },
415 | {
416 | "op": "null",
417 | "param": {},
418 | "name": "prelu1_3_gamma",
419 | "inputs": [],
420 | "backward_source_id": -1
421 | },
422 | {
423 | "op": "LeakyReLU",
424 | "param": {
425 | "act_type": "prelu",
426 | "lower_bound": "0.125",
427 | "slope": "0.25",
428 | "upper_bound": "0.334"
429 | },
430 | "name": "prelu1_3",
431 | "inputs": [[38, 0], [39, 0]],
432 | "backward_source_id": -1
433 | },
434 | {
435 | "op": "Pooling",
436 | "param": {
437 | "global_pool": "False",
438 | "kernel": "(3,3)",
439 | "pad": "(0,0)",
440 | "pool_type": "max",
441 | "pooling_convention": "full",
442 | "stride": "(2,2)"
443 | },
444 | "name": "pool1_3",
445 | "inputs": [[40, 0]],
446 | "backward_source_id": -1
447 | },
448 | {
449 | "op": "null",
450 | "param": {},
451 | "name": "conv2_3_weight",
452 | "inputs": [],
453 | "backward_source_id": -1
454 | },
455 | {
456 | "op": "null",
457 | "param": {},
458 | "name": "conv2_3_bias",
459 | "inputs": [],
460 | "backward_source_id": -1
461 | },
462 | {
463 | "op": "Convolution",
464 | "param": {
465 | "cudnn_off": "False",
466 | "cudnn_tune": "off",
467 | "dilate": "(1,1)",
468 | "kernel": "(3,3)",
469 | "no_bias": "False",
470 | "num_filter": "48",
471 | "num_group": "1",
472 | "pad": "(0,0)",
473 | "stride": "(1,1)",
474 | "workspace": "1024"
475 | },
476 | "name": "conv2_3",
477 | "inputs": [[41, 0], [42, 0], [43, 0]],
478 | "backward_source_id": -1
479 | },
480 | {
481 | "op": "null",
482 | "param": {},
483 | "name": "prelu2_3_gamma",
484 | "inputs": [],
485 | "backward_source_id": -1
486 | },
487 | {
488 | "op": "LeakyReLU",
489 | "param": {
490 | "act_type": "prelu",
491 | "lower_bound": "0.125",
492 | "slope": "0.25",
493 | "upper_bound": "0.334"
494 | },
495 | "name": "prelu2_3",
496 | "inputs": [[44, 0], [45, 0]],
497 | "backward_source_id": -1
498 | },
499 | {
500 | "op": "Pooling",
501 | "param": {
502 | "global_pool": "False",
503 | "kernel": "(3,3)",
504 | "pad": "(0,0)",
505 | "pool_type": "max",
506 | "pooling_convention": "full",
507 | "stride": "(2,2)"
508 | },
509 | "name": "pool2_3",
510 | "inputs": [[46, 0]],
511 | "backward_source_id": -1
512 | },
513 | {
514 | "op": "null",
515 | "param": {},
516 | "name": "conv3_3_weight",
517 | "inputs": [],
518 | "backward_source_id": -1
519 | },
520 | {
521 | "op": "null",
522 | "param": {},
523 | "name": "conv3_3_bias",
524 | "inputs": [],
525 | "backward_source_id": -1
526 | },
527 | {
528 | "op": "Convolution",
529 | "param": {
530 | "cudnn_off": "False",
531 | "cudnn_tune": "off",
532 | "dilate": "(1,1)",
533 | "kernel": "(2,2)",
534 | "no_bias": "False",
535 | "num_filter": "64",
536 | "num_group": "1",
537 | "pad": "(0,0)",
538 | "stride": "(1,1)",
539 | "workspace": "1024"
540 | },
541 | "name": "conv3_3",
542 | "inputs": [[47, 0], [48, 0], [49, 0]],
543 | "backward_source_id": -1
544 | },
545 | {
546 | "op": "null",
547 | "param": {},
548 | "name": "prelu3_3_gamma",
549 | "inputs": [],
550 | "backward_source_id": -1
551 | },
552 | {
553 | "op": "LeakyReLU",
554 | "param": {
555 | "act_type": "prelu",
556 | "lower_bound": "0.125",
557 | "slope": "0.25",
558 | "upper_bound": "0.334"
559 | },
560 | "name": "prelu3_3",
561 | "inputs": [[50, 0], [51, 0]],
562 | "backward_source_id": -1
563 | },
564 | {
565 | "op": "null",
566 | "param": {},
567 | "name": "conv1_4_weight",
568 | "inputs": [],
569 | "backward_source_id": -1
570 | },
571 | {
572 | "op": "null",
573 | "param": {},
574 | "name": "conv1_4_bias",
575 | "inputs": [],
576 | "backward_source_id": -1
577 | },
578 | {
579 | "op": "Convolution",
580 | "param": {
581 | "cudnn_off": "False",
582 | "cudnn_tune": "off",
583 | "dilate": "(1,1)",
584 | "kernel": "(3,3)",
585 | "no_bias": "False",
586 | "num_filter": "28",
587 | "num_group": "1",
588 | "pad": "(0,0)",
589 | "stride": "(1,1)",
590 | "workspace": "1024"
591 | },
592 | "name": "conv1_4",
593 | "inputs": [[1, 3], [53, 0], [54, 0]],
594 | "backward_source_id": -1
595 | },
596 | {
597 | "op": "null",
598 | "param": {},
599 | "name": "prelu1_4_gamma",
600 | "inputs": [],
601 | "backward_source_id": -1
602 | },
603 | {
604 | "op": "LeakyReLU",
605 | "param": {
606 | "act_type": "prelu",
607 | "lower_bound": "0.125",
608 | "slope": "0.25",
609 | "upper_bound": "0.334"
610 | },
611 | "name": "prelu1_4",
612 | "inputs": [[55, 0], [56, 0]],
613 | "backward_source_id": -1
614 | },
615 | {
616 | "op": "Pooling",
617 | "param": {
618 | "global_pool": "False",
619 | "kernel": "(3,3)",
620 | "pad": "(0,0)",
621 | "pool_type": "max",
622 | "pooling_convention": "full",
623 | "stride": "(2,2)"
624 | },
625 | "name": "pool1_4",
626 | "inputs": [[57, 0]],
627 | "backward_source_id": -1
628 | },
629 | {
630 | "op": "null",
631 | "param": {},
632 | "name": "conv2_4_weight",
633 | "inputs": [],
634 | "backward_source_id": -1
635 | },
636 | {
637 | "op": "null",
638 | "param": {},
639 | "name": "conv2_4_bias",
640 | "inputs": [],
641 | "backward_source_id": -1
642 | },
643 | {
644 | "op": "Convolution",
645 | "param": {
646 | "cudnn_off": "False",
647 | "cudnn_tune": "off",
648 | "dilate": "(1,1)",
649 | "kernel": "(3,3)",
650 | "no_bias": "False",
651 | "num_filter": "48",
652 | "num_group": "1",
653 | "pad": "(0,0)",
654 | "stride": "(1,1)",
655 | "workspace": "1024"
656 | },
657 | "name": "conv2_4",
658 | "inputs": [[58, 0], [59, 0], [60, 0]],
659 | "backward_source_id": -1
660 | },
661 | {
662 | "op": "null",
663 | "param": {},
664 | "name": "prelu2_4_gamma",
665 | "inputs": [],
666 | "backward_source_id": -1
667 | },
668 | {
669 | "op": "LeakyReLU",
670 | "param": {
671 | "act_type": "prelu",
672 | "lower_bound": "0.125",
673 | "slope": "0.25",
674 | "upper_bound": "0.334"
675 | },
676 | "name": "prelu2_4",
677 | "inputs": [[61, 0], [62, 0]],
678 | "backward_source_id": -1
679 | },
680 | {
681 | "op": "Pooling",
682 | "param": {
683 | "global_pool": "False",
684 | "kernel": "(3,3)",
685 | "pad": "(0,0)",
686 | "pool_type": "max",
687 | "pooling_convention": "full",
688 | "stride": "(2,2)"
689 | },
690 | "name": "pool2_4",
691 | "inputs": [[63, 0]],
692 | "backward_source_id": -1
693 | },
694 | {
695 | "op": "null",
696 | "param": {},
697 | "name": "conv3_4_weight",
698 | "inputs": [],
699 | "backward_source_id": -1
700 | },
701 | {
702 | "op": "null",
703 | "param": {},
704 | "name": "conv3_4_bias",
705 | "inputs": [],
706 | "backward_source_id": -1
707 | },
708 | {
709 | "op": "Convolution",
710 | "param": {
711 | "cudnn_off": "False",
712 | "cudnn_tune": "off",
713 | "dilate": "(1,1)",
714 | "kernel": "(2,2)",
715 | "no_bias": "False",
716 | "num_filter": "64",
717 | "num_group": "1",
718 | "pad": "(0,0)",
719 | "stride": "(1,1)",
720 | "workspace": "1024"
721 | },
722 | "name": "conv3_4",
723 | "inputs": [[64, 0], [65, 0], [66, 0]],
724 | "backward_source_id": -1
725 | },
726 | {
727 | "op": "null",
728 | "param": {},
729 | "name": "prelu3_4_gamma",
730 | "inputs": [],
731 | "backward_source_id": -1
732 | },
733 | {
734 | "op": "LeakyReLU",
735 | "param": {
736 | "act_type": "prelu",
737 | "lower_bound": "0.125",
738 | "slope": "0.25",
739 | "upper_bound": "0.334"
740 | },
741 | "name": "prelu3_4",
742 | "inputs": [[67, 0], [68, 0]],
743 | "backward_source_id": -1
744 | },
745 | {
746 | "op": "null",
747 | "param": {},
748 | "name": "conv1_5_weight",
749 | "inputs": [],
750 | "backward_source_id": -1
751 | },
752 | {
753 | "op": "null",
754 | "param": {},
755 | "name": "conv1_5_bias",
756 | "inputs": [],
757 | "backward_source_id": -1
758 | },
759 | {
760 | "op": "Convolution",
761 | "param": {
762 | "cudnn_off": "False",
763 | "cudnn_tune": "off",
764 | "dilate": "(1,1)",
765 | "kernel": "(3,3)",
766 | "no_bias": "False",
767 | "num_filter": "28",
768 | "num_group": "1",
769 | "pad": "(0,0)",
770 | "stride": "(1,1)",
771 | "workspace": "1024"
772 | },
773 | "name": "conv1_5",
774 | "inputs": [[1, 4], [70, 0], [71, 0]],
775 | "backward_source_id": -1
776 | },
777 | {
778 | "op": "null",
779 | "param": {},
780 | "name": "prelu1_5_gamma",
781 | "inputs": [],
782 | "backward_source_id": -1
783 | },
784 | {
785 | "op": "LeakyReLU",
786 | "param": {
787 | "act_type": "prelu",
788 | "lower_bound": "0.125",
789 | "slope": "0.25",
790 | "upper_bound": "0.334"
791 | },
792 | "name": "prelu1_5",
793 | "inputs": [[72, 0], [73, 0]],
794 | "backward_source_id": -1
795 | },
796 | {
797 | "op": "Pooling",
798 | "param": {
799 | "global_pool": "False",
800 | "kernel": "(3,3)",
801 | "pad": "(0,0)",
802 | "pool_type": "max",
803 | "pooling_convention": "full",
804 | "stride": "(2,2)"
805 | },
806 | "name": "pool1_5",
807 | "inputs": [[74, 0]],
808 | "backward_source_id": -1
809 | },
810 | {
811 | "op": "null",
812 | "param": {},
813 | "name": "conv2_5_weight",
814 | "inputs": [],
815 | "backward_source_id": -1
816 | },
817 | {
818 | "op": "null",
819 | "param": {},
820 | "name": "conv2_5_bias",
821 | "inputs": [],
822 | "backward_source_id": -1
823 | },
824 | {
825 | "op": "Convolution",
826 | "param": {
827 | "cudnn_off": "False",
828 | "cudnn_tune": "off",
829 | "dilate": "(1,1)",
830 | "kernel": "(3,3)",
831 | "no_bias": "False",
832 | "num_filter": "48",
833 | "num_group": "1",
834 | "pad": "(0,0)",
835 | "stride": "(1,1)",
836 | "workspace": "1024"
837 | },
838 | "name": "conv2_5",
839 | "inputs": [[75, 0], [76, 0], [77, 0]],
840 | "backward_source_id": -1
841 | },
842 | {
843 | "op": "null",
844 | "param": {},
845 | "name": "prelu2_5_gamma",
846 | "inputs": [],
847 | "backward_source_id": -1
848 | },
849 | {
850 | "op": "LeakyReLU",
851 | "param": {
852 | "act_type": "prelu",
853 | "lower_bound": "0.125",
854 | "slope": "0.25",
855 | "upper_bound": "0.334"
856 | },
857 | "name": "prelu2_5",
858 | "inputs": [[78, 0], [79, 0]],
859 | "backward_source_id": -1
860 | },
861 | {
862 | "op": "Pooling",
863 | "param": {
864 | "global_pool": "False",
865 | "kernel": "(3,3)",
866 | "pad": "(0,0)",
867 | "pool_type": "max",
868 | "pooling_convention": "full",
869 | "stride": "(2,2)"
870 | },
871 | "name": "pool2_5",
872 | "inputs": [[80, 0]],
873 | "backward_source_id": -1
874 | },
875 | {
876 | "op": "null",
877 | "param": {},
878 | "name": "conv3_5_weight",
879 | "inputs": [],
880 | "backward_source_id": -1
881 | },
882 | {
883 | "op": "null",
884 | "param": {},
885 | "name": "conv3_5_bias",
886 | "inputs": [],
887 | "backward_source_id": -1
888 | },
889 | {
890 | "op": "Convolution",
891 | "param": {
892 | "cudnn_off": "False",
893 | "cudnn_tune": "off",
894 | "dilate": "(1,1)",
895 | "kernel": "(2,2)",
896 | "no_bias": "False",
897 | "num_filter": "64",
898 | "num_group": "1",
899 | "pad": "(0,0)",
900 | "stride": "(1,1)",
901 | "workspace": "1024"
902 | },
903 | "name": "conv3_5",
904 | "inputs": [[81, 0], [82, 0], [83, 0]],
905 | "backward_source_id": -1
906 | },
907 | {
908 | "op": "null",
909 | "param": {},
910 | "name": "prelu3_5_gamma",
911 | "inputs": [],
912 | "backward_source_id": -1
913 | },
914 | {
915 | "op": "LeakyReLU",
916 | "param": {
917 | "act_type": "prelu",
918 | "lower_bound": "0.125",
919 | "slope": "0.25",
920 | "upper_bound": "0.334"
921 | },
922 | "name": "prelu3_5",
923 | "inputs": [[84, 0], [85, 0]],
924 | "backward_source_id": -1
925 | },
926 | {
927 | "op": "Concat",
928 | "param": {
929 | "dim": "1",
930 | "num_args": "5"
931 | },
932 | "name": "concat",
933 | "inputs": [[18, 0], [35, 0], [52, 0], [69, 0], [86, 0]],
934 | "backward_source_id": -1
935 | },
936 | {
937 | "op": "null",
938 | "param": {},
939 | "name": "fc4_weight",
940 | "inputs": [],
941 | "backward_source_id": -1
942 | },
943 | {
944 | "op": "null",
945 | "param": {},
946 | "name": "fc4_bias",
947 | "inputs": [],
948 | "backward_source_id": -1
949 | },
950 | {
951 | "op": "FullyConnected",
952 | "param": {
953 | "no_bias": "False",
954 | "num_hidden": "256"
955 | },
956 | "name": "fc4",
957 | "inputs": [[87, 0], [88, 0], [89, 0]],
958 | "backward_source_id": -1
959 | },
960 | {
961 | "op": "null",
962 | "param": {},
963 | "name": "prelu4_gamma",
964 | "inputs": [],
965 | "backward_source_id": -1
966 | },
967 | {
968 | "op": "LeakyReLU",
969 | "param": {
970 | "act_type": "prelu",
971 | "lower_bound": "0.125",
972 | "slope": "0.25",
973 | "upper_bound": "0.334"
974 | },
975 | "name": "prelu4",
976 | "inputs": [[90, 0], [91, 0]],
977 | "backward_source_id": -1
978 | },
979 | {
980 | "op": "null",
981 | "param": {},
982 | "name": "fc4_1_weight",
983 | "inputs": [],
984 | "backward_source_id": -1
985 | },
986 | {
987 | "op": "null",
988 | "param": {},
989 | "name": "fc4_1_bias",
990 | "inputs": [],
991 | "backward_source_id": -1
992 | },
993 | {
994 | "op": "FullyConnected",
995 | "param": {
996 | "no_bias": "False",
997 | "num_hidden": "64"
998 | },
999 | "name": "fc4_1",
1000 | "inputs": [[92, 0], [93, 0], [94, 0]],
1001 | "backward_source_id": -1
1002 | },
1003 | {
1004 | "op": "null",
1005 | "param": {},
1006 | "name": "prelu4_1_gamma",
1007 | "inputs": [],
1008 | "backward_source_id": -1
1009 | },
1010 | {
1011 | "op": "LeakyReLU",
1012 | "param": {
1013 | "act_type": "prelu",
1014 | "lower_bound": "0.125",
1015 | "slope": "0.25",
1016 | "upper_bound": "0.334"
1017 | },
1018 | "name": "prelu4_1",
1019 | "inputs": [[95, 0], [96, 0]],
1020 | "backward_source_id": -1
1021 | },
1022 | {
1023 | "op": "null",
1024 | "param": {},
1025 | "name": "fc5_1_weight",
1026 | "inputs": [],
1027 | "backward_source_id": -1
1028 | },
1029 | {
1030 | "op": "null",
1031 | "param": {},
1032 | "name": "fc5_1_bias",
1033 | "inputs": [],
1034 | "backward_source_id": -1
1035 | },
1036 | {
1037 | "op": "FullyConnected",
1038 | "param": {
1039 | "no_bias": "False",
1040 | "num_hidden": "2"
1041 | },
1042 | "name": "fc5_1",
1043 | "inputs": [[97, 0], [98, 0], [99, 0]],
1044 | "backward_source_id": -1
1045 | },
1046 | {
1047 | "op": "null",
1048 | "param": {},
1049 | "name": "fc4_2_weight",
1050 | "inputs": [],
1051 | "backward_source_id": -1
1052 | },
1053 | {
1054 | "op": "null",
1055 | "param": {},
1056 | "name": "fc4_2_bias",
1057 | "inputs": [],
1058 | "backward_source_id": -1
1059 | },
1060 | {
1061 | "op": "FullyConnected",
1062 | "param": {
1063 | "no_bias": "False",
1064 | "num_hidden": "64"
1065 | },
1066 | "name": "fc4_2",
1067 | "inputs": [[92, 0], [101, 0], [102, 0]],
1068 | "backward_source_id": -1
1069 | },
1070 | {
1071 | "op": "null",
1072 | "param": {},
1073 | "name": "prelu4_2_gamma",
1074 | "inputs": [],
1075 | "backward_source_id": -1
1076 | },
1077 | {
1078 | "op": "LeakyReLU",
1079 | "param": {
1080 | "act_type": "prelu",
1081 | "lower_bound": "0.125",
1082 | "slope": "0.25",
1083 | "upper_bound": "0.334"
1084 | },
1085 | "name": "prelu4_2",
1086 | "inputs": [[103, 0], [104, 0]],
1087 | "backward_source_id": -1
1088 | },
1089 | {
1090 | "op": "null",
1091 | "param": {},
1092 | "name": "fc5_2_weight",
1093 | "inputs": [],
1094 | "backward_source_id": -1
1095 | },
1096 | {
1097 | "op": "null",
1098 | "param": {},
1099 | "name": "fc5_2_bias",
1100 | "inputs": [],
1101 | "backward_source_id": -1
1102 | },
1103 | {
1104 | "op": "FullyConnected",
1105 | "param": {
1106 | "no_bias": "False",
1107 | "num_hidden": "2"
1108 | },
1109 | "name": "fc5_2",
1110 | "inputs": [[105, 0], [106, 0], [107, 0]],
1111 | "backward_source_id": -1
1112 | },
1113 | {
1114 | "op": "null",
1115 | "param": {},
1116 | "name": "fc4_3_weight",
1117 | "inputs": [],
1118 | "backward_source_id": -1
1119 | },
1120 | {
1121 | "op": "null",
1122 | "param": {},
1123 | "name": "fc4_3_bias",
1124 | "inputs": [],
1125 | "backward_source_id": -1
1126 | },
1127 | {
1128 | "op": "FullyConnected",
1129 | "param": {
1130 | "no_bias": "False",
1131 | "num_hidden": "64"
1132 | },
1133 | "name": "fc4_3",
1134 | "inputs": [[92, 0], [109, 0], [110, 0]],
1135 | "backward_source_id": -1
1136 | },
1137 | {
1138 | "op": "null",
1139 | "param": {},
1140 | "name": "prelu4_3_gamma",
1141 | "inputs": [],
1142 | "backward_source_id": -1
1143 | },
1144 | {
1145 | "op": "LeakyReLU",
1146 | "param": {
1147 | "act_type": "prelu",
1148 | "lower_bound": "0.125",
1149 | "slope": "0.25",
1150 | "upper_bound": "0.334"
1151 | },
1152 | "name": "prelu4_3",
1153 | "inputs": [[111, 0], [112, 0]],
1154 | "backward_source_id": -1
1155 | },
1156 | {
1157 | "op": "null",
1158 | "param": {},
1159 | "name": "fc5_3_weight",
1160 | "inputs": [],
1161 | "backward_source_id": -1
1162 | },
1163 | {
1164 | "op": "null",
1165 | "param": {},
1166 | "name": "fc5_3_bias",
1167 | "inputs": [],
1168 | "backward_source_id": -1
1169 | },
1170 | {
1171 | "op": "FullyConnected",
1172 | "param": {
1173 | "no_bias": "False",
1174 | "num_hidden": "2"
1175 | },
1176 | "name": "fc5_3",
1177 | "inputs": [[113, 0], [114, 0], [115, 0]],
1178 | "backward_source_id": -1
1179 | },
1180 | {
1181 | "op": "null",
1182 | "param": {},
1183 | "name": "fc4_4_weight",
1184 | "inputs": [],
1185 | "backward_source_id": -1
1186 | },
1187 | {
1188 | "op": "null",
1189 | "param": {},
1190 | "name": "fc4_4_bias",
1191 | "inputs": [],
1192 | "backward_source_id": -1
1193 | },
1194 | {
1195 | "op": "FullyConnected",
1196 | "param": {
1197 | "no_bias": "False",
1198 | "num_hidden": "64"
1199 | },
1200 | "name": "fc4_4",
1201 | "inputs": [[92, 0], [117, 0], [118, 0]],
1202 | "backward_source_id": -1
1203 | },
1204 | {
1205 | "op": "null",
1206 | "param": {},
1207 | "name": "prelu4_4_gamma",
1208 | "inputs": [],
1209 | "backward_source_id": -1
1210 | },
1211 | {
1212 | "op": "LeakyReLU",
1213 | "param": {
1214 | "act_type": "prelu",
1215 | "lower_bound": "0.125",
1216 | "slope": "0.25",
1217 | "upper_bound": "0.334"
1218 | },
1219 | "name": "prelu4_4",
1220 | "inputs": [[119, 0], [120, 0]],
1221 | "backward_source_id": -1
1222 | },
1223 | {
1224 | "op": "null",
1225 | "param": {},
1226 | "name": "fc5_4_weight",
1227 | "inputs": [],
1228 | "backward_source_id": -1
1229 | },
1230 | {
1231 | "op": "null",
1232 | "param": {},
1233 | "name": "fc5_4_bias",
1234 | "inputs": [],
1235 | "backward_source_id": -1
1236 | },
1237 | {
1238 | "op": "FullyConnected",
1239 | "param": {
1240 | "no_bias": "False",
1241 | "num_hidden": "2"
1242 | },
1243 | "name": "fc5_4",
1244 | "inputs": [[121, 0], [122, 0], [123, 0]],
1245 | "backward_source_id": -1
1246 | },
1247 | {
1248 | "op": "null",
1249 | "param": {},
1250 | "name": "fc4_5_weight",
1251 | "inputs": [],
1252 | "backward_source_id": -1
1253 | },
1254 | {
1255 | "op": "null",
1256 | "param": {},
1257 | "name": "fc4_5_bias",
1258 | "inputs": [],
1259 | "backward_source_id": -1
1260 | },
1261 | {
1262 | "op": "FullyConnected",
1263 | "param": {
1264 | "no_bias": "False",
1265 | "num_hidden": "64"
1266 | },
1267 | "name": "fc4_5",
1268 | "inputs": [[92, 0], [125, 0], [126, 0]],
1269 | "backward_source_id": -1
1270 | },
1271 | {
1272 | "op": "null",
1273 | "param": {},
1274 | "name": "prelu4_5_gamma",
1275 | "inputs": [],
1276 | "backward_source_id": -1
1277 | },
1278 | {
1279 | "op": "LeakyReLU",
1280 | "param": {
1281 | "act_type": "prelu",
1282 | "lower_bound": "0.125",
1283 | "slope": "0.25",
1284 | "upper_bound": "0.334"
1285 | },
1286 | "name": "prelu4_5",
1287 | "inputs": [[127, 0], [128, 0]],
1288 | "backward_source_id": -1
1289 | },
1290 | {
1291 | "op": "null",
1292 | "param": {},
1293 | "name": "fc5_5_weight",
1294 | "inputs": [],
1295 | "backward_source_id": -1
1296 | },
1297 | {
1298 | "op": "null",
1299 | "param": {},
1300 | "name": "fc5_5_bias",
1301 | "inputs": [],
1302 | "backward_source_id": -1
1303 | },
1304 | {
1305 | "op": "FullyConnected",
1306 | "param": {
1307 | "no_bias": "False",
1308 | "num_hidden": "2"
1309 | },
1310 | "name": "fc5_5",
1311 | "inputs": [[129, 0], [130, 0], [131, 0]],
1312 | "backward_source_id": -1
1313 | }
1314 | ],
1315 | "arg_nodes": [
1316 | 0,
1317 | 2,
1318 | 3,
1319 | 5,
1320 | 8,
1321 | 9,
1322 | 11,
1323 | 14,
1324 | 15,
1325 | 17,
1326 | 19,
1327 | 20,
1328 | 22,
1329 | 25,
1330 | 26,
1331 | 28,
1332 | 31,
1333 | 32,
1334 | 34,
1335 | 36,
1336 | 37,
1337 | 39,
1338 | 42,
1339 | 43,
1340 | 45,
1341 | 48,
1342 | 49,
1343 | 51,
1344 | 53,
1345 | 54,
1346 | 56,
1347 | 59,
1348 | 60,
1349 | 62,
1350 | 65,
1351 | 66,
1352 | 68,
1353 | 70,
1354 | 71,
1355 | 73,
1356 | 76,
1357 | 77,
1358 | 79,
1359 | 82,
1360 | 83,
1361 | 85,
1362 | 88,
1363 | 89,
1364 | 91,
1365 | 93,
1366 | 94,
1367 | 96,
1368 | 98,
1369 | 99,
1370 | 101,
1371 | 102,
1372 | 104,
1373 | 106,
1374 | 107,
1375 | 109,
1376 | 110,
1377 | 112,
1378 | 114,
1379 | 115,
1380 | 117,
1381 | 118,
1382 | 120,
1383 | 122,
1384 | 123,
1385 | 125,
1386 | 126,
1387 | 128,
1388 | 130,
1389 | 131
1390 | ],
1391 | "heads": [[100, 0], [108, 0], [116, 0], [124, 0], [132, 0]]
1392 | }
--------------------------------------------------------------------------------
/insightface/mtcnn-model/det4.caffemodel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pterhoer/FaceImageQuality/611296605db57b8d50518fd5911d5111eeb52747/insightface/mtcnn-model/det4.caffemodel
--------------------------------------------------------------------------------
/insightface/mtcnn-model/det4.prototxt:
--------------------------------------------------------------------------------
1 | name: "LNet"
2 | input: "data"
3 | input_dim: 1
4 | input_dim: 15
5 | input_dim: 24
6 | input_dim: 24
7 |
8 | layer {
9 | name: "slicer_data"
10 | type: "Slice"
11 | bottom: "data"
12 | top: "data241"
13 | top: "data242"
14 | top: "data243"
15 | top: "data244"
16 | top: "data245"
17 | slice_param {
18 | axis: 1
19 | slice_point: 3
20 | slice_point: 6
21 | slice_point: 9
22 | slice_point: 12
23 | }
24 | }
25 | layer {
26 | name: "conv1_1"
27 | type: "Convolution"
28 | bottom: "data241"
29 | top: "conv1_1"
30 | param {
31 | lr_mult: 1
32 | decay_mult: 1
33 | }
34 | param {
35 | lr_mult: 2
36 | decay_mult: 1
37 | }
38 | convolution_param {
39 | num_output: 28
40 | kernel_size: 3
41 | stride: 1
42 | weight_filler {
43 | type: "xavier"
44 | }
45 | bias_filler {
46 | type: "constant"
47 | value: 0
48 | }
49 | }
50 |
51 | }
52 | layer {
53 | name: "prelu1_1"
54 | type: "PReLU"
55 | bottom: "conv1_1"
56 | top: "conv1_1"
57 |
58 | }
59 | layer {
60 | name: "pool1_1"
61 | type: "Pooling"
62 | bottom: "conv1_1"
63 | top: "pool1_1"
64 | pooling_param {
65 | pool: MAX
66 | kernel_size: 3
67 | stride: 2
68 | }
69 | }
70 |
71 | layer {
72 | name: "conv2_1"
73 | type: "Convolution"
74 | bottom: "pool1_1"
75 | top: "conv2_1"
76 | param {
77 | lr_mult: 1
78 | decay_mult: 1
79 | }
80 | param {
81 | lr_mult: 2
82 | decay_mult: 1
83 | }
84 | convolution_param {
85 | num_output: 48
86 | kernel_size: 3
87 | stride: 1
88 | weight_filler {
89 | type: "xavier"
90 | }
91 | bias_filler {
92 | type: "constant"
93 | value: 0
94 | }
95 | }
96 |
97 | }
98 | layer {
99 | name: "prelu2_1"
100 | type: "PReLU"
101 | bottom: "conv2_1"
102 | top: "conv2_1"
103 | }
104 | layer {
105 | name: "pool2_1"
106 | type: "Pooling"
107 | bottom: "conv2_1"
108 | top: "pool2_1"
109 | pooling_param {
110 | pool: MAX
111 | kernel_size: 3
112 | stride: 2
113 | }
114 |
115 | }
116 | layer {
117 | name: "conv3_1"
118 | type: "Convolution"
119 | bottom: "pool2_1"
120 | top: "conv3_1"
121 | param {
122 | lr_mult: 1
123 | decay_mult: 1
124 | }
125 | param {
126 | lr_mult: 2
127 | decay_mult: 1
128 | }
129 | convolution_param {
130 | num_output: 64
131 | kernel_size: 2
132 | stride: 1
133 | weight_filler {
134 | type: "xavier"
135 | }
136 | bias_filler {
137 | type: "constant"
138 | value: 0
139 | }
140 | }
141 |
142 | }
143 | layer {
144 | name: "prelu3_1"
145 | type: "PReLU"
146 | bottom: "conv3_1"
147 | top: "conv3_1"
148 | }
149 | ##########################
150 | layer {
151 | name: "conv1_2"
152 | type: "Convolution"
153 | bottom: "data242"
154 | top: "conv1_2"
155 | param {
156 | lr_mult: 1
157 | decay_mult: 1
158 | }
159 | param {
160 | lr_mult: 2
161 | decay_mult: 1
162 | }
163 | convolution_param {
164 | num_output: 28
165 | kernel_size: 3
166 | stride: 1
167 | weight_filler {
168 | type: "xavier"
169 | }
170 | bias_filler {
171 | type: "constant"
172 | value: 0
173 | }
174 | }
175 |
176 | }
177 | layer {
178 | name: "prelu1_2"
179 | type: "PReLU"
180 | bottom: "conv1_2"
181 | top: "conv1_2"
182 |
183 | }
184 | layer {
185 | name: "pool1_2"
186 | type: "Pooling"
187 | bottom: "conv1_2"
188 | top: "pool1_2"
189 | pooling_param {
190 | pool: MAX
191 | kernel_size: 3
192 | stride: 2
193 | }
194 | }
195 |
196 | layer {
197 | name: "conv2_2"
198 | type: "Convolution"
199 | bottom: "pool1_2"
200 | top: "conv2_2"
201 | param {
202 | lr_mult: 1
203 | decay_mult: 1
204 | }
205 | param {
206 | lr_mult: 2
207 | decay_mult: 1
208 | }
209 | convolution_param {
210 | num_output: 48
211 | kernel_size: 3
212 | stride: 1
213 | weight_filler {
214 | type: "xavier"
215 | }
216 | bias_filler {
217 | type: "constant"
218 | value: 0
219 | }
220 | }
221 |
222 | }
223 | layer {
224 | name: "prelu2_2"
225 | type: "PReLU"
226 | bottom: "conv2_2"
227 | top: "conv2_2"
228 | }
229 | layer {
230 | name: "pool2_2"
231 | type: "Pooling"
232 | bottom: "conv2_2"
233 | top: "pool2_2"
234 | pooling_param {
235 | pool: MAX
236 | kernel_size: 3
237 | stride: 2
238 | }
239 |
240 | }
241 | layer {
242 | name: "conv3_2"
243 | type: "Convolution"
244 | bottom: "pool2_2"
245 | top: "conv3_2"
246 | param {
247 | lr_mult: 1
248 | decay_mult: 1
249 | }
250 | param {
251 | lr_mult: 2
252 | decay_mult: 1
253 | }
254 | convolution_param {
255 | num_output: 64
256 | kernel_size: 2
257 | stride: 1
258 | weight_filler {
259 | type: "xavier"
260 | }
261 | bias_filler {
262 | type: "constant"
263 | value: 0
264 | }
265 | }
266 |
267 | }
268 | layer {
269 | name: "prelu3_2"
270 | type: "PReLU"
271 | bottom: "conv3_2"
272 | top: "conv3_2"
273 | }
274 | ##########################
275 | ##########################
276 | layer {
277 | name: "conv1_3"
278 | type: "Convolution"
279 | bottom: "data243"
280 | top: "conv1_3"
281 | param {
282 | lr_mult: 1
283 | decay_mult: 1
284 | }
285 | param {
286 | lr_mult: 2
287 | decay_mult: 1
288 | }
289 | convolution_param {
290 | num_output: 28
291 | kernel_size: 3
292 | stride: 1
293 | weight_filler {
294 | type: "xavier"
295 | }
296 | bias_filler {
297 | type: "constant"
298 | value: 0
299 | }
300 | }
301 |
302 | }
303 | layer {
304 | name: "prelu1_3"
305 | type: "PReLU"
306 | bottom: "conv1_3"
307 | top: "conv1_3"
308 |
309 | }
310 | layer {
311 | name: "pool1_3"
312 | type: "Pooling"
313 | bottom: "conv1_3"
314 | top: "pool1_3"
315 | pooling_param {
316 | pool: MAX
317 | kernel_size: 3
318 | stride: 2
319 | }
320 | }
321 |
322 | layer {
323 | name: "conv2_3"
324 | type: "Convolution"
325 | bottom: "pool1_3"
326 | top: "conv2_3"
327 | param {
328 | lr_mult: 1
329 | decay_mult: 1
330 | }
331 | param {
332 | lr_mult: 2
333 | decay_mult: 1
334 | }
335 | convolution_param {
336 | num_output: 48
337 | kernel_size: 3
338 | stride: 1
339 | weight_filler {
340 | type: "xavier"
341 | }
342 | bias_filler {
343 | type: "constant"
344 | value: 0
345 | }
346 | }
347 |
348 | }
349 | layer {
350 | name: "prelu2_3"
351 | type: "PReLU"
352 | bottom: "conv2_3"
353 | top: "conv2_3"
354 | }
355 | layer {
356 | name: "pool2_3"
357 | type: "Pooling"
358 | bottom: "conv2_3"
359 | top: "pool2_3"
360 | pooling_param {
361 | pool: MAX
362 | kernel_size: 3
363 | stride: 2
364 | }
365 |
366 | }
367 | layer {
368 | name: "conv3_3"
369 | type: "Convolution"
370 | bottom: "pool2_3"
371 | top: "conv3_3"
372 | param {
373 | lr_mult: 1
374 | decay_mult: 1
375 | }
376 | param {
377 | lr_mult: 2
378 | decay_mult: 1
379 | }
380 | convolution_param {
381 | num_output: 64
382 | kernel_size: 2
383 | stride: 1
384 | weight_filler {
385 | type: "xavier"
386 | }
387 | bias_filler {
388 | type: "constant"
389 | value: 0
390 | }
391 | }
392 |
393 | }
394 | layer {
395 | name: "prelu3_3"
396 | type: "PReLU"
397 | bottom: "conv3_3"
398 | top: "conv3_3"
399 | }
400 | ##########################
401 | ##########################
402 | layer {
403 | name: "conv1_4"
404 | type: "Convolution"
405 | bottom: "data244"
406 | top: "conv1_4"
407 | param {
408 | lr_mult: 1
409 | decay_mult: 1
410 | }
411 | param {
412 | lr_mult: 2
413 | decay_mult: 1
414 | }
415 | convolution_param {
416 | num_output: 28
417 | kernel_size: 3
418 | stride: 1
419 | weight_filler {
420 | type: "xavier"
421 | }
422 | bias_filler {
423 | type: "constant"
424 | value: 0
425 | }
426 | }
427 |
428 | }
429 | layer {
430 | name: "prelu1_4"
431 | type: "PReLU"
432 | bottom: "conv1_4"
433 | top: "conv1_4"
434 |
435 | }
436 | layer {
437 | name: "pool1_4"
438 | type: "Pooling"
439 | bottom: "conv1_4"
440 | top: "pool1_4"
441 | pooling_param {
442 | pool: MAX
443 | kernel_size: 3
444 | stride: 2
445 | }
446 | }
447 |
448 | layer {
449 | name: "conv2_4"
450 | type: "Convolution"
451 | bottom: "pool1_4"
452 | top: "conv2_4"
453 | param {
454 | lr_mult: 1
455 | decay_mult: 1
456 | }
457 | param {
458 | lr_mult: 2
459 | decay_mult: 1
460 | }
461 | convolution_param {
462 | num_output: 48
463 | kernel_size: 3
464 | stride: 1
465 | weight_filler {
466 | type: "xavier"
467 | }
468 | bias_filler {
469 | type: "constant"
470 | value: 0
471 | }
472 | }
473 |
474 | }
475 | layer {
476 | name: "prelu2_4"
477 | type: "PReLU"
478 | bottom: "conv2_4"
479 | top: "conv2_4"
480 | }
481 | layer {
482 | name: "pool2_4"
483 | type: "Pooling"
484 | bottom: "conv2_4"
485 | top: "pool2_4"
486 | pooling_param {
487 | pool: MAX
488 | kernel_size: 3
489 | stride: 2
490 | }
491 |
492 | }
493 | layer {
494 | name: "conv3_4"
495 | type: "Convolution"
496 | bottom: "pool2_4"
497 | top: "conv3_4"
498 | param {
499 | lr_mult: 1
500 | decay_mult: 1
501 | }
502 | param {
503 | lr_mult: 2
504 | decay_mult: 1
505 | }
506 | convolution_param {
507 | num_output: 64
508 | kernel_size: 2
509 | stride: 1
510 | weight_filler {
511 | type: "xavier"
512 | }
513 | bias_filler {
514 | type: "constant"
515 | value: 0
516 | }
517 | }
518 |
519 | }
520 | layer {
521 | name: "prelu3_4"
522 | type: "PReLU"
523 | bottom: "conv3_4"
524 | top: "conv3_4"
525 | }
526 | ##########################
527 | ##########################
528 | layer {
529 | name: "conv1_5"
530 | type: "Convolution"
531 | bottom: "data245"
532 | top: "conv1_5"
533 | param {
534 | lr_mult: 1
535 | decay_mult: 1
536 | }
537 | param {
538 | lr_mult: 2
539 | decay_mult: 1
540 | }
541 | convolution_param {
542 | num_output: 28
543 | kernel_size: 3
544 | stride: 1
545 | weight_filler {
546 | type: "xavier"
547 | }
548 | bias_filler {
549 | type: "constant"
550 | value: 0
551 | }
552 | }
553 |
554 | }
555 | layer {
556 | name: "prelu1_5"
557 | type: "PReLU"
558 | bottom: "conv1_5"
559 | top: "conv1_5"
560 |
561 | }
562 | layer {
563 | name: "pool1_5"
564 | type: "Pooling"
565 | bottom: "conv1_5"
566 | top: "pool1_5"
567 | pooling_param {
568 | pool: MAX
569 | kernel_size: 3
570 | stride: 2
571 | }
572 | }
573 |
574 | layer {
575 | name: "conv2_5"
576 | type: "Convolution"
577 | bottom: "pool1_5"
578 | top: "conv2_5"
579 | param {
580 | lr_mult: 1
581 | decay_mult: 1
582 | }
583 | param {
584 | lr_mult: 2
585 | decay_mult: 1
586 | }
587 | convolution_param {
588 | num_output: 48
589 | kernel_size: 3
590 | stride: 1
591 | weight_filler {
592 | type: "xavier"
593 | }
594 | bias_filler {
595 | type: "constant"
596 | value: 0
597 | }
598 | }
599 |
600 | }
601 | layer {
602 | name: "prelu2_5"
603 | type: "PReLU"
604 | bottom: "conv2_5"
605 | top: "conv2_5"
606 | }
607 | layer {
608 | name: "pool2_5"
609 | type: "Pooling"
610 | bottom: "conv2_5"
611 | top: "pool2_5"
612 | pooling_param {
613 | pool: MAX
614 | kernel_size: 3
615 | stride: 2
616 | }
617 |
618 | }
619 | layer {
620 | name: "conv3_5"
621 | type: "Convolution"
622 | bottom: "pool2_5"
623 | top: "conv3_5"
624 | param {
625 | lr_mult: 1
626 | decay_mult: 1
627 | }
628 | param {
629 | lr_mult: 2
630 | decay_mult: 1
631 | }
632 | convolution_param {
633 | num_output: 64
634 | kernel_size: 2
635 | stride: 1
636 | weight_filler {
637 | type: "xavier"
638 | }
639 | bias_filler {
640 | type: "constant"
641 | value: 0
642 | }
643 | }
644 |
645 | }
646 | layer {
647 | name: "prelu3_5"
648 | type: "PReLU"
649 | bottom: "conv3_5"
650 | top: "conv3_5"
651 | }
652 | ##########################
653 | layer {
654 | name: "concat"
655 | bottom: "conv3_1"
656 | bottom: "conv3_2"
657 | bottom: "conv3_3"
658 | bottom: "conv3_4"
659 | bottom: "conv3_5"
660 | top: "conv3"
661 | type: "Concat"
662 | concat_param {
663 | axis: 1
664 | }
665 | }
666 | ##########################
667 | layer {
668 | name: "fc4"
669 | type: "InnerProduct"
670 | bottom: "conv3"
671 | top: "fc4"
672 | param {
673 | lr_mult: 1
674 | decay_mult: 1
675 | }
676 | param {
677 | lr_mult: 2
678 | decay_mult: 1
679 | }
680 | inner_product_param {
681 | num_output: 256
682 | weight_filler {
683 | type: "xavier"
684 | }
685 | bias_filler {
686 | type: "constant"
687 | value: 0
688 | }
689 | }
690 |
691 | }
692 | layer {
693 | name: "prelu4"
694 | type: "PReLU"
695 | bottom: "fc4"
696 | top: "fc4"
697 | }
698 | ############################
699 | layer {
700 | name: "fc4_1"
701 | type: "InnerProduct"
702 | bottom: "fc4"
703 | top: "fc4_1"
704 | param {
705 | lr_mult: 1
706 | decay_mult: 1
707 | }
708 | param {
709 | lr_mult: 2
710 | decay_mult: 1
711 | }
712 | inner_product_param {
713 | num_output: 64
714 | weight_filler {
715 | type: "xavier"
716 | }
717 | bias_filler {
718 | type: "constant"
719 | value: 0
720 | }
721 | }
722 |
723 | }
724 | layer {
725 | name: "prelu4_1"
726 | type: "PReLU"
727 | bottom: "fc4_1"
728 | top: "fc4_1"
729 | }
730 | layer {
731 | name: "fc5_1"
732 | type: "InnerProduct"
733 | bottom: "fc4_1"
734 | top: "fc5_1"
735 | param {
736 | lr_mult: 1
737 | decay_mult: 1
738 | }
739 | param {
740 | lr_mult: 2
741 | decay_mult: 1
742 | }
743 | inner_product_param {
744 | num_output: 2
745 | weight_filler {
746 | type: "xavier"
747 | #type: "constant"
748 | #value: 0
749 | }
750 | bias_filler {
751 | type: "constant"
752 | value: 0
753 | }
754 | }
755 | }
756 |
757 |
758 | #########################
759 | layer {
760 | name: "fc4_2"
761 | type: "InnerProduct"
762 | bottom: "fc4"
763 | top: "fc4_2"
764 | param {
765 | lr_mult: 1
766 | decay_mult: 1
767 | }
768 | param {
769 | lr_mult: 2
770 | decay_mult: 1
771 | }
772 | inner_product_param {
773 | num_output: 64
774 | weight_filler {
775 | type: "xavier"
776 | }
777 | bias_filler {
778 | type: "constant"
779 | value: 0
780 | }
781 | }
782 |
783 | }
784 | layer {
785 | name: "prelu4_2"
786 | type: "PReLU"
787 | bottom: "fc4_2"
788 | top: "fc4_2"
789 | }
790 | layer {
791 | name: "fc5_2"
792 | type: "InnerProduct"
793 | bottom: "fc4_2"
794 | top: "fc5_2"
795 | param {
796 | lr_mult: 1
797 | decay_mult: 1
798 | }
799 | param {
800 | lr_mult: 2
801 | decay_mult: 1
802 | }
803 | inner_product_param {
804 | num_output: 2
805 | weight_filler {
806 | type: "xavier"
807 | #type: "constant"
808 | #value: 0
809 | }
810 | bias_filler {
811 | type: "constant"
812 | value: 0
813 | }
814 | }
815 | }
816 |
817 | #########################
818 | layer {
819 | name: "fc4_3"
820 | type: "InnerProduct"
821 | bottom: "fc4"
822 | top: "fc4_3"
823 | param {
824 | lr_mult: 1
825 | decay_mult: 1
826 | }
827 | param {
828 | lr_mult: 2
829 | decay_mult: 1
830 | }
831 | inner_product_param {
832 | num_output: 64
833 | weight_filler {
834 | type: "xavier"
835 | }
836 | bias_filler {
837 | type: "constant"
838 | value: 0
839 | }
840 | }
841 |
842 | }
843 | layer {
844 | name: "prelu4_3"
845 | type: "PReLU"
846 | bottom: "fc4_3"
847 | top: "fc4_3"
848 | }
849 | layer {
850 | name: "fc5_3"
851 | type: "InnerProduct"
852 | bottom: "fc4_3"
853 | top: "fc5_3"
854 | param {
855 | lr_mult: 1
856 | decay_mult: 1
857 | }
858 | param {
859 | lr_mult: 2
860 | decay_mult: 1
861 | }
862 | inner_product_param {
863 | num_output: 2
864 | weight_filler {
865 | type: "xavier"
866 | #type: "constant"
867 | #value: 0
868 | }
869 | bias_filler {
870 | type: "constant"
871 | value: 0
872 | }
873 | }
874 | }
875 |
876 | #########################
877 | layer {
878 | name: "fc4_4"
879 | type: "InnerProduct"
880 | bottom: "fc4"
881 | top: "fc4_4"
882 | param {
883 | lr_mult: 1
884 | decay_mult: 1
885 | }
886 | param {
887 | lr_mult: 2
888 | decay_mult: 1
889 | }
890 | inner_product_param {
891 | num_output: 64
892 | weight_filler {
893 | type: "xavier"
894 | }
895 | bias_filler {
896 | type: "constant"
897 | value: 0
898 | }
899 | }
900 |
901 | }
902 | layer {
903 | name: "prelu4_4"
904 | type: "PReLU"
905 | bottom: "fc4_4"
906 | top: "fc4_4"
907 | }
908 | layer {
909 | name: "fc5_4"
910 | type: "InnerProduct"
911 | bottom: "fc4_4"
912 | top: "fc5_4"
913 | param {
914 | lr_mult: 1
915 | decay_mult: 1
916 | }
917 | param {
918 | lr_mult: 2
919 | decay_mult: 1
920 | }
921 | inner_product_param {
922 | num_output: 2
923 | weight_filler {
924 | type: "xavier"
925 | #type: "constant"
926 | #value: 0
927 | }
928 | bias_filler {
929 | type: "constant"
930 | value: 0
931 | }
932 | }
933 | }
934 |
935 | #########################
936 | layer {
937 | name: "fc4_5"
938 | type: "InnerProduct"
939 | bottom: "fc4"
940 | top: "fc4_5"
941 | param {
942 | lr_mult: 1
943 | decay_mult: 1
944 | }
945 | param {
946 | lr_mult: 2
947 | decay_mult: 1
948 | }
949 | inner_product_param {
950 | num_output: 64
951 | weight_filler {
952 | type: "xavier"
953 | }
954 | bias_filler {
955 | type: "constant"
956 | value: 0
957 | }
958 | }
959 |
960 | }
961 | layer {
962 | name: "prelu4_5"
963 | type: "PReLU"
964 | bottom: "fc4_5"
965 | top: "fc4_5"
966 | }
967 | layer {
968 | name: "fc5_5"
969 | type: "InnerProduct"
970 | bottom: "fc4_5"
971 | top: "fc5_5"
972 | param {
973 | lr_mult: 1
974 | decay_mult: 1
975 | }
976 | param {
977 | lr_mult: 2
978 | decay_mult: 1
979 | }
980 | inner_product_param {
981 | num_output: 2
982 | weight_filler {
983 | type: "xavier"
984 | #type: "constant"
985 | #value: 0
986 | }
987 | bias_filler {
988 | type: "constant"
989 | value: 0
990 | }
991 | }
992 | }
993 |
994 | #########################
995 |
996 |
--------------------------------------------------------------------------------
/insightface/src/face_preprocess.py:
--------------------------------------------------------------------------------
1 |
2 | import cv2
3 | import numpy as np
4 | from skimage import transform as trans
5 |
def parse_lst_line(line):
    """Parse one line of a .lst index file.

    Expected tab-separated layout:
        aligned \t image_path \t label [\t x1 y1 x2 y2 [\t 10 landmark floats]]

    Parameters
    ----------
    line : str
        One line from the list file.

    Returns
    -------
    tuple
        (image_path, label, bbox, landmark, aligned) where bbox is an
        int32 array of shape (4,) or None, and landmark is an array of
        shape (5, 2) -- one (x, y) row per point -- or None.
    """
    vec = line.strip().split("\t")
    assert len(vec) >= 3
    aligned = int(vec[0])
    image_path = vec[1]
    label = int(vec[2])
    bbox = None
    landmark = None
    if len(vec) > 3:
        # fields 3..6 hold the bounding-box corners
        bbox = np.zeros((4,), dtype=np.int32)
        # range(), not the Python-2-only xrange() the original used
        for i in range(3, 7):
            bbox[i - 3] = int(vec[i])
        landmark = None
        if len(vec) > 7:
            # fields 7..16 hold 5 landmark points: all x's, then all y's
            _l = [float(vec[i]) for i in range(7, 17)]
            landmark = np.array(_l).reshape((2, 5)).T
    return image_path, label, bbox, landmark, aligned
27 |
28 |
29 |
30 |
def read_image(img_path, **kwargs):
    """Load an image from disk with OpenCV.

    Parameters
    ----------
    img_path : str
        Path to the image file.
    mode : str, keyword, default 'rgb'
        'rgb', 'bgr' or 'gray'.
    layout : str, keyword, default 'HWC'
        'HWC' or 'CHW' (channel-first); only applies to color modes.

    Returns
    -------
    numpy.ndarray or None
        The image, or None if OpenCV could not read the file.
    """
    mode = kwargs.get('mode', 'rgb')
    layout = kwargs.get('layout', 'HWC')
    if mode == 'gray':
        # cv2.CV_LOAD_IMAGE_GRAYSCALE was removed in OpenCV 3.x;
        # cv2.IMREAD_GRAYSCALE is the supported equivalent.
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    else:
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        # imread returns None on failure; bail out before indexing it
        if img is None:
            return None
        if mode == 'rgb':
            # OpenCV loads BGR; reverse the channel axis to get RGB
            img = img[..., ::-1]
        if layout == 'CHW':
            img = np.transpose(img, (2, 0, 1))
    return img
44 |
45 |
def preprocess(img, bbox=None, landmark=None, **kwargs):
    """
    Crop or align a face image to a fixed output size.

    If 5-point landmarks are given, a similarity transform mapping them to a
    canonical template is estimated and the image is warped; otherwise the
    function falls back to a margin-padded bbox crop, or a center crop when
    no bbox is given either.

    Parameters:
    ----------
    img: numpy array (h, w, 3) or str
        input image, or a path loaded via read_image()
    bbox: numpy array (4,), optional
        face box (x1, y1, x2, y2); used only when landmark is None
    landmark: numpy array (5, 2), optional
        five facial points (x, y); triggers the alignment path
    image_size: str, keyword, e.g. '112,112' or '112,96'
        output size as 'height,width'; height must be 112
    margin: int, keyword, default 44
        padding added around the bbox crop

    Returns:
    -------
    numpy array -- the cropped or aligned face image
    """
    if isinstance(img, str):
        img = read_image(img, **kwargs)
    M = None
    image_size = []
    str_image_size = kwargs.get('image_size', '')
    if len(str_image_size)>0:
        image_size = [int(x) for x in str_image_size.split(',')]
        if len(image_size)==1:
            image_size = [image_size[0], image_size[0]]
        assert len(image_size)==2
        assert image_size[0]==112
        # NOTE(review): this assert is redundant -- the previous line already
        # guarantees image_size[0]==112, so the `or` is always true.
        assert image_size[0]==112 or image_size[1]==96
    if landmark is not None:
        assert len(image_size)==2
        # canonical 5-point template (eyes, nose tip, mouth corners),
        # laid out for a 112x96 output
        src = np.array([
          [30.2946, 51.6963],
          [65.5318, 51.5014],
          [48.0252, 71.7366],
          [33.5493, 92.3655],
          [62.7299, 92.2041] ], dtype=np.float32 )
        if image_size[1]==112:
            # shift the template right to center it in the wider 112x112 crop
            src[:,0] += 8.0
        dst = landmark.astype(np.float32)

        # least-squares similarity transform: detected points -> template
        tform = trans.SimilarityTransform()
        tform.estimate(dst, src)
        # keep only the 2x3 affine part for cv2.warpAffine
        M = tform.params[0:2,:]
        #M = cv2.estimateRigidTransform( dst.reshape(1,5,2), src.reshape(1,5,2), False)

    if M is None:
        # no landmarks: plain cropping path
        if bbox is None: #use center crop
            # trim 6.25% of the image off each side
            det = np.zeros(4, dtype=np.int32)
            det[0] = int(img.shape[1]*0.0625)
            det[1] = int(img.shape[0]*0.0625)
            det[2] = img.shape[1] - det[0]
            det[3] = img.shape[0] - det[1]
        else:
            det = bbox
        margin = kwargs.get('margin', 44)
        # grow the box by margin/2 on every side, clipped to the image bounds
        bb = np.zeros(4, dtype=np.int32)
        bb[0] = np.maximum(det[0]-margin/2, 0)
        bb[1] = np.maximum(det[1]-margin/2, 0)
        bb[2] = np.minimum(det[2]+margin/2, img.shape[1])
        bb[3] = np.minimum(det[3]+margin/2, img.shape[0])
        ret = img[bb[1]:bb[3],bb[0]:bb[2],:]
        if len(image_size)>0:
            # cv2.resize takes (width, height)
            ret = cv2.resize(ret, (image_size[1], image_size[0]))
        return ret
    else: #do align using landmark
        assert len(image_size)==2

        #src = src[0:3,:]
        #dst = dst[0:3,:]


        #print(src.shape, dst.shape)
        #print(src)
        #print(dst)
        #print(M)
        # warp to (width, height) = (image_size[1], image_size[0])
        warped = cv2.warpAffine(img,M,(image_size[1],image_size[0]), borderValue = 0.0)

        #tform3 = trans.ProjectiveTransform()
        #tform3.estimate(src, dst)
        #warped = trans.warp(img, tform3, output_shape=_shape)
        return warped
112 |
113 |
114 |
--------------------------------------------------------------------------------
/insightface/src/helper.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | # YuanYang
3 | import math
4 | import cv2
5 | import numpy as np
6 |
7 |
8 | def nms(boxes, overlap_threshold, mode='Union'):
9 | """
10 | non max suppression
11 |
12 | Parameters:
13 | ----------
14 | box: numpy array n x 5
15 | input bbox array
16 | overlap_threshold: float number
17 | threshold of overlap
18 | mode: float number
19 | how to compute overlap ratio, 'Union' or 'Min'
20 | Returns:
21 | -------
22 | index array of the selected bbox
23 | """
24 | # if there are no boxes, return an empty list
25 | if len(boxes) == 0:
26 | return []
27 |
28 | # if the bounding boxes integers, convert them to floats
29 | if boxes.dtype.kind == "i":
30 | boxes = boxes.astype("float")
31 |
32 | # initialize the list of picked indexes
33 | pick = []
34 |
35 | # grab the coordinates of the bounding boxes
36 | x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)]
37 |
38 | area = (x2 - x1 + 1) * (y2 - y1 + 1)
39 | idxs = np.argsort(score)
40 |
41 | # keep looping while some indexes still remain in the indexes list
42 | while len(idxs) > 0:
43 | # grab the last index in the indexes list and add the index value to the list of picked indexes
44 | last = len(idxs) - 1
45 | i = idxs[last]
46 | pick.append(i)
47 |
48 | xx1 = np.maximum(x1[i], x1[idxs[:last]])
49 | yy1 = np.maximum(y1[i], y1[idxs[:last]])
50 | xx2 = np.minimum(x2[i], x2[idxs[:last]])
51 | yy2 = np.minimum(y2[i], y2[idxs[:last]])
52 |
53 | # compute the width and height of the bounding box
54 | w = np.maximum(0, xx2 - xx1 + 1)
55 | h = np.maximum(0, yy2 - yy1 + 1)
56 |
57 | inter = w * h
58 | if mode == 'Min':
59 | overlap = inter / np.minimum(area[i], area[idxs[:last]])
60 | else:
61 | overlap = inter / (area[i] + area[idxs[:last]] - inter)
62 |
63 | # delete all indexes from the index list that have
64 | idxs = np.delete(idxs, np.concatenate(([last],
65 | np.where(overlap > overlap_threshold)[0])))
66 |
67 | return pick
68 |
def adjust_input(in_data):
    """
    adjust the input from (h, w, c) to (1, c, h, w) for network input

    Parameters:
    ----------
    in_data: numpy array of shape (h, w, c)
        input data
    Returns:
    -------
    out_data: numpy array of shape (1, c, h, w), float32
        transposed, batched, and normalized to roughly [-1, 1]
    """
    # Compare dtypes with != rather than `is`: numpy documents dtype
    # comparison via equality; identity relies on unguaranteed caching.
    if in_data.dtype != np.float32:
        out_data = in_data.astype(np.float32)
    else:
        out_data = in_data

    # HWC -> CHW, then add a leading batch axis
    out_data = out_data.transpose((2, 0, 1))
    out_data = np.expand_dims(out_data, 0)
    # MTCNN normalization: (x - 127.5) / 128
    out_data = (out_data - 127.5) * 0.0078125
    return out_data
91 |
def generate_bbox(map, reg, scale, threshold):
    """
    generate bbox from feature map

    Parameters:
    ----------
    map: numpy array , n x m
        detect score for each position
    reg: numpy array , 1 x 4 x n x m
        bbox regression output
    scale: float number
        scale of this detection
    threshold: float number
        detect threshold
    Returns:
    -------
    numpy array, k x 9 -- (x1, y1, x2, y2, score, dx1, dy1, dx2, dy2),
    or an empty array when nothing passes the threshold
    """
    # PNet geometry: each output cell maps to a 12x12 window with stride 2
    stride = 2
    cellsize = 12

    rows, cols = np.where(map > threshold)

    # find nothing
    if rows.size == 0:
        return np.array([])

    offsets = np.array([reg[0, k, rows, cols] for k in range(4)])
    scores = map[rows, cols]

    # project each responding cell's window back onto the original image
    x1 = np.round((stride * cols + 1) / scale)
    y1 = np.round((stride * rows + 1) / scale)
    x2 = np.round((stride * cols + 1 + cellsize) / scale)
    y2 = np.round((stride * rows + 1 + cellsize) / scale)

    return np.vstack([x1, y1, x2, y2, scores, offsets]).T
130 |
131 |
def detect_first_stage(img, net, scale, threshold):
    """
    run PNet for first stage

    Parameters:
    ----------
    img: numpy array, bgr order
        input image
    net: PNet
        worker network exposing predict()
    scale: float number
        how much the input image is scaled before the forward pass
    threshold: float number
        detection score threshold
    Returns:
    -------
    numpy array of candidate bboxes, or None when nothing is found
    """
    h, w, _ = img.shape
    # cv2.resize takes (width, height)
    scaled_size = (int(math.ceil(w * scale)), int(math.ceil(h * scale)))
    resized = cv2.resize(img, scaled_size)

    # normalize / transpose for the network, then run PNet
    net_out = net.predict(adjust_input(resized))

    # net_out[1][0, 1] is the per-position face-score map,
    # net_out[0] the bbox regression map
    candidates = generate_bbox(net_out[1][0, 1, :, :], net_out[0], scale, threshold)
    if candidates.size == 0:
        return None

    # thin the candidates with a loose NMS pass
    keep = nms(candidates[:, 0:5], 0.5, mode='Union')
    return candidates[keep]
166 |
def detect_first_stage_warpper( args ):
    """Unpack an (img, net, scale, threshold) tuple for pool.map-style calls.

    NOTE: the misspelling of "wrapper" is preserved because callers import
    this name.
    """
    img, net, scale, threshold = args
    return detect_first_stage(img, net, scale, threshold)
169 |
--------------------------------------------------------------------------------
/insightface/src/mtcnn_detector.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import os
3 | import mxnet as mx
4 | import numpy as np
5 | import math
6 | import cv2
7 | from multiprocessing import Pool
8 | from itertools import repeat
9 | try:
10 | from itertools import izip
11 | except ImportError:
12 | izip = zip
13 |
14 | from .helper import nms, adjust_input, generate_bbox, detect_first_stage_warpper
15 |
16 | class MtcnnDetector(object):
17 | """
18 | Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Neural Networks
19 | see https://github.com/kpzhang93/MTCNN_face_detection_alignment
20 | this is a mxnet version
21 | """
22 | def __init__(self,
23 | model_folder='.',
24 | minsize = 20,
25 | threshold = [0.6, 0.7, 0.8],
26 | factor = 0.709,
27 | num_worker = 1,
28 | accurate_landmark = False,
29 | ctx=mx.cpu()):
30 | """
31 | Initialize the detector
32 |
33 | Parameters:
34 | ----------
35 | model_folder : string
36 | path for the models
37 | minsize : float number
38 | minimal face to detect
39 | threshold : float number
40 | detect threshold for 3 stages
41 | factor: float number
42 | scale factor for image pyramid
43 | num_worker: int number
44 | number of processes we use for first stage
45 | accurate_landmark: bool
46 | use accurate landmark localization or not
47 |
48 | """
49 | self.num_worker = num_worker
50 | self.accurate_landmark = accurate_landmark
51 |
52 | # load 4 models from folder
53 | models = ['det1', 'det2', 'det3','det4']
54 | models = [ os.path.join(model_folder, f) for f in models]
55 |
56 | self.PNets = []
57 | for i in range(num_worker):
58 | workner_net = mx.model.FeedForward.load(models[0], 1, ctx=ctx)
59 | self.PNets.append(workner_net)
60 |
61 | #self.Pool = Pool(num_worker)
62 |
63 | self.RNet = mx.model.FeedForward.load(models[1], 1, ctx=ctx)
64 | self.ONet = mx.model.FeedForward.load(models[2], 1, ctx=ctx)
65 | self.LNet = mx.model.FeedForward.load(models[3], 1, ctx=ctx)
66 |
67 | self.minsize = float(minsize)
68 | self.factor = float(factor)
69 | self.threshold = threshold
70 |
71 |
72 | def convert_to_square(self, bbox):
73 | """
74 | convert bbox to square
75 |
76 | Parameters:
77 | ----------
78 | bbox: numpy array , shape n x 5
79 | input bbox
80 |
81 | Returns:
82 | -------
83 | square bbox
84 | """
85 | square_bbox = bbox.copy()
86 |
87 | h = bbox[:, 3] - bbox[:, 1] + 1
88 | w = bbox[:, 2] - bbox[:, 0] + 1
89 | max_side = np.maximum(h,w)
90 | square_bbox[:, 0] = bbox[:, 0] + w*0.5 - max_side*0.5
91 | square_bbox[:, 1] = bbox[:, 1] + h*0.5 - max_side*0.5
92 | square_bbox[:, 2] = square_bbox[:, 0] + max_side - 1
93 | square_bbox[:, 3] = square_bbox[:, 1] + max_side - 1
94 | return square_bbox
95 |
96 | def calibrate_box(self, bbox, reg):
97 | """
98 | calibrate bboxes
99 |
100 | Parameters:
101 | ----------
102 | bbox: numpy array, shape n x 5
103 | input bboxes
104 | reg: numpy array, shape n x 4
105 | bboxex adjustment
106 |
107 | Returns:
108 | -------
109 | bboxes after refinement
110 |
111 | """
112 | w = bbox[:, 2] - bbox[:, 0] + 1
113 | w = np.expand_dims(w, 1)
114 | h = bbox[:, 3] - bbox[:, 1] + 1
115 | h = np.expand_dims(h, 1)
116 | reg_m = np.hstack([w, h, w, h])
117 | aug = reg_m * reg
118 | bbox[:, 0:4] = bbox[:, 0:4] + aug
119 | return bbox
120 |
121 |
122 | def pad(self, bboxes, w, h):
123 | """
124 | pad the the bboxes, alse restrict the size of it
125 |
126 | Parameters:
127 | ----------
128 | bboxes: numpy array, n x 5
129 | input bboxes
130 | w: float number
131 | width of the input image
132 | h: float number
133 | height of the input image
134 | Returns :
135 | ------s
136 | dy, dx : numpy array, n x 1
137 | start point of the bbox in target image
138 | edy, edx : numpy array, n x 1
139 | end point of the bbox in target image
140 | y, x : numpy array, n x 1
141 | start point of the bbox in original image
142 | ex, ex : numpy array, n x 1
143 | end point of the bbox in original image
144 | tmph, tmpw: numpy array, n x 1
145 | height and width of the bbox
146 |
147 | """
148 | tmpw, tmph = bboxes[:, 2] - bboxes[:, 0] + 1, bboxes[:, 3] - bboxes[:, 1] + 1
149 | num_box = bboxes.shape[0]
150 |
151 | dx , dy= np.zeros((num_box, )), np.zeros((num_box, ))
152 | edx, edy = tmpw.copy()-1, tmph.copy()-1
153 |
154 | x, y, ex, ey = bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 3]
155 |
156 | tmp_index = np.where(ex > w-1)
157 | edx[tmp_index] = tmpw[tmp_index] + w - 2 - ex[tmp_index]
158 | ex[tmp_index] = w - 1
159 |
160 | tmp_index = np.where(ey > h-1)
161 | edy[tmp_index] = tmph[tmp_index] + h - 2 - ey[tmp_index]
162 | ey[tmp_index] = h - 1
163 |
164 | tmp_index = np.where(x < 0)
165 | dx[tmp_index] = 0 - x[tmp_index]
166 | x[tmp_index] = 0
167 |
168 | tmp_index = np.where(y < 0)
169 | dy[tmp_index] = 0 - y[tmp_index]
170 | y[tmp_index] = 0
171 |
172 | return_list = [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]
173 | return_list = [item.astype(np.int32) for item in return_list]
174 |
175 | return return_list
176 |
177 | def slice_index(self, number):
178 | """
179 | slice the index into (n,n,m), m < n
180 | Parameters:
181 | ----------
182 | number: int number
183 | number
184 | """
185 | def chunks(l, n):
186 | """Yield successive n-sized chunks from l."""
187 | for i in range(0, len(l), n):
188 | yield l[i:i + n]
189 | num_list = range(number)
190 | return list(chunks(num_list, self.num_worker))
191 |
    def detect_face_limited(self, img, det_type=2):
        """
        Detect landmarks (and refine boxes) assuming the face fills `img`.

        Skips the PNet image-pyramid stage entirely: the whole image is
        seeded as a single candidate box. With det_type>=2 the candidate is
        first refined by RNet before ONet; otherwise it goes straight to
        ONet. If self.accurate_landmark is set, an extra LNet pass refines
        each landmark within a local patch.

        Parameters:
        ----------
        img: numpy array, bgr order, shape (h, w, 3)
            input image assumed to contain one roughly full-frame face
        det_type: int
            2 (default) -> RNet then ONet; lower -> ONet only

        Returns:
        -------
        None if no candidate passes the thresholds, otherwise
        total_boxes: numpy array, n x 5 (x1, y1, x2, y2, score)
        points: numpy array, n x 10 (x1..x5, y1..y5) landmark coordinates
            (int32 when the accurate-landmark stage ran)
        """
        height, width, _ = img.shape
        if det_type>=2:
            # seed with one box covering the whole image, fixed score 0.9
            total_boxes = np.array( [ [0.0, 0.0, img.shape[1], img.shape[0], 0.9] ] ,dtype=np.float32)
            num_box = total_boxes.shape[0]

            # pad the bbox
            [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height)
            # (3, 24, 24) is the input shape for RNet
            input_buf = np.zeros((num_box, 3, 24, 24), dtype=np.float32)

            for i in range(num_box):
                # copy the (clipped) box contents into a zero-padded patch
                tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, :] = img[y[i]:ey[i]+1, x[i]:ex[i]+1, :]
                input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24)))

            output = self.RNet.predict(input_buf)

            # filter the total_boxes with threshold
            passed = np.where(output[1][:, 1] > self.threshold[1])
            total_boxes = total_boxes[passed]

            if total_boxes.size == 0:
                return None

            # replace the seed score with RNet's face probability
            total_boxes[:, 4] = output[1][passed, 1].reshape((-1,))
            reg = output[0][passed]

            # nms
            pick = nms(total_boxes, 0.7, 'Union')
            total_boxes = total_boxes[pick]
            # apply RNet regression, then square the boxes for ONet
            total_boxes = self.calibrate_box(total_boxes, reg[pick])
            total_boxes = self.convert_to_square(total_boxes)
            total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])
        else:
            # no RNet stage: the whole image is the single ONet candidate
            total_boxes = np.array( [ [0.0, 0.0, img.shape[1], img.shape[0], 0.9] ] ,dtype=np.float32)
        num_box = total_boxes.shape[0]
        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height)
        # (3, 48, 48) is the input shape for ONet
        input_buf = np.zeros((num_box, 3, 48, 48), dtype=np.float32)

        for i in range(num_box):
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32)
            tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, :] = img[y[i]:ey[i]+1, x[i]:ex[i]+1, :]
            input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48)))

        output = self.ONet.predict(input_buf)
        #print(output[2])

        # filter the total_boxes with threshold
        passed = np.where(output[2][:, 1] > self.threshold[2])
        total_boxes = total_boxes[passed]

        if total_boxes.size == 0:
            return None

        total_boxes[:, 4] = output[2][passed, 1].reshape((-1,))
        reg = output[1][passed]
        points = output[0][passed]

        # compute landmark points: ONet emits box-relative fractions,
        # convert them to absolute image coordinates
        bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
        bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
        points[:, 0:5] = np.expand_dims(total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5]
        points[:, 5:10] = np.expand_dims(total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10]

        # nms (calibrate first, then suppress with the stricter 'Min' mode)
        total_boxes = self.calibrate_box(total_boxes, reg)
        pick = nms(total_boxes, 0.7, 'Min')
        total_boxes = total_boxes[pick]
        points = points[pick]

        if not self.accurate_landmark:
            return total_boxes, points

        #############################################
        # extended stage
        #############################################
        # LNet refines each landmark inside a square patch whose side is
        # a quarter of the box's longer dimension
        num_box = total_boxes.shape[0]
        patchw = np.maximum(total_boxes[:, 2]-total_boxes[:, 0]+1, total_boxes[:, 3]-total_boxes[:, 1]+1)
        patchw = np.round(patchw*0.25)

        # make it even
        patchw[np.where(np.mod(patchw,2) == 1)] += 1

        # 5 landmarks x 3 channels stacked along the channel axis
        input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32)
        for i in range(5):
            x, y = points[:, i], points[:, i+5]
            # top-left corner of each landmark's patch
            x, y = np.round(x-0.5*patchw), np.round(y-0.5*patchw)
            [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(np.vstack([x, y, x+patchw-1, y+patchw-1]).T,
                                                                    width,
                                                                    height)
            for j in range(num_box):
                tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32)
                tmpim[dy[j]:edy[j]+1, dx[j]:edx[j]+1, :] = img[y[j]:ey[j]+1, x[j]:ex[j]+1, :]
                input_buf[j, i*3:i*3+3, :, :] = adjust_input(cv2.resize(tmpim, (24, 24)))

        output = self.LNet.predict(input_buf)

        pointx = np.zeros((num_box, 5))
        pointy = np.zeros((num_box, 5))

        for k in range(5):
            # do not make a large movement: clamp offsets further than
            # 0.35 from the patch centre back to 0.5 (no movement)
            tmp_index = np.where(np.abs(output[k]-0.5) > 0.35)
            output[k][tmp_index[0]] = 0.5

            # patch origin + fractional offset * patch size
            pointx[:, k] = np.round(points[:, k] - 0.5*patchw) + output[k][:, 0]*patchw
            pointy[:, k] = np.round(points[:, k+5] - 0.5*patchw) + output[k][:, 1]*patchw

        points = np.hstack([pointx, pointy])
        points = points.astype(np.int32)

        return total_boxes, points
306 |
    def detect_face(self, img, det_type=0):
        """
        Detect faces in an image with the full MTCNN cascade:
        image pyramid + PNet (first stage), RNet (second stage),
        ONet (third stage), and an optional LNet landmark refinement
        ("extended stage") when self.accurate_landmark is set.

        Parameters:
        ----------
        img: numpy array, bgr order, shape (height, width, 3)
            input image
        det_type: int, default 0
            0: run the scale pyramid + first stage to propose boxes;
            any other value: skip the first stage and use the whole
            image as a single candidate box (for pre-cropped faces)
        Returns:
        -------
        bboxes: numpy array, n x 5 (x1, y1, x2, y2, score)
            detected face boxes
        points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ..y5)
            five facial landmarks per box
        Returns None when no candidate survives all thresholds.
        """

        # check input
        # NOTE(review): img.shape is read before the "img is None" and
        # ndim checks below, so those guards can never trigger; a None or
        # grayscale input raises here instead. Left unchanged.
        height, width, _ = img.shape
        if det_type==0:
            # smallest face size the PNet input can represent
            MIN_DET_SIZE = 12

            if img is None:
                return None

            # only works for color image
            if len(img.shape) != 3:
                return None

            # detected boxes
            total_boxes = []

            minl = min( height, width)

            # get all the valid scales: geometric pyramid with ratio
            # self.factor, starting so that self.minsize maps to the
            # 12-px PNet receptive field, until the image is too small
            scales = []
            m = MIN_DET_SIZE/self.minsize
            minl *= m
            factor_count = 0
            while minl > MIN_DET_SIZE:
                scales.append(m*self.factor**factor_count)
                minl *= self.factor
                factor_count += 1

            #############################################
            # first stage
            #############################################
            #for scale in scales:
            #    return_boxes = self.detect_first_stage(img, scale, 0)
            #    if return_boxes is not None:
            #        total_boxes.append(return_boxes)

            # run PNet over batches of pyramid scales; one PNet instance
            # per scale in the batch
            sliced_index = self.slice_index(len(scales))
            total_boxes = []
            for batch in sliced_index:
                #local_boxes = self.Pool.map( detect_first_stage_warpper, \
                #            izip(repeat(img), self.PNets[:len(batch)], [scales[i] for i in batch], repeat(self.threshold[0])) )
                # NOTE(review): izip is Python-2 itertools; presumably a
                # compat alias is imported at module top -- verify before
                # running under Python 3.
                local_boxes = map( detect_first_stage_warpper, \
                            izip(repeat(img), self.PNets[:len(batch)], [scales[i] for i in batch], repeat(self.threshold[0])) )
                total_boxes.extend(local_boxes)

            # remove the Nones (scales where PNet found nothing)
            total_boxes = [ i for i in total_boxes if i is not None]

            if len(total_boxes) == 0:
                return None

            total_boxes = np.vstack(total_boxes)

            if total_boxes.size == 0:
                return None

            # merge the detection from first stage across all scales
            pick = nms(total_boxes[:, 0:5], 0.7, 'Union')
            total_boxes = total_boxes[pick]

            bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
            bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1

            # refine the bboxes: columns 5..8 hold PNet's regression
            # offsets, expressed as fractions of the box width/height
            total_boxes = np.vstack([total_boxes[:, 0]+total_boxes[:, 5] * bbw,
                                     total_boxes[:, 1]+total_boxes[:, 6] * bbh,
                                     total_boxes[:, 2]+total_boxes[:, 7] * bbw,
                                     total_boxes[:, 3]+total_boxes[:, 8] * bbh,
                                     total_boxes[:, 4]
                                     ])

            total_boxes = total_boxes.T
            total_boxes = self.convert_to_square(total_boxes)
            total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])
        else:
            # det_type != 0: treat the whole image as one candidate box
            # with a fixed confidence of 0.9
            total_boxes = np.array( [ [0.0, 0.0, img.shape[1], img.shape[0], 0.9] ] ,dtype=np.float32)

        #############################################
        # second stage
        #############################################
        num_box = total_boxes.shape[0]

        # pad the bbox so crops that spill over the image border are
        # filled with zeros instead of wrapping/failing
        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height)
        # (3, 24, 24) is the input shape for RNet
        input_buf = np.zeros((num_box, 3, 24, 24), dtype=np.float32)

        for i in range(num_box):
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
            tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, :] = img[y[i]:ey[i]+1, x[i]:ex[i]+1, :]
            input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24)))

        output = self.RNet.predict(input_buf)

        # filter the total_boxes with threshold
        # (output[1] = face/non-face scores, output[0] = box regression)
        passed = np.where(output[1][:, 1] > self.threshold[1])
        total_boxes = total_boxes[passed]

        if total_boxes.size == 0:
            return None

        total_boxes[:, 4] = output[1][passed, 1].reshape((-1,))
        reg = output[0][passed]

        # nms
        pick = nms(total_boxes, 0.7, 'Union')
        total_boxes = total_boxes[pick]
        total_boxes = self.calibrate_box(total_boxes, reg[pick])
        total_boxes = self.convert_to_square(total_boxes)
        total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])

        #############################################
        # third stage
        #############################################
        num_box = total_boxes.shape[0]

        # pad the bbox
        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height)
        # (3, 48, 48) is the input shape for ONet
        input_buf = np.zeros((num_box, 3, 48, 48), dtype=np.float32)

        for i in range(num_box):
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32)
            tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, :] = img[y[i]:ey[i]+1, x[i]:ex[i]+1, :]
            input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48)))

        output = self.ONet.predict(input_buf)

        # filter the total_boxes with threshold
        # (output[2] = scores, output[1] = box regression,
        #  output[0] = landmark positions relative to the box)
        passed = np.where(output[2][:, 1] > self.threshold[2])
        total_boxes = total_boxes[passed]

        if total_boxes.size == 0:
            return None

        total_boxes[:, 4] = output[2][passed, 1].reshape((-1,))
        reg = output[1][passed]
        points = output[0][passed]

        # compute landmark points: map ONet's normalized (0..1) landmark
        # coordinates into absolute image coordinates
        bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
        bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
        points[:, 0:5] = np.expand_dims(total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5]
        points[:, 5:10] = np.expand_dims(total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10]

        # nms ('Min' mode: overlap measured against the smaller box)
        total_boxes = self.calibrate_box(total_boxes, reg)
        pick = nms(total_boxes, 0.7, 'Min')
        total_boxes = total_boxes[pick]
        points = points[pick]

        if not self.accurate_landmark:
            return total_boxes, points

        #############################################
        # extended stage
        #############################################
        # refine each landmark with LNet on a square patch of ~1/4 the
        # box's longer side, centered on the current landmark estimate
        num_box = total_boxes.shape[0]
        patchw = np.maximum(total_boxes[:, 2]-total_boxes[:, 0]+1, total_boxes[:, 3]-total_boxes[:, 1]+1)
        patchw = np.round(patchw*0.25)

        # make it even
        patchw[np.where(np.mod(patchw,2) == 1)] += 1

        # 15 channels = 5 landmarks x 3 color channels, stacked
        input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32)
        for i in range(5):
            x, y = points[:, i], points[:, i+5]
            x, y = np.round(x-0.5*patchw), np.round(y-0.5*patchw)
            [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(np.vstack([x, y, x+patchw-1, y+patchw-1]).T,
                                                                    width,
                                                                    height)
            for j in range(num_box):
                tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32)
                tmpim[dy[j]:edy[j]+1, dx[j]:edx[j]+1, :] = img[y[j]:ey[j]+1, x[j]:ex[j]+1, :]
                input_buf[j, i*3:i*3+3, :, :] = adjust_input(cv2.resize(tmpim, (24, 24)))

        output = self.LNet.predict(input_buf)

        pointx = np.zeros((num_box, 5))
        pointy = np.zeros((num_box, 5))

        for k in range(5):
            # do not make a large movement: clamp offsets further than
            # 0.35 * patch width back to the patch center (0.5)
            tmp_index = np.where(np.abs(output[k]-0.5) > 0.35)
            output[k][tmp_index[0]] = 0.5

            pointx[:, k] = np.round(points[:, k] - 0.5*patchw) + output[k][:, 0]*patchw
            pointy[:, k] = np.round(points[:, k+5] - 0.5*patchw) + output[k][:, 1]*patchw

        points = np.hstack([pointx, pointy])
        points = points.astype(np.int32)

        return total_boxes, points
514 |
515 |
516 |
517 | def list2colmatrix(self, pts_list):
518 | """
519 | convert list to column matrix
520 | Parameters:
521 | ----------
522 | pts_list:
523 | input list
524 | Retures:
525 | -------
526 | colMat:
527 |
528 | """
529 | assert len(pts_list) > 0
530 | colMat = []
531 | for i in range(len(pts_list)):
532 | colMat.append(pts_list[i][0])
533 | colMat.append(pts_list[i][1])
534 | colMat = np.matrix(colMat).transpose()
535 | return colMat
536 |
537 | def find_tfrom_between_shapes(self, from_shape, to_shape):
538 | """
539 | find transform between shapes
540 | Parameters:
541 | ----------
542 | from_shape:
543 | to_shape:
544 | Retures:
545 | -------
546 | tran_m:
547 | tran_b:
548 | """
549 | assert from_shape.shape[0] == to_shape.shape[0] and from_shape.shape[0] % 2 == 0
550 |
551 | sigma_from = 0.0
552 | sigma_to = 0.0
553 | cov = np.matrix([[0.0, 0.0], [0.0, 0.0]])
554 |
555 | # compute the mean and cov
556 | from_shape_points = from_shape.reshape(from_shape.shape[0]/2, 2)
557 | to_shape_points = to_shape.reshape(to_shape.shape[0]/2, 2)
558 | mean_from = from_shape_points.mean(axis=0)
559 | mean_to = to_shape_points.mean(axis=0)
560 |
561 | for i in range(from_shape_points.shape[0]):
562 | temp_dis = np.linalg.norm(from_shape_points[i] - mean_from)
563 | sigma_from += temp_dis * temp_dis
564 | temp_dis = np.linalg.norm(to_shape_points[i] - mean_to)
565 | sigma_to += temp_dis * temp_dis
566 | cov += (to_shape_points[i].transpose() - mean_to.transpose()) * (from_shape_points[i] - mean_from)
567 |
568 | sigma_from = sigma_from / to_shape_points.shape[0]
569 | sigma_to = sigma_to / to_shape_points.shape[0]
570 | cov = cov / to_shape_points.shape[0]
571 |
572 | # compute the affine matrix
573 | s = np.matrix([[1.0, 0.0], [0.0, 1.0]])
574 | u, d, vt = np.linalg.svd(cov)
575 |
576 | if np.linalg.det(cov) < 0:
577 | if d[1] < d[0]:
578 | s[1, 1] = -1
579 | else:
580 | s[0, 0] = -1
581 | r = u * s * vt
582 | c = 1.0
583 | if sigma_from != 0:
584 | c = 1.0 / sigma_from * np.trace(np.diag(d) * s)
585 |
586 | tran_b = mean_to.transpose() - c * r * mean_from.transpose()
587 | tran_m = c * r
588 |
589 | return tran_m, tran_b
590 |
591 | def extract_image_chips(self, img, points, desired_size=256, padding=0):
592 | """
593 | crop and align face
594 | Parameters:
595 | ----------
596 | img: numpy array, bgr order of shape (1, 3, n, m)
597 | input image
598 | points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ..y5)
599 | desired_size: default 256
600 | padding: default 0
601 | Retures:
602 | -------
603 | crop_imgs: list, n
604 | cropped and aligned faces
605 | """
606 | crop_imgs = []
607 | for p in points:
608 | shape =[]
609 | for k in range(len(p)/2):
610 | shape.append(p[k])
611 | shape.append(p[k+5])
612 |
613 | if padding > 0:
614 | padding = padding
615 | else:
616 | padding = 0
617 | # average positions of face points
618 | mean_face_shape_x = [0.224152, 0.75610125, 0.490127, 0.254149, 0.726104]
619 | mean_face_shape_y = [0.2119465, 0.2119465, 0.628106, 0.780233, 0.780233]
620 |
621 | from_points = []
622 | to_points = []
623 |
624 | for i in range(len(shape)/2):
625 | x = (padding + mean_face_shape_x[i]) / (2 * padding + 1) * desired_size
626 | y = (padding + mean_face_shape_y[i]) / (2 * padding + 1) * desired_size
627 | to_points.append([x, y])
628 | from_points.append([shape[2*i], shape[2*i+1]])
629 |
630 | # convert the points to Mat
631 | from_mat = self.list2colmatrix(from_points)
632 | to_mat = self.list2colmatrix(to_points)
633 |
634 | # compute the similar transfrom
635 | tran_m, tran_b = self.find_tfrom_between_shapes(from_mat, to_mat)
636 |
637 | probe_vec = np.matrix([1.0, 0.0]).transpose()
638 | probe_vec = tran_m * probe_vec
639 |
640 | scale = np.linalg.norm(probe_vec)
641 | angle = 180.0 / math.pi * math.atan2(probe_vec[1, 0], probe_vec[0, 0])
642 |
643 | from_center = [(shape[0]+shape[2])/2.0, (shape[1]+shape[3])/2.0]
644 | to_center = [0, 0]
645 | to_center[1] = desired_size * 0.4
646 | to_center[0] = desired_size * 0.5
647 |
648 | ex = to_center[0] - from_center[0]
649 | ey = to_center[1] - from_center[1]
650 |
651 | rot_mat = cv2.getRotationMatrix2D((from_center[0], from_center[1]), -1*angle, scale)
652 | rot_mat[0][2] += ex
653 | rot_mat[1][2] += ey
654 |
655 | chips = cv2.warpAffine(img, rot_mat, (desired_size, desired_size))
656 | crop_imgs.append(chips)
657 |
658 | return crop_imgs
659 |
660 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | certifi==2021.5.30
2 | charset-normalizer==2.0.4
3 | cycler==0.10.0
4 | graphviz==0.8.4
5 | idna==3.2
6 | imageio==2.9.0
7 | joblib==1.0.1
8 | kiwisolver==1.3.1
9 | matplotlib==3.4.3
10 | mxnet-cu100==1.8.0.post0
11 | networkx==2.6.2
12 | numpy==1.21.1
13 | opencv-python==4.5.3.56
14 | Pillow==8.3.1
15 | pyparsing==2.4.7
16 | python-dateutil==2.8.2
17 | PyWavelets==1.1.1
18 | requests==2.26.0
19 | scikit-image==0.18.2
20 | scikit-learn==0.24.2
21 | scipy==1.7.1
22 | six==1.16.0
23 | threadpoolctl==2.2.0
24 | tifffile==2021.8.8
25 | urllib3==1.26.6
26 |
--------------------------------------------------------------------------------
/serfiq_example.py:
--------------------------------------------------------------------------------
# Author: Jan Niklas Kolf, 2020
from face_image_quality import SER_FIQ
import cv2

if __name__ == "__main__":
    # Sample code of calculating the score of an image

    # Create the SER-FIQ Model
    # Choose the GPU, default is 0.
    ser_fiq = SER_FIQ(gpu=0)

    # Score both sample images with the same pipeline:
    # read -> MTCNN alignment -> SER-FIQ score.
    image_paths = ("./data/test_img.jpeg", "./data/test_img2.jpeg")
    for number, path in enumerate(image_paths, start=1):
        # Load the test image; cv2.imread returns None on failure
        test_img = cv2.imread(path)
        if test_img is None:
            raise FileNotFoundError(f"Could not read image: {path}")

        # Align the image
        aligned_img = ser_fiq.apply_mtcnn(test_img)

        # Calculate the quality score of the image
        # T=100 (default) is a good choice
        # Alpha and r parameters can be used to scale your
        # score distribution.
        score = ser_fiq.get_score(aligned_img, T=100)

        print("SER-FIQ quality score of image", number, "is", score)
--------------------------------------------------------------------------------