├── requirements.in ├── reference.pdf ├── facenet_code ├── face.py ├── encoder.py ├── detection.py ├── align_dataset_mtcnn.py ├── facenet.py └── detect_face.py ├── requirements.txt ├── detect_blur.py └── README.md /requirements.in: -------------------------------------------------------------------------------- 1 | # requirements.in 2 | 3 | imutils 4 | opencv-python 5 | tensorflow 6 | numpy 7 | scipy 8 | -------------------------------------------------------------------------------- /reference.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/giovanadelucca/Blurry-Faces-Detection-in-Videos/HEAD/reference.pdf -------------------------------------------------------------------------------- /facenet_code/face.py: -------------------------------------------------------------------------------- 1 | class Face: 2 | def __init__(self): 3 | self.name = None 4 | self.bounding_box = None 5 | self.image = None 6 | self.container_image = None 7 | self.embedding = None 8 | self.confidence = None 9 | self.class_probabilities = None -------------------------------------------------------------------------------- /facenet_code/encoder.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os 3 | 4 | from facenet_code import facenet 5 | 6 | tf.logging.set_verbosity(tf.logging.ERROR) 7 | # tf.logging.set_verbosity(tf.logging.INFO) 8 | # tf.logging.set_verbosity(tf.logging.WARN) 9 | # tf.logging.set_verbosity(tf.logging.DEBUG) 10 | # tf.logging.set_verbosity(tf.logging.FATAL) 11 | 12 | 13 | PATH_ENCODE_EMBEDDED = "facenet_code/weights/20180402-114759.pb" 14 | 15 | 16 | class Encoder: 17 | def __init__(self): 18 | self.sess = tf.Session() 19 | with self.sess.as_default(): 20 | facenet.load_model(PATH_ENCODE_EMBEDDED) 21 | 22 | def generate_embedding(self, face): 23 | # Get input and output tensors 24 | images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") 25 | embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") 26 | phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0") 27 | 28 | prewhiten_face = facenet.prewhiten(face.image) 29 | 30 | # Run forward pass to calculate embeddings 31 | feed_dict = {images_placeholder: [prewhiten_face], phase_train_placeholder: False} 32 | return self.sess.run(embeddings, feed_dict=feed_dict)[0] -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile 3 | # To update, run: 4 | # 5 | # pip-compile 6 | # 7 | absl-py==0.9.0 # via tensorboard, tensorflow 8 | astor==0.8.1 # via tensorflow 9 | gast==0.2.2 # via tensorflow 10 | google-pasta==0.1.8 # via tensorflow 11 | grpcio==1.27.2 # via tensorboard, tensorflow 12 | h5py==2.10.0 # via keras-applications 13 | imutils==0.5.3 # via -r requirements.in 14 | keras-applications==1.0.8 # via tensorflow 15 | keras-preprocessing==1.1.0 # via tensorflow 16 | markdown==3.2.1 # via tensorboard 17 | numpy==1.18.1 # via -r requirements.in, h5py, keras-applications, keras-preprocessing, opencv-python, opt-einsum, scipy, tensorboard, tensorflow 18 | opencv-python==4.2.0.32 # via -r requirements.in 19 | opt-einsum==3.2.0 # via tensorflow 20 | protobuf==3.11.3 # via tensorboard, tensorflow 21 | scipy==1.4.1 # via -r requirements.in 22 | six==1.14.0 # via absl-py, 
google-pasta, grpcio, h5py, keras-preprocessing, protobuf, tensorboard, tensorflow 23 | tensorboard==1.15.2 # via tensorflow 24 | tensorflow-estimator==1.15.2 # via tensorflow 25 | tensorflow==1.15.2 # via -r requirements.in 26 | termcolor==1.1.0 # via tensorflow 27 | werkzeug==1.0.0 # via tensorboard 28 | wheel==0.34.2 # via tensorboard, tensorflow 29 | wrapt==1.12.0 # via tensorflow 30 | 31 | # The following packages are considered to be unsafe in a requirements file: 32 | # setuptools 33 | -------------------------------------------------------------------------------- /facenet_code/detection.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import cv2 4 | import os 5 | 6 | from facenet_code.face import Face 7 | from facenet_code import detect_face 8 | 9 | tf.logging.set_verbosity(tf.logging.ERROR) 10 | # tf.logging.set_verbosity(tf.logging.INFO) 11 | # tf.logging.set_verbosity(tf.logging.WARN) 12 | # tf.logging.set_verbosity(tf.logging.DEBUG) 13 | # tf.logging.set_verbosity(tf.logging.FATAL) 14 | 15 | gpu_memory_fraction = 0.3 16 | 17 | class Detection: 18 | # face detection parameters 19 | minsize = 20 # minimum size of face 20 | threshold = [0.6, 0.7, 0.7] # three steps's threshold 21 | factor = 0.709 # scale factor 22 | 23 | def __init__(self, face_crop_size=160, face_crop_margin=32): 24 | self.pnet, self.rnet, self.onet = self._setup_mtcnn() 25 | self.face_crop_size = face_crop_size 26 | self.face_crop_margin = face_crop_margin 27 | 28 | def _setup_mtcnn(self): 29 | with tf.Graph().as_default(): 30 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction) 31 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) 32 | with sess.as_default(): 33 | return detect_face.create_mtcnn(sess, None) 34 | 35 | def find_faces(self, image): 36 | faces = [] 37 | 38 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 39 | 40 | bounding_boxes, _ = detect_face.detect_face(image, self.minsize, 41 | self.pnet, self.rnet, self.onet, 42 | self.threshold, self.factor) 43 | for bb in bounding_boxes: 44 | face = Face() 45 | face.container_image = image 46 | face.bounding_box = np.zeros(4, dtype=np.int32) 47 | 48 | img_size = np.asarray(image.shape)[0:2] 49 | face.bounding_box[0] = np.maximum(bb[0] - self.face_crop_margin / 2, 0) 50 | face.bounding_box[1] = np.maximum(bb[1] - self.face_crop_margin / 2, 0) 51 | face.bounding_box[2] = np.minimum(bb[2] + self.face_crop_margin / 2, img_size[1]) 52 | face.bounding_box[3] = np.minimum(bb[3] + self.face_crop_margin / 2, img_size[0]) 53 | cropped = image[face.bounding_box[1]:face.bounding_box[3], face.bounding_box[0]:face.bounding_box[2], :] 54 | face.image = cv2.resize(cropped, (self.face_crop_size, self.face_crop_size), interpolation=cv2.INTER_LINEAR) 55 | # face.image = misc.imresize(cropped, (self.face_crop_size, self.face_crop_size), interp='bilinear') 56 | face.confidence = bb[4] 57 | faces.append(face) 58 | 59 | return faces -------------------------------------------------------------------------------- /detect_blur.py: -------------------------------------------------------------------------------- 1 | from facenet_code.detection import Detection 2 | from facenet_code.encoder import Encoder 3 | from scipy.linalg import svd 4 | from imutils import paths 5 | import numpy as np 6 | import argparse 7 | import cv2 8 | import os 9 | 10 | class DetectBlur(object): 11 | def __init__(self, video, threshold=0.8): 12 
| self.video = video 13 | self.threshold = threshold 14 | print(self.threshold) 15 | self.video_frames = [] 16 | 17 | self.detect = Detection() 18 | 19 | self.process() 20 | 21 | def process(self): 22 | self.create_output_folder() 23 | self.get_video_frames() 24 | self.detect_blur() 25 | 26 | def create_output_folder(self): 27 | if not os.path.isdir('output'): 28 | os.mkdir('output') 29 | video_name = self.video.split('.')[0] 30 | if not os.path.isdir('output/'+video_name): 31 | os.mkdir('output/'+video_name) 32 | if not os.path.isdir('output/'+video_name+'/'+'frames'): 33 | os.mkdir('output/'+video_name+'/'+'frames') 34 | 35 | def get_blur_degree(self, img, sv_num=10): 36 | gray_img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY) 37 | u, s, v = np.linalg.svd(gray_img) 38 | top_sv = np.sum(s[0:sv_num]) 39 | total_sv = np.sum(s) 40 | return top_sv/total_sv 41 | 42 | # def get_blur_map(self, img, win_size=10, sv_num=3): 43 | # gray_img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY) 44 | # new_img = np.zeros((gray_img.shape[0]+win_size*2, gray_img.shape[1]+win_size*2)) 45 | # for i in range(new_img.shape[0]): 46 | # for j in range(new_img.shape[1]): 47 | # if i<win_size: 48 | # p = win_size-i 49 | # elif i>gray_img.shape[0]+win_size-1: 50 | # p = gray_img.shape[0]*2-i 51 | # else: 52 | # p = i-win_size 53 | # if j<win_size: 54 | # q = win_size-j 55 | # elif j>gray_img.shape[1]+win_size-1: 56 | # q = gray_img.shape[1]*2-j 57 | # else: 58 | # q = j-win_size 59 | # new_img[i,j] = img[p,q] 60 | # blur_map = np.zeros((gray_img.shape[0], gray_img.shape[1])) 61 | # max_sv = 0 62 | # min_sv = 1 63 | # for i in range(gray_img.shape[0]): 64 | # for j in range(gray_img.shape[1]): 65 | # block = new_img[i:i+win_size*2, j:j+win_size*2] 66 | # u, s, v = np.linalg.svd(block) 67 | # top_sv = np.sum(s[0:sv_num]) 68 | # total_sv = np.sum(s) 69 | # sv_degree = top_sv/total_sv 70 | # if max_sv < sv_degree: 71 | # max_sv = sv_degree 72 | # if min_sv > sv_degree: 73 | # min_sv = sv_degree 74 | # blur_map[i, j] = sv_degree 75 | # blur_map = (blur_map-min_sv)/(max_sv-min_sv) 76 | # return blur_map 77 | 78 | def get_video_frames(self): 79 | vidcap = cv2.VideoCapture(self.video) 80 | success, image = vidcap.read() 81 | count = 0 82 | while success: 83 | self.video_frames.append(image) 84 | success, image = vidcap.read() 85 | 86 | def print_box(self, frame, name, blur_degree, face_bb, color): 87 | left, top, right, bottom = face_bb 88 | width = right - left 89 | height = bottom - top 90 | 91 | if height > width: 92 | tam = int(height/4) 93 | else: 94 | tam = int(width/4) 95 | 96 | cv2.putText(frame, name, (right + 15, top + 30), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2) 97 | cv2.putText(frame, blur_degree, (right + 15, top + 60), cv2.FONT_HERSHEY_SIMPLEX, 0.75, color, 2) 98 | 99 | cv2.rectangle(frame, (face_bb[0], face_bb[1]), (face_bb[2], face_bb[3]), color, 1) 100 | 101 | cv2.line(frame, (left, top), (left+tam, top), color, 3) 102 | cv2.line(frame, (left, top), (left, top+tam), color, 3) 103 | 104 | cv2.line(frame, (left, bottom), (left, bottom-tam), color, 3) 105 | cv2.line(frame, (left, bottom), (left+tam, bottom), color, 3) 106 | 107 | cv2.line(frame, (right, top), (right-tam, top), color, 3) 108 | cv2.line(frame, (right, top), (right, top+tam), color, 3) 109 | 110 | cv2.line(frame, (right, bottom), (right-tam, bottom), color, 3) 111 | cv2.line(frame, (right, bottom), (right, bottom-tam), color, 3) 112 | 113 | def detect_blur(self): 114 | output_video = None 115 | if output_video is None: 116 | video_name = self.video.split('.')[0] 117 | size = (self.video_frames[0].shape[1], self.video_frames[0].shape[0]) 118 | fourcc =
cv2.VideoWriter_fourcc(*'MJPG') 119 | output_video = cv2.VideoWriter('output/'+video_name+'/'+video_name+'.avi',fourcc, 5, size, True) 120 | for i, frame in enumerate(self.video_frames): 121 | print('[INFO] detecting blur in image '+str(i+1)+'/'+str(len(self.video_frames))) 122 | faces = self.detect.find_faces(frame) 123 | if len(faces) > 0: 124 | for face in faces: 125 | if face.confidence > 0.9: 126 | text = "Not Blurry" 127 | boxes = face.bounding_box.astype(int) 128 | left, top, right, bottom = boxes 129 | face_image = frame[top:bottom, left:right] 130 | blur_degree = self.get_blur_degree(face_image) 131 | if blur_degree > self.threshold: 132 | text = "Blurry" 133 | self.print_box(frame, text, "{:.2f}".format(blur_degree), boxes, (255,255,255)) 134 | if output_video is not None: 135 | output_video.write(frame) 136 | cv2.imwrite('output/'+video_name+'/'+'frames/frame_'+str(i+1)+'.jpg', frame) 137 | if output_video is not None: 138 | output_video.release() 139 | 140 | 141 | if __name__ == "__main__": 142 | ap = argparse.ArgumentParser() 143 | ap.add_argument('video', type=str, help='the video input to detect blurry faces') 144 | ap.add_argument('--threshold', default=0.8, type=float, help='the threshold of blur degree to classify if some face is blurry or not') 145 | args = vars(ap.parse_args()) 146 | 147 | DetectBlur(video=args['video'], threshold=args['threshold']) 148 | 149 | 150 | 151 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Blurry Faces Detection in Videos 2 | 3 | Many digital images contain blurred regions caused by incorrect focus, object motion, hand shaking and so on. In many cases, automatic detection of blurred image regions is useful for understanding image content, and it can be applied in several multimedia analysis applications such as image segmentation, depth recovery, image retrieval and face recognition. For the machine learning task known as face recognition, blur detection is important to avoid wrong predictions caused by people in motion. The main objective of this experiment is to detect blur on face pictures in order to improve the results of the face recognition process. The experiment is based on the paper entitled "Blurred Image Region Detection and Classification", which can be found in the `reference.pdf` file. 4 | 5 | # 6 | 7 | ## FaceNet Project 8 | 9 | The results of this experiment were achieved using a face recognition project named FaceNet. This project was implemented by David Sandberg and is available on his GitHub account in the [facenet](https://github.com/davidsandberg/facenet) repository. The code is open source under the MIT [license](https://github.com/davidsandberg/facenet/blob/master/LICENSE.md) and was developed in Python, using the TensorFlow library for the machine learning process and the multiplatform OpenCV library for image processing. 10 | 11 | David Sandberg states in the repository documentation that the code was heavily inspired by the [OpenFace](https://github.com/cmusatyalab/openface) implementation and uses ideas from the paper ["Deep Face Recognition"](http://www.robots.ox.ac.uk/~vgg/publications/2015/Parkhi15/parkhi15.pdf) from the [Visual Geometry Group](http://www.robots.ox.ac.uk/~vgg/) at Oxford. The FaceNet implementation was tested with TensorFlow r1.7 under Ubuntu 14.04 with Python 2.7 and Python 3.5. The test cases and their results can be found in the repository, as reported in the documentation.
12 | 13 | Besides the tests, two pre-trained models are available in the repository for download. The model named [20180408-102900](https://drive.google.com/file/d/1R77HmFADxe87GmoLwzfgMu_HY0IhcyBz/view) achieved an accuracy of 0.9905, trained on the CASIA-WebFace dataset with the [Inception ResNet v1](https://github.com/davidsandberg/facenet/blob/master/src/models/inception_resnet_v1.py) architecture. The other model, named [20180402-114759](https://drive.google.com/file/d/1EXPBSXwTaqrSC0OhUdXNmKSh9qJUQ55-/view), achieved a slightly better accuracy of 0.9965 using the VGGFace2 training dataset and the same architecture. Further information, such as updates, details of the training data, pre-processing and performance, is available in the FaceNet repository. 14 | 15 | # 16 | 17 | ## Face Detection Process 18 | 19 | The FaceNet project, detailed in the previous section, was used in this experiment for face detection. As mentioned before, the project provides two pre-trained models capable of detecting faces in a picture. The picture is given as input and the implementation detects where the faces are located in it. The result is a vector of faces, where each face is represented by another vector of five numbers: the first four are the bounding-box coordinates, in pixels, of the detected face and the fifth is the confidence of the detection result. The FaceNet code used here is available in the `facenet_code` folder. 20 | 21 | # 22 | 23 | ## Blur Detection Process 24 | 25 | Singular Value Decomposition (SVD) is one of the most useful techniques in linear algebra and has been applied to many areas of computer science. The blur detection process uses the SVD factorization to calculate a blur degree and, based on a stipulated threshold, classifies a picture as "Blurred" or "Not blurred". In general, blurred picture regions have a higher blur degree than clear regions with no blur. Based on tests over different images, the reference paper suggests a threshold of 0.75, with which an accuracy of 88.78% is achieved. The step-by-step calculation of the blur degree is described in detail in the paper. An implementation following the paper can be found in the [blur_detection](https://github.com/fled/blur_detection) repository, on the GitHub account of one of the authors. A minimal sketch combining face detection and the blur degree calculation is shown in the appendix at the end of this README. 26 | 27 | # 28 | 29 | ## System Requirements 30 | 31 | ### `Warning: The documentation assumes Ubuntu 18+ as the operating system, but by following it you can check all the requirements and project dependencies needed to reproduce the configuration on other operating systems.` 32 | 33 | - **python3-venv** 34 | >$ sudo apt install python3-venv 35 | - **pip3** 36 | >$ sudo apt install python3-pip 37 | 38 | # 39 | 40 | ## Virtual Environment 41 | It's advisable to create a virtual environment to manage the project dependencies without library conflicts. To create, activate and deactivate a virtual environment, follow the instructions below. 42 | 43 | From the project root directory: 44 | 45 | - **Create** a new virtual environment: 46 | >$ python3 -m venv env 47 | - **Activate** a virtual environment: 48 | >$ source env/bin/activate 49 | - **Deactivate** a virtual environment: 50 | >$ deactivate 51 | 52 | # 53 | 54 | ## Project Dependencies 55 | Follow the instructions below to install all project dependencies in a virtual environment. Note that all required libraries are listed in `requirements.in`.
56 | 57 | From the project root directory: 58 | 59 | - **Create** a new virtual environment: 60 | >$ python3 -m venv env 61 | - **Activate** the virtual environment: 62 | >$ source env/bin/activate 63 | - Install **pip-tools**: 64 | >$ pip3 install pip-tools 65 | - **Compile** all the requirements: 66 | >$ pip-compile 67 | - **Synchronize** all the requirements: 68 | >$ pip-sync 69 | 70 | To learn more about **pip-tools**, please refer to its [documentation](https://pypi.org/project/pip-tools/). 71 | 72 | After running these instructions the `requirements.txt` file will be generated and all the dependencies will be installed. 73 | 74 | # 75 | 76 | ## Download Weights 77 | 78 | As mentioned before, two pre-trained models are available in the FaceNet repository for download. These models are the key part of the algorithm. The one used in this experiment is available at [weights](https://drive.google.com/drive/folders/1Thfg7WguOLfjZ3iAtdQqQNev6uoaXwsy?usp=sharing) and must be downloaded. It is essential to put the downloaded `weights` folder inside the `facenet_code` folder for the code to work. 79 | 80 | # 81 | 82 | ## Run Blurry Faces Detection Process 83 | 84 | The main file with the whole blurry faces detection implementation is `detect_blur.py`. Executing this file runs the process automatically. At most two parameters can be passed on the command line. The first is the video in which blurry faces will be detected; this parameter is required and must include the path and extension of the file. The second parameter is optional and refers to the blur degree threshold. The default threshold is 0.8; to change it, add the `--threshold` label before the float threshold value on the command line. 85 | 86 | Two examples of running the blur detection process: 87 | >$ python detect_blur.py video.mp4 88 | 89 | >$ python detect_blur.py video.mp4 --threshold 0.75 90 | 91 | All the results of a code execution will be available in the `output` folder. Inside it, another folder named after the input video is created to hold the respective results. One of the results is the same video with three annotations on each frame: the bounding box of each detected face, the blur degree value for that bounding box, and the classification, based on the defined threshold, of whether the face is blurry or not. Besides the video, each individual video frame with the same annotations will also be available. 92 | 93 | # 94 | 95 | ## License 96 | 97 | The FaceNet project is open source with an MIT [license](https://github.com/davidsandberg/facenet/blob/master/LICENSE.md). 98 | 99 | Regarding the code developed by the paper authors, everyone is permitted to copy and distribute verbatim copies of the [license document](https://github.com/fled/blur_detection/blob/master/LICENSE), but changing it is not allowed. 100 | 101 | Regarding this project, just consider the other two licenses. Use this information wisely. 102 | 103 | # 104 | 105 | ## Final Considerations 106 | 107 | Although this project basically merges two existing GitHub repositories, it was made specifically to detect blurry faces in videos. As mentioned before, this work is very useful for facial recognition projects and other multimedia analysis applications. The code is small and has all the documentation needed to adapt it to your own implementation. 108 | 109 | # 110 | 111 | ## Thank you for reading and enjoy it!
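#

## Appendix: Minimal Usage Sketch

The sketch below shows, at a glance, how the pieces described in the "Face Detection Process" and "Blur Detection Process" sections fit together: the MTCNN-based `Detection` class from `facenet_code` finds the faces, and the blur degree is the ratio of the top singular values of the grayscale face crop. It assumes the `weights` folder has already been placed inside `facenet_code` as described above; `some_frame.jpg` and the 0.8 threshold are just illustrative placeholders, and `detect_blur.py` remains the reference implementation for processing whole videos.

```python
import cv2
import numpy as np

from facenet_code.detection import Detection


def blur_degree(face_image, sv_num=10):
    """Ratio of the top singular values to their total (higher means blurrier)."""
    gray = cv2.cvtColor(face_image, cv2.COLOR_BGR2GRAY)
    singular_values = np.linalg.svd(gray, compute_uv=False)
    return np.sum(singular_values[:sv_num]) / np.sum(singular_values)


detector = Detection()                 # loads the MTCNN face detector
frame = cv2.imread('some_frame.jpg')   # any BGR image, e.g. a single video frame
for face in detector.find_faces(frame):
    if face.confidence > 0.9:          # same confidence cut-off as detect_blur.py
        left, top, right, bottom = face.bounding_box.astype(int)
        degree = blur_degree(frame[top:bottom, left:right])
        label = 'Blurry' if degree > 0.8 else 'Not Blurry'
        print('face ({}, {}, {}, {}): blur degree {:.2f} -> {}'.format(
            left, top, right, bottom, degree, label))
```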
112 | 113 | -------------------------------------------------------------------------------- /facenet_code/align_dataset_mtcnn.py: -------------------------------------------------------------------------------- 1 | """Performs face alignment and stores face thumbnails in the output directory.""" 2 | # MIT License 3 | # 4 | # Copyright (c) 2016 David Sandberg 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation files (the "Software"), to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | # copies of the Software, and to permit persons to whom the Software is 11 | # furnished to do so, subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included in all 14 | # copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # SOFTWARE. 23 | 24 | from __future__ import absolute_import 25 | from __future__ import division 26 | from __future__ import print_function 27 | 28 | from scipy import misc 29 | import sys 30 | import os 31 | import argparse 32 | import tensorflow as tf 33 | import numpy as np 34 | 35 | import random 36 | from time import sleep 37 | from facenet import facenet 38 | from facenet import detect_face 39 | 40 | tf.logging.set_verbosity(tf.logging.ERROR) 41 | # tf.logging.set_verbosity(tf.logging.INFO) 42 | # tf.logging.set_verbosity(tf.logging.WARN) 43 | # tf.logging.set_verbosity(tf.logging.DEBUG) 44 | # tf.logging.set_verbosity(tf.logging.FATAL) 45 | 46 | def main(args): 47 | sleep(random.random()) 48 | output_dir = os.path.expanduser(args.output_dir) 49 | if not os.path.exists(output_dir): 50 | os.makedirs(output_dir) 51 | # Store some git revision info in a text file in the log directory 52 | src_path,_ = os.path.split(os.path.realpath(__file__)) 53 | facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv)) 54 | dataset = facenet.get_dataset(args.input_dir) 55 | 56 | print('Creating networks and loading parameters') 57 | 58 | with tf.Graph().as_default(): 59 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) 60 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) 61 | with sess.as_default(): 62 | pnet, rnet, onet = detect_face.create_mtcnn(sess, None) 63 | 64 | minsize = 20 # minimum size of face 65 | threshold = [ 0.6, 0.7, 0.7 ] # three steps's threshold 66 | factor = 0.709 # scale factor 67 | 68 | # Add a random key to the filename to allow alignment using multiple processes 69 | random_key = np.random.randint(0, high=99999) 70 | bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key) 71 | 72 | with open(bounding_boxes_filename, "w") as text_file: 73 | nrof_images_total = 0 74 | nrof_successfully_aligned = 0 75 | if args.random_order: 76 | random.shuffle(dataset) 77 | for cls in dataset: 78 | 
output_class_dir = os.path.join(output_dir, cls.name) 79 | if not os.path.exists(output_class_dir): 80 | os.makedirs(output_class_dir) 81 | if args.random_order: 82 | random.shuffle(cls.image_paths) 83 | for image_path in cls.image_paths: 84 | nrof_images_total += 1 85 | filename = os.path.splitext(os.path.split(image_path)[1])[0] 86 | output_filename = os.path.join(output_class_dir, filename+'.png') 87 | print(image_path) 88 | if not os.path.exists(output_filename): 89 | try: 90 | img = misc.imread(image_path) 91 | except (IOError, ValueError, IndexError) as e: 92 | errorMessage = '{}: {}'.format(image_path, e) 93 | print(errorMessage) 94 | else: 95 | if img.ndim<2: 96 | print('Unable to align "%s"' % image_path) 97 | text_file.write('%s\n' % (output_filename)) 98 | continue 99 | if img.ndim == 2: 100 | img = facenet.to_rgb(img) 101 | img = img[:,:,0:3] 102 | 103 | bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor) 104 | nrof_faces = bounding_boxes.shape[0] 105 | if nrof_faces>0: 106 | det = bounding_boxes[:,0:4] 107 | det_arr = [] 108 | img_size = np.asarray(img.shape)[0:2] 109 | if nrof_faces>1: 110 | if args.detect_multiple_faces: 111 | for i in range(nrof_faces): 112 | det_arr.append(np.squeeze(det[i])) 113 | else: 114 | bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1]) 115 | img_center = img_size / 2 116 | offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ]) 117 | offset_dist_squared = np.sum(np.power(offsets,2.0),0) 118 | index = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering 119 | det_arr.append(det[index,:]) 120 | else: 121 | det_arr.append(np.squeeze(det)) 122 | 123 | for i, det in enumerate(det_arr): 124 | det = np.squeeze(det) 125 | bb = np.zeros(4, dtype=np.int32) 126 | bb[0] = np.maximum(det[0]-args.margin/2, 0) 127 | bb[1] = np.maximum(det[1]-args.margin/2, 0) 128 | bb[2] = np.minimum(det[2]+args.margin/2, img_size[1]) 129 | bb[3] = np.minimum(det[3]+args.margin/2, img_size[0]) 130 | cropped = img[bb[1]:bb[3],bb[0]:bb[2],:] 131 | scaled = misc.imresize(cropped, (args.image_size, args.image_size), interp='bilinear') 132 | nrof_successfully_aligned += 1 133 | filename_base, file_extension = os.path.splitext(output_filename) 134 | if args.detect_multiple_faces: 135 | output_filename_n = "{}_{}{}".format(filename_base, i, file_extension) 136 | else: 137 | output_filename_n = "{}{}".format(filename_base, file_extension) 138 | misc.imsave(output_filename_n, scaled) 139 | text_file.write('%s %d %d %d %d\n' % (output_filename_n, bb[0], bb[1], bb[2], bb[3])) 140 | else: 141 | print('Unable to align "%s"' % image_path) 142 | text_file.write('%s\n' % (output_filename)) 143 | 144 | print('Total number of images: %d' % nrof_images_total) 145 | print('Number of successfully aligned images: %d' % nrof_successfully_aligned) 146 | 147 | 148 | def parse_arguments(argv): 149 | parser = argparse.ArgumentParser() 150 | 151 | parser.add_argument('input_dir', type=str, help='Directory with unaligned images.') 152 | parser.add_argument('output_dir', type=str, help='Directory with aligned face thumbnails.') 153 | parser.add_argument('--image_size', type=int, 154 | help='Image size (height, width) in pixels.', default=182) 155 | parser.add_argument('--margin', type=int, 156 | help='Margin for the crop around the bounding box (height, width) in pixels.', default=44) 157 | parser.add_argument('--random_order', 158 | help='Shuffles the order of images to enable 
alignment using multiple processes.', action='store_true') 159 | parser.add_argument('--gpu_memory_fraction', type=float, 160 | help='Upper bound on the amount of GPU memory that will be used by the process.', default=1.0) 161 | parser.add_argument('--detect_multiple_faces', type=bool, 162 | help='Detect and align multiple faces per image.', default=False) 163 | return parser.parse_args(argv) 164 | 165 | if __name__ == '__main__': 166 | main(parse_arguments(sys.argv[1:])) 167 | -------------------------------------------------------------------------------- /facenet_code/facenet.py: -------------------------------------------------------------------------------- 1 | """Functions for building the face recognition network. 2 | """ 3 | # MIT License 4 | # 5 | # Copyright (c) 2016 David Sandberg 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in all 15 | # copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | # SOFTWARE. 
24 | 25 | # pylint: disable=missing-docstring 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | 30 | import os 31 | from subprocess import Popen, PIPE 32 | import tensorflow as tf 33 | import numpy as np 34 | from scipy import misc 35 | from tensorflow.python.training import training 36 | import re 37 | from tensorflow.python.platform import gfile 38 | 39 | tf.logging.set_verbosity(tf.logging.ERROR) 40 | # tf.logging.set_verbosity(tf.logging.INFO) 41 | # tf.logging.set_verbosity(tf.logging.WARN) 42 | # tf.logging.set_verbosity(tf.logging.DEBUG) 43 | # tf.logging.set_verbosity(tf.logging.FATAL) 44 | 45 | 46 | # 1: Random rotate 2: Random crop 4: Random flip 8: Fixed image standardization 16: Flip 47 | RANDOM_ROTATE = 1 48 | RANDOM_CROP = 2 49 | RANDOM_FLIP = 4 50 | FIXED_STANDARDIZATION = 8 51 | FLIP = 16 52 | 53 | def get_image_paths_and_labels(dataset): 54 | image_paths_flat = [] 55 | labels_flat = [] 56 | for i in range(len(dataset)): 57 | image_paths_flat += dataset[i].image_paths 58 | labels_flat += [i] * len(dataset[i].image_paths) 59 | return image_paths_flat, labels_flat 60 | 61 | def prewhiten(x): 62 | mean = np.mean(x) 63 | std = np.std(x) 64 | std_adj = np.maximum(std, 1.0/np.sqrt(x.size)) 65 | y = np.multiply(np.subtract(x, mean), 1/std_adj) 66 | return y 67 | 68 | def crop(image, random_crop, image_size): 69 | if image.shape[1]>image_size: 70 | sz1 = int(image.shape[1]//2) 71 | sz2 = int(image_size//2) 72 | if random_crop: 73 | diff = sz1-sz2 74 | (h, v) = (np.random.randint(-diff, diff+1), np.random.randint(-diff, diff+1)) 75 | else: 76 | (h, v) = (0,0) 77 | image = image[(sz1-sz2+v):(sz1+sz2+v),(sz1-sz2+h):(sz1+sz2+h),:] 78 | return image 79 | 80 | def flip(image, random_flip): 81 | if random_flip and np.random.choice([True, False]): 82 | image = np.fliplr(image) 83 | return image 84 | 85 | def to_rgb(img): 86 | w, h = img.shape 87 | ret = np.empty((w, h, 3), dtype=np.uint8) 88 | ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img 89 | return ret 90 | 91 | def load_data(image_paths, do_random_crop, do_random_flip, image_size, do_prewhiten=True): 92 | nrof_samples = len(image_paths) 93 | images = np.zeros((nrof_samples, image_size, image_size, 3)) 94 | for i in range(nrof_samples): 95 | img = misc.imread(image_paths[i]) 96 | if img.ndim == 2: 97 | img = to_rgb(img) 98 | if do_prewhiten: 99 | img = prewhiten(img) 100 | img = crop(img, do_random_crop, image_size) 101 | img = flip(img, do_random_flip) 102 | images[i,:,:,:] = img 103 | return images 104 | 105 | 106 | class ImageClass(): 107 | "Stores the paths to images for a given class" 108 | def __init__(self, name, image_paths): 109 | self.name = name 110 | self.image_paths = image_paths 111 | 112 | def __str__(self): 113 | return self.name + ', ' + str(len(self.image_paths)) + ' images' 114 | 115 | def __len__(self): 116 | return len(self.image_paths) 117 | 118 | def get_dataset(path, has_class_directories=True): 119 | dataset = [] 120 | path_exp = os.path.expanduser(path) 121 | classes = [path for path in os.listdir(path_exp) \ 122 | if os.path.isdir(os.path.join(path_exp, path))] 123 | classes.sort() 124 | nrof_classes = len(classes) 125 | for i in range(nrof_classes): 126 | class_name = classes[i] 127 | facedir = os.path.join(path_exp, class_name) 128 | image_paths = get_image_paths(facedir) 129 | dataset.append(ImageClass(class_name, image_paths)) 130 | 131 | return dataset 132 | 133 | def get_image_paths(facedir): 134 | image_paths = [] 135 | if 
os.path.isdir(facedir): 136 | images = os.listdir(facedir) 137 | image_paths = [os.path.join(facedir,img) for img in images] 138 | return image_paths 139 | 140 | def load_model(model, input_map=None): 141 | # Check if the sample-weight is a sample-weight directory (containing a metagraph and a checkpoint file) 142 | # or if it is a protobuf file with a frozen graph 143 | model_exp = os.path.expanduser(model) 144 | if (os.path.isfile(model_exp)): 145 | with gfile.FastGFile(model_exp,'rb') as f: 146 | graph_def = tf.GraphDef() 147 | graph_def.ParseFromString(f.read()) 148 | tf.import_graph_def(graph_def, input_map=input_map, name='') 149 | else: 150 | print('Model directory: %s' % model_exp) 151 | meta_file, ckpt_file = get_model_filenames(model_exp) 152 | 153 | print('Metagraph file: %s' % meta_file) 154 | print('Checkpoint file: %s' % ckpt_file) 155 | 156 | saver = tf.train.import_meta_graph(os.path.join(model_exp, meta_file), input_map=input_map) 157 | saver.restore(tf.get_default_session(), os.path.join(model_exp, ckpt_file)) 158 | 159 | def get_model_filenames(model_dir): 160 | files = os.listdir(model_dir) 161 | meta_files = [s for s in files if s.endswith('.meta')] 162 | if len(meta_files)==0: 163 | raise ValueError('No meta file found in the sample-weight directory (%s)' % model_dir) 164 | elif len(meta_files)>1: 165 | raise ValueError('There should not be more than one meta file in the sample-weight directory (%s)' % model_dir) 166 | meta_file = meta_files[0] 167 | ckpt = tf.train.get_checkpoint_state(model_dir) 168 | if ckpt and ckpt.model_checkpoint_path: 169 | ckpt_file = os.path.basename(ckpt.model_checkpoint_path) 170 | return meta_file, ckpt_file 171 | 172 | meta_files = [s for s in files if '.ckpt' in s] 173 | max_step = -1 174 | for f in files: 175 | step_str = re.match(r'(^sample-weight-[\w\- ]+.ckpt-(\d+))', f) 176 | if step_str is not None and len(step_str.groups())>=2: 177 | step = int(step_str.groups()[1]) 178 | if step > max_step: 179 | max_step = step 180 | ckpt_file = step_str.groups()[0] 181 | return meta_file, ckpt_file 182 | 183 | 184 | def store_revision_info(src_path, output_dir, arg_string): 185 | try: 186 | # Get git hash 187 | cmd = ['git', 'rev-parse', 'HEAD'] 188 | gitproc = Popen(cmd, stdout = PIPE, cwd=src_path) 189 | (stdout, _) = gitproc.communicate() 190 | git_hash = stdout.strip() 191 | except OSError as e: 192 | git_hash = ' '.join(cmd) + ': ' + e.strerror 193 | 194 | try: 195 | # Get local changes 196 | cmd = ['git', 'diff', 'HEAD'] 197 | gitproc = Popen(cmd, stdout = PIPE, cwd=src_path) 198 | (stdout, _) = gitproc.communicate() 199 | git_diff = stdout.strip() 200 | except OSError as e: 201 | git_diff = ' '.join(cmd) + ': ' + e.strerror 202 | 203 | # Store a text file in the log directory 204 | rev_info_filename = os.path.join(output_dir, 'revision_info.txt') 205 | with open(rev_info_filename, "w") as text_file: 206 | text_file.write('arguments: %s\n--------------------\n' % arg_string) 207 | text_file.write('tensorflow version: %s\n--------------------\n' % tf.__version__) # @UndefinedVariable 208 | text_file.write('git hash: %s\n--------------------\n' % git_hash) 209 | text_file.write('%s' % git_diff) 210 | 211 | def list_variables(filename): 212 | reader = training.NewCheckpointReader(filename) 213 | variable_map = reader.get_variable_to_shape_map() 214 | names = sorted(variable_map.keys()) 215 | return names 216 | 217 | def put_images_on_grid(images, shape=(16,8)): 218 | nrof_images = images.shape[0] 219 | img_size = images.shape[1] 220 | bw = 
3 221 | img = np.zeros((shape[1]*(img_size+bw)+bw, shape[0]*(img_size+bw)+bw, 3), np.float32) 222 | for i in range(shape[1]): 223 | x_start = i*(img_size+bw)+bw 224 | for j in range(shape[0]): 225 | img_index = i*shape[0]+j 226 | if img_index>=nrof_images: 227 | break 228 | y_start = j*(img_size+bw)+bw 229 | img[x_start:x_start+img_size, y_start:y_start+img_size, :] = images[img_index, :, :, :] 230 | if img_index>=nrof_images: 231 | break 232 | return img 233 | 234 | 235 | -------------------------------------------------------------------------------- /facenet_code/detect_face.py: -------------------------------------------------------------------------------- 1 | """ Tensorflow implementation of the face detection / alignment algorithm found at 2 | https://github.com/kpzhang93/MTCNN_face_detection_alignment 3 | """ 4 | # MIT License 5 | # 6 | # Copyright (c) 2016 David Sandberg 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | from six import string_types, iteritems 30 | 31 | import numpy as np 32 | import tensorflow as tf 33 | import cv2 34 | import os 35 | 36 | tf.logging.set_verbosity(tf.logging.ERROR) 37 | # tf.logging.set_verbosity(tf.logging.INFO) 38 | # tf.logging.set_verbosity(tf.logging.WARN) 39 | # tf.logging.set_verbosity(tf.logging.DEBUG) 40 | # tf.logging.set_verbosity(tf.logging.FATAL) 41 | 42 | 43 | def layer(op): 44 | """Decorator for composable network layers.""" 45 | 46 | def layer_decorated(self, *args, **kwargs): 47 | # Automatically set a name if not provided. 48 | name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) 49 | # Figure out the layer inputs. 50 | if len(self.terminals) == 0: 51 | raise RuntimeError('No input variables found for layer %s.' % name) 52 | elif len(self.terminals) == 1: 53 | layer_input = self.terminals[0] 54 | else: 55 | layer_input = list(self.terminals) 56 | # Perform the operation and get the output. 57 | layer_output = op(self, layer_input, *args, **kwargs) 58 | # Add to layer LUT. 59 | self.layers[name] = layer_output 60 | # This output is now the input for the next layer. 61 | self.feed(layer_output) 62 | # Return self for chained calls. 
63 | return self 64 | 65 | return layer_decorated 66 | 67 | class Network(object): 68 | 69 | def __init__(self, inputs, trainable=True): 70 | # The input nodes for this network 71 | self.inputs = inputs 72 | # The current list of terminal nodes 73 | self.terminals = [] 74 | # Mapping from layer names to layers 75 | self.layers = dict(inputs) 76 | # If true, the resulting variables are set as trainable 77 | self.trainable = trainable 78 | 79 | self.setup() 80 | 81 | def setup(self): 82 | """Construct the network. """ 83 | raise NotImplementedError('Must be implemented by the subclass.') 84 | 85 | def load(self, data_path, session, ignore_missing=False): 86 | """Load network weights. 87 | data_path: The path to the numpy-serialized network weights 88 | session: The current TensorFlow session 89 | ignore_missing: If true, serialized weights for missing layers are ignored. 90 | """ 91 | data_dict = np.load(data_path, encoding='latin1', allow_pickle=True).item() #pylint: disable=no-member 92 | 93 | for op_name in data_dict: 94 | with tf.variable_scope(op_name, reuse=True): 95 | for param_name, data in iteritems(data_dict[op_name]): 96 | try: 97 | var = tf.get_variable(param_name) 98 | session.run(var.assign(data)) 99 | except ValueError: 100 | if not ignore_missing: 101 | raise 102 | 103 | def feed(self, *args): 104 | """Set the input(s) for the next operation by replacing the terminal nodes. 105 | The arguments can be either layer names or the actual layers. 106 | """ 107 | assert len(args) != 0 108 | self.terminals = [] 109 | for fed_layer in args: 110 | if isinstance(fed_layer, string_types): 111 | try: 112 | fed_layer = self.layers[fed_layer] 113 | except KeyError: 114 | raise KeyError('Unknown layer name fed: %s' % fed_layer) 115 | self.terminals.append(fed_layer) 116 | return self 117 | 118 | def get_output(self): 119 | """Returns the current network output.""" 120 | return self.terminals[-1] 121 | 122 | def get_unique_name(self, prefix): 123 | """Returns an index-suffixed unique name for the given prefix. 124 | This is used for auto-generating layer names based on the type-prefix. 125 | """ 126 | ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 127 | return '%s_%d' % (prefix, ident) 128 | 129 | def make_var(self, name, shape): 130 | """Creates a new TensorFlow variable.""" 131 | return tf.get_variable(name, shape, trainable=self.trainable) 132 | 133 | def validate_padding(self, padding): 134 | """Verifies that the padding is one of the supported ones.""" 135 | assert padding in ('SAME', 'VALID') 136 | 137 | @layer 138 | def conv(self, 139 | inp, 140 | k_h, 141 | k_w, 142 | c_o, 143 | s_h, 144 | s_w, 145 | name, 146 | relu=True, 147 | padding='SAME', 148 | group=1, 149 | biased=True): 150 | # Verify that the padding is acceptable 151 | self.validate_padding(padding) 152 | # Get the number of channels in the input 153 | c_i = int(inp.get_shape()[-1]) 154 | # Verify that the grouping parameter is valid 155 | assert c_i % group == 0 156 | assert c_o % group == 0 157 | # Convolution for a given input and kernel 158 | convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) 159 | with tf.variable_scope(name) as scope: 160 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o]) 161 | # This is the common-case. Convolve the input without any further complications. 
162 | output = convolve(inp, kernel) 163 | # Add the biases 164 | if biased: 165 | biases = self.make_var('biases', [c_o]) 166 | output = tf.nn.bias_add(output, biases) 167 | if relu: 168 | # ReLU non-linearity 169 | output = tf.nn.relu(output, name=scope.name) 170 | return output 171 | 172 | @layer 173 | def prelu(self, inp, name): 174 | with tf.variable_scope(name): 175 | i = int(inp.get_shape()[-1]) 176 | alpha = self.make_var('alpha', shape=(i,)) 177 | output = tf.nn.relu(inp) + tf.multiply(alpha, -tf.nn.relu(-inp)) 178 | return output 179 | 180 | @layer 181 | def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'): 182 | self.validate_padding(padding) 183 | return tf.nn.max_pool(inp, 184 | ksize=[1, k_h, k_w, 1], 185 | strides=[1, s_h, s_w, 1], 186 | padding=padding, 187 | name=name) 188 | 189 | @layer 190 | def fc(self, inp, num_out, name, relu=True): 191 | with tf.variable_scope(name): 192 | input_shape = inp.get_shape() 193 | if input_shape.ndims == 4: 194 | # The input is spatial. Vectorize it first. 195 | dim = 1 196 | for d in input_shape[1:].as_list(): 197 | dim *= int(d) 198 | feed_in = tf.reshape(inp, [-1, dim]) 199 | else: 200 | feed_in, dim = (inp, input_shape[-1].value) 201 | weights = self.make_var('weights', shape=[dim, num_out]) 202 | biases = self.make_var('biases', [num_out]) 203 | op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b 204 | fc = op(feed_in, weights, biases, name=name) 205 | return fc 206 | 207 | 208 | """ 209 | Multi dimensional softmax, 210 | refer to https://github.com/tensorflow/tensorflow/issues/210 211 | compute softmax along the dimension of target 212 | the native softmax only supports batch_size x dimension 213 | """ 214 | @layer 215 | def softmax(self, target, axis, name=None): 216 | max_axis = tf.reduce_max(target, axis, keepdims=True) 217 | target_exp = tf.exp(target-max_axis) 218 | normalize = tf.reduce_sum(target_exp, axis, keepdims=True) 219 | softmax = tf.div(target_exp, normalize, name) 220 | return softmax 221 | 222 | class PNet(Network): 223 | def setup(self): 224 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 225 | .conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1') 226 | .prelu(name='PReLU1') 227 | .max_pool(2, 2, 2, 2, name='pool1') 228 | .conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2') 229 | .prelu(name='PReLU2') 230 | .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3') 231 | .prelu(name='PReLU3') 232 | .conv(1, 1, 2, 1, 1, relu=False, name='conv4-1') 233 | .softmax(3,name='prob1')) 234 | 235 | (self.feed('PReLU3') #pylint: disable=no-value-for-parameter 236 | .conv(1, 1, 4, 1, 1, relu=False, name='conv4-2')) 237 | 238 | class RNet(Network): 239 | def setup(self): 240 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 241 | .conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1') 242 | .prelu(name='prelu1') 243 | .max_pool(3, 3, 2, 2, name='pool1') 244 | .conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2') 245 | .prelu(name='prelu2') 246 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 247 | .conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3') 248 | .prelu(name='prelu3') 249 | .fc(128, relu=False, name='conv4') 250 | .prelu(name='prelu4') 251 | .fc(2, relu=False, name='conv5-1') 252 | .softmax(1,name='prob1')) 253 | 254 | (self.feed('prelu4') #pylint: disable=no-value-for-parameter 255 | .fc(4, relu=False, name='conv5-2')) 256 | 257 | class ONet(Network): 258 | def setup(self): 259 | 
(self.feed('data') #pylint: disable=no-value-for-parameter, no-member 260 | .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1') 261 | .prelu(name='prelu1') 262 | .max_pool(3, 3, 2, 2, name='pool1') 263 | .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2') 264 | .prelu(name='prelu2') 265 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 266 | .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3') 267 | .prelu(name='prelu3') 268 | .max_pool(2, 2, 2, 2, name='pool3') 269 | .conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4') 270 | .prelu(name='prelu4') 271 | .fc(256, relu=False, name='conv5') 272 | .prelu(name='prelu5') 273 | .fc(2, relu=False, name='conv6-1') 274 | .softmax(1, name='prob1')) 275 | 276 | (self.feed('prelu5') #pylint: disable=no-value-for-parameter 277 | .fc(4, relu=False, name='conv6-2')) 278 | 279 | (self.feed('prelu5') #pylint: disable=no-value-for-parameter 280 | .fc(10, relu=False, name='conv6-3')) 281 | 282 | def create_mtcnn(sess, model_path): 283 | if not model_path: 284 | model_path,_ = os.path.split(os.path.realpath(__file__)) 285 | 286 | with tf.variable_scope('pnet'): 287 | data = tf.placeholder(tf.float32, (None,None,None,3), 'input') 288 | pnet = PNet({'data':data}) 289 | pnet.load(os.path.join(model_path, 'weights/det1.npy'), sess) 290 | with tf.variable_scope('rnet'): 291 | data = tf.placeholder(tf.float32, (None,24,24,3), 'input') 292 | rnet = RNet({'data':data}) 293 | rnet.load(os.path.join(model_path, 'weights/det2.npy'), sess) 294 | with tf.variable_scope('onet'): 295 | data = tf.placeholder(tf.float32, (None,48,48,3), 'input') 296 | onet = ONet({'data':data}) 297 | onet.load(os.path.join(model_path, 'weights/det3.npy'), sess) 298 | 299 | pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img}) 300 | rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img}) 301 | onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img}) 302 | return pnet_fun, rnet_fun, onet_fun 303 | 304 | def detect_face(img, minsize, pnet, rnet, onet, threshold, factor): 305 | """Detects faces in an image, and returns bounding boxes and points for them. 306 | img: input image 307 | minsize: minimum faces' size 308 | pnet, rnet, onet: caffemodel 309 | threshold: threshold=[th1, th2, th3], th1-3 are three steps's threshold 310 | factor: the factor used to create a scaling pyramid of face sizes to detect in the image. 
311 | """ 312 | factor_count=0 313 | total_boxes=np.empty((0,9)) 314 | points=np.empty(0) 315 | h=img.shape[0] 316 | w=img.shape[1] 317 | minl=np.amin([h, w]) 318 | m=12.0/minsize 319 | minl=minl*m 320 | # create scale pyramid 321 | scales=[] 322 | while minl>=12: 323 | scales += [m*np.power(factor, factor_count)] 324 | minl = minl*factor 325 | factor_count += 1 326 | 327 | # first stage 328 | for scale in scales: 329 | hs=int(np.ceil(h*scale)) 330 | ws=int(np.ceil(w*scale)) 331 | im_data = imresample(img, (hs, ws)) 332 | im_data = (im_data-127.5)*0.0078125 333 | img_x = np.expand_dims(im_data, 0) 334 | img_y = np.transpose(img_x, (0,2,1,3)) 335 | out = pnet(img_y) 336 | out0 = np.transpose(out[0], (0,2,1,3)) 337 | out1 = np.transpose(out[1], (0,2,1,3)) 338 | 339 | boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0]) 340 | 341 | # inter-scale nms 342 | pick = nms(boxes.copy(), 0.5, 'Union') 343 | if boxes.size>0 and pick.size>0: 344 | boxes = boxes[pick,:] 345 | total_boxes = np.append(total_boxes, boxes, axis=0) 346 | 347 | numbox = total_boxes.shape[0] 348 | if numbox>0: 349 | pick = nms(total_boxes.copy(), 0.7, 'Union') 350 | total_boxes = total_boxes[pick,:] 351 | regw = total_boxes[:,2]-total_boxes[:,0] 352 | regh = total_boxes[:,3]-total_boxes[:,1] 353 | qq1 = total_boxes[:,0]+total_boxes[:,5]*regw 354 | qq2 = total_boxes[:,1]+total_boxes[:,6]*regh 355 | qq3 = total_boxes[:,2]+total_boxes[:,7]*regw 356 | qq4 = total_boxes[:,3]+total_boxes[:,8]*regh 357 | total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]])) 358 | total_boxes = rerec(total_boxes.copy()) 359 | total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32) 360 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) 361 | 362 | numbox = total_boxes.shape[0] 363 | if numbox>0: 364 | # second stage 365 | tempimg = np.zeros((24,24,3,numbox)) 366 | for k in range(0,numbox): 367 | tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) 368 | tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] 369 | if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: 370 | tempimg[:,:,:,k] = imresample(tmp, (24, 24)) 371 | else: 372 | return np.empty() 373 | tempimg = (tempimg-127.5)*0.0078125 374 | tempimg1 = np.transpose(tempimg, (3,1,0,2)) 375 | out = rnet(tempimg1) 376 | out0 = np.transpose(out[0]) 377 | out1 = np.transpose(out[1]) 378 | score = out1[1,:] 379 | ipass = np.where(score>threshold[1]) 380 | total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) 381 | mv = out0[:,ipass[0]] 382 | if total_boxes.shape[0]>0: 383 | pick = nms(total_boxes, 0.7, 'Union') 384 | total_boxes = total_boxes[pick,:] 385 | total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick])) 386 | total_boxes = rerec(total_boxes.copy()) 387 | 388 | numbox = total_boxes.shape[0] 389 | if numbox>0: 390 | # third stage 391 | total_boxes = np.fix(total_boxes).astype(np.int32) 392 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) 393 | tempimg = np.zeros((48,48,3,numbox)) 394 | for k in range(0,numbox): 395 | tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) 396 | tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] 397 | if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: 398 | tempimg[:,:,:,k] = imresample(tmp, (48, 48)) 399 | else: 400 | return np.empty() 401 | tempimg = (tempimg-127.5)*0.0078125 402 | tempimg1 = 
np.transpose(tempimg, (3,1,0,2)) 403 | out = onet(tempimg1) 404 | out0 = np.transpose(out[0]) 405 | out1 = np.transpose(out[1]) 406 | out2 = np.transpose(out[2]) 407 | score = out2[1,:] 408 | points = out1 409 | ipass = np.where(score>threshold[2]) 410 | points = points[:,ipass[0]] 411 | total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) 412 | mv = out0[:,ipass[0]] 413 | 414 | w = total_boxes[:,2]-total_boxes[:,0]+1 415 | h = total_boxes[:,3]-total_boxes[:,1]+1 416 | points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1 417 | points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1 418 | if total_boxes.shape[0]>0: 419 | total_boxes = bbreg(total_boxes.copy(), np.transpose(mv)) 420 | pick = nms(total_boxes.copy(), 0.7, 'Min') 421 | total_boxes = total_boxes[pick,:] 422 | points = points[:,pick] 423 | 424 | return total_boxes, points 425 | 426 | 427 | def bulk_detect_face(images, detection_window_size_ratio, pnet, rnet, onet, threshold, factor): 428 | """Detects faces in a list of images 429 | images: list containing input images 430 | detection_window_size_ratio: ratio of minimum face size to smallest image dimension 431 | pnet, rnet, onet: caffemodel 432 | threshold: threshold=[th1 th2 th3], th1-3 are three steps's threshold [0-1] 433 | factor: the factor used to create a scaling pyramid of face sizes to detect in the image. 434 | """ 435 | all_scales = [None] * len(images) 436 | images_with_boxes = [None] * len(images) 437 | 438 | for i in range(len(images)): 439 | images_with_boxes[i] = {'total_boxes': np.empty((0, 9))} 440 | 441 | # create scale pyramid 442 | for index, img in enumerate(images): 443 | all_scales[index] = [] 444 | h = img.shape[0] 445 | w = img.shape[1] 446 | minsize = int(detection_window_size_ratio * np.minimum(w, h)) 447 | factor_count = 0 448 | minl = np.amin([h, w]) 449 | if minsize <= 12: 450 | minsize = 12 451 | 452 | m = 12.0 / minsize 453 | minl = minl * m 454 | while minl >= 12: 455 | all_scales[index].append(m * np.power(factor, factor_count)) 456 | minl = minl * factor 457 | factor_count += 1 458 | 459 | # # # # # # # # # # # # # 460 | # first stage - fast proposal network (pnet) to obtain face candidates 461 | # # # # # # # # # # # # # 462 | 463 | images_obj_per_resolution = {} 464 | 465 | # TODO: use some type of rounding to number module 8 to increase probability that pyramid images will have the same resolution across input images 466 | 467 | for index, scales in enumerate(all_scales): 468 | h = images[index].shape[0] 469 | w = images[index].shape[1] 470 | 471 | for scale in scales: 472 | hs = int(np.ceil(h * scale)) 473 | ws = int(np.ceil(w * scale)) 474 | 475 | if (ws, hs) not in images_obj_per_resolution: 476 | images_obj_per_resolution[(ws, hs)] = [] 477 | 478 | im_data = imresample(images[index], (hs, ws)) 479 | im_data = (im_data - 127.5) * 0.0078125 480 | img_y = np.transpose(im_data, (1, 0, 2)) # caffe uses different dimensions ordering 481 | images_obj_per_resolution[(ws, hs)].append({'scale': scale, 'image': img_y, 'index': index}) 482 | 483 | for resolution in images_obj_per_resolution: 484 | images_per_resolution = [i['image'] for i in images_obj_per_resolution[resolution]] 485 | outs = pnet(images_per_resolution) 486 | 487 | for index in range(len(outs[0])): 488 | scale = images_obj_per_resolution[resolution][index]['scale'] 489 | image_index = images_obj_per_resolution[resolution][index]['index'] 490 | out0 = 
491 |             out1 = np.transpose(outs[1][index], (1, 0, 2))
492 | 
493 |             boxes, _ = generateBoundingBox(out1[:, :, 1].copy(), out0[:, :, :].copy(), scale, threshold[0])
494 | 
495 |             # inter-scale nms
496 |             pick = nms(boxes.copy(), 0.5, 'Union')
497 |             if boxes.size > 0 and pick.size > 0:
498 |                 boxes = boxes[pick, :]
499 |                 images_with_boxes[image_index]['total_boxes'] = np.append(images_with_boxes[image_index]['total_boxes'],
500 |                                                                           boxes,
501 |                                                                           axis=0)
502 | 
503 |     for index, image_obj in enumerate(images_with_boxes):
504 |         numbox = image_obj['total_boxes'].shape[0]
505 |         if numbox > 0:
506 |             h = images[index].shape[0]
507 |             w = images[index].shape[1]
508 |             pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Union')
509 |             image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
510 |             regw = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0]
511 |             regh = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1]
512 |             qq1 = image_obj['total_boxes'][:, 0] + image_obj['total_boxes'][:, 5] * regw
513 |             qq2 = image_obj['total_boxes'][:, 1] + image_obj['total_boxes'][:, 6] * regh
514 |             qq3 = image_obj['total_boxes'][:, 2] + image_obj['total_boxes'][:, 7] * regw
515 |             qq4 = image_obj['total_boxes'][:, 3] + image_obj['total_boxes'][:, 8] * regh
516 |             image_obj['total_boxes'] = np.transpose(np.vstack([qq1, qq2, qq3, qq4, image_obj['total_boxes'][:, 4]]))
517 |             image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
518 |             image_obj['total_boxes'][:, 0:4] = np.fix(image_obj['total_boxes'][:, 0:4]).astype(np.int32)
519 |             dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)
520 | 
521 |             numbox = image_obj['total_boxes'].shape[0]
522 |             tempimg = np.zeros((24, 24, 3, numbox))
523 | 
524 |             if numbox > 0:
525 |                 for k in range(0, numbox):
526 |                     tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
527 |                     tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
528 |                     if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
529 |                         tempimg[:, :, :, k] = imresample(tmp, (24, 24))
530 |                     else:
531 |                         return np.empty(0)  # invalid crop: bail out (np.empty() without a shape raises TypeError)
532 | 
533 |                 tempimg = (tempimg - 127.5) * 0.0078125
534 |                 image_obj['rnet_input'] = np.transpose(tempimg, (3, 1, 0, 2))
535 | 
536 |     # # # # # # # # # # # # #
537 |     # second stage - refinement of face candidates with rnet
538 |     # # # # # # # # # # # # #
539 | 
540 |     bulk_rnet_input = np.empty((0, 24, 24, 3))
541 |     for index, image_obj in enumerate(images_with_boxes):
542 |         if 'rnet_input' in image_obj:
543 |             bulk_rnet_input = np.append(bulk_rnet_input, image_obj['rnet_input'], axis=0)
544 | 
545 |     out = rnet(bulk_rnet_input)
546 |     out0 = np.transpose(out[0])
547 |     out1 = np.transpose(out[1])
548 |     score = out1[1, :]
549 | 
550 |     i = 0
551 |     for index, image_obj in enumerate(images_with_boxes):
552 |         if 'rnet_input' not in image_obj:
553 |             continue
554 | 
555 |         rnet_input_count = image_obj['rnet_input'].shape[0]
556 |         score_per_image = score[i:i + rnet_input_count]
557 |         out0_per_image = out0[:, i:i + rnet_input_count]
558 | 
559 |         ipass = np.where(score_per_image > threshold[1])
560 |         image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
561 |                                               np.expand_dims(score_per_image[ipass].copy(), 1)])
562 | 
563 |         mv = out0_per_image[:, ipass[0]]
564 | 
565 |         if image_obj['total_boxes'].shape[0] > 0:
566 |             h = images[index].shape[0]
567 |             w = images[index].shape[1]
568 |             pick = nms(image_obj['total_boxes'], 0.7, 'Union')
569 |             image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
570 |             image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv[:, pick]))
571 |             image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
572 | 
573 |             numbox = image_obj['total_boxes'].shape[0]
574 | 
575 |             if numbox > 0:
576 |                 tempimg = np.zeros((48, 48, 3, numbox))
577 |                 image_obj['total_boxes'] = np.fix(image_obj['total_boxes']).astype(np.int32)
578 |                 dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)
579 | 
580 |                 for k in range(0, numbox):
581 |                     tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
582 |                     tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
583 |                     if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
584 |                         tempimg[:, :, :, k] = imresample(tmp, (48, 48))
585 |                     else:
586 |                         return np.empty(0)  # invalid crop: bail out (np.empty() without a shape raises TypeError)
587 |                 tempimg = (tempimg - 127.5) * 0.0078125
588 |                 image_obj['onet_input'] = np.transpose(tempimg, (3, 1, 0, 2))
589 | 
590 |         i += rnet_input_count
591 | 
592 |     # # # # # # # # # # # # #
593 |     # third stage - further refinement and facial landmarks positions with onet
594 |     # # # # # # # # # # # # #
595 | 
596 |     bulk_onet_input = np.empty((0, 48, 48, 3))
597 |     for index, image_obj in enumerate(images_with_boxes):
598 |         if 'onet_input' in image_obj:
599 |             bulk_onet_input = np.append(bulk_onet_input, image_obj['onet_input'], axis=0)
600 | 
601 |     out = onet(bulk_onet_input)
602 | 
603 |     out0 = np.transpose(out[0])
604 |     out1 = np.transpose(out[1])
605 |     out2 = np.transpose(out[2])
606 |     score = out2[1, :]
607 |     points = out1
608 | 
609 |     i = 0
610 |     ret = []
611 |     for index, image_obj in enumerate(images_with_boxes):
612 |         if 'onet_input' not in image_obj:
613 |             ret.append(None)
614 |             continue
615 | 
616 |         onet_input_count = image_obj['onet_input'].shape[0]
617 | 
618 |         out0_per_image = out0[:, i:i + onet_input_count]
619 |         score_per_image = score[i:i + onet_input_count]
620 |         points_per_image = points[:, i:i + onet_input_count]
621 | 
622 |         ipass = np.where(score_per_image > threshold[2])
623 |         points_per_image = points_per_image[:, ipass[0]]
624 | 
625 |         image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
626 |                                               np.expand_dims(score_per_image[ipass].copy(), 1)])
627 |         mv = out0_per_image[:, ipass[0]]
628 | 
629 |         w = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + 1
630 |         h = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + 1
631 |         points_per_image[0:5, :] = np.tile(w, (5, 1)) * points_per_image[0:5, :] + np.tile(
632 |             image_obj['total_boxes'][:, 0], (5, 1)) - 1
633 |         points_per_image[5:10, :] = np.tile(h, (5, 1)) * points_per_image[5:10, :] + np.tile(
634 |             image_obj['total_boxes'][:, 1], (5, 1)) - 1
635 | 
636 |         if image_obj['total_boxes'].shape[0] > 0:
637 |             image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv))
638 |             pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Min')
639 |             image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
640 |             points_per_image = points_per_image[:, pick]
641 | 
642 |             ret.append((image_obj['total_boxes'], points_per_image))
643 |         else:
644 |             ret.append(None)
645 | 
646 |         i += onet_input_count
647 | 
648 |     return ret
649 | 
650 | 
651 | # function [boundingbox] = bbreg(boundingbox,reg)
652 | def bbreg(boundingbox,reg):
653 |     """Calibrate bounding boxes"""
654 |     if reg.shape[1]==1:
655 |         reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))
656 | 
657 |     w = boundingbox[:,2]-boundingbox[:,0]+1
658 |     h = boundingbox[:,3]-boundingbox[:,1]+1
659 |     b1 = boundingbox[:,0]+reg[:,0]*w
660 |     b2 = boundingbox[:,1]+reg[:,1]*h
661 |     b3 = boundingbox[:,2]+reg[:,2]*w
662 |     b4 = boundingbox[:,3]+reg[:,3]*h
663 |     boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ]))
664 |     return boundingbox
665 | 
666 | def generateBoundingBox(imap, reg, scale, t):
667 |     """Use heatmap to generate bounding boxes"""
668 |     stride=2
669 |     cellsize=12
670 | 
671 |     imap = np.transpose(imap)
672 |     dx1 = np.transpose(reg[:,:,0])
673 |     dy1 = np.transpose(reg[:,:,1])
674 |     dx2 = np.transpose(reg[:,:,2])
675 |     dy2 = np.transpose(reg[:,:,3])
676 |     y, x = np.where(imap >= t)
677 |     if y.shape[0]==1:
678 |         dx1 = np.flipud(dx1)
679 |         dy1 = np.flipud(dy1)
680 |         dx2 = np.flipud(dx2)
681 |         dy2 = np.flipud(dy2)
682 |     score = imap[(y,x)]
683 |     reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ]))
684 |     if reg.size==0:
685 |         reg = np.empty((0,4))  # keep four regression columns so the hstack below stays (N, 9) even when empty
686 |     bb = np.transpose(np.vstack([y,x]))
687 |     q1 = np.fix((stride*bb+1)/scale)
688 |     q2 = np.fix((stride*bb+cellsize-1+1)/scale)
689 |     boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg])
690 |     return boundingbox, reg
691 | 
692 | # function pick = nms(boxes,threshold,type)
693 | def nms(boxes, threshold, method):
694 |     if boxes.size==0:
695 |         return np.empty((0,3))
696 |     x1 = boxes[:,0]
697 |     y1 = boxes[:,1]
698 |     x2 = boxes[:,2]
699 |     y2 = boxes[:,3]
700 |     s = boxes[:,4]
701 |     area = (x2-x1+1) * (y2-y1+1)
702 |     I = np.argsort(s)
703 |     pick = np.zeros_like(s, dtype=np.int16)
704 |     counter = 0
705 |     while I.size>0:
706 |         i = I[-1]
707 |         pick[counter] = i
708 |         counter += 1
709 |         idx = I[0:-1]
710 |         xx1 = np.maximum(x1[i], x1[idx])
711 |         yy1 = np.maximum(y1[i], y1[idx])
712 |         xx2 = np.minimum(x2[i], x2[idx])
713 |         yy2 = np.minimum(y2[i], y2[idx])
714 |         w = np.maximum(0.0, xx2-xx1+1)
715 |         h = np.maximum(0.0, yy2-yy1+1)
716 |         inter = w * h
717 |         if method == 'Min':  # compare strings with '==', not the identity operator 'is'
718 |             o = inter / np.minimum(area[i], area[idx])
719 |         else:
720 |             o = inter / (area[i] + area[idx] - inter)
721 |         I = I[np.where(o<=threshold)]
722 |     pick = pick[0:counter]
723 |     return pick
724 | 
725 | # function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h)
726 | def pad(total_boxes, w, h):
727 |     """Compute the padding coordinates (pad the bounding boxes to square)"""
728 |     tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32)
729 |     tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32)
730 |     numbox = total_boxes.shape[0]
731 | 
732 |     dx = np.ones((numbox), dtype=np.int32)
733 |     dy = np.ones((numbox), dtype=np.int32)
734 |     edx = tmpw.copy().astype(np.int32)
735 |     edy = tmph.copy().astype(np.int32)
736 | 
737 |     x = total_boxes[:,0].copy().astype(np.int32)
738 |     y = total_boxes[:,1].copy().astype(np.int32)
739 |     ex = total_boxes[:,2].copy().astype(np.int32)
740 |     ey = total_boxes[:,3].copy().astype(np.int32)
741 | 
742 |     tmp = np.where(ex>w)
743 |     edx.flat[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1)
744 |     ex[tmp] = w
745 | 
746 |     tmp = np.where(ey>h)
747 |     edy.flat[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1)
748 |     ey[tmp] = h
749 | 
750 |     tmp = np.where(x<1)
751 |     dx.flat[tmp] = np.expand_dims(2-x[tmp],1)
752 |     x[tmp] = 1
753 | 
754 |     tmp = np.where(y<1)
755 |     dy.flat[tmp] = np.expand_dims(2-y[tmp],1)
756 |     y[tmp] = 1
757 | 
758 |     return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph
759 | 
760 | # function [bboxA] = rerec(bboxA)
761 | def rerec(bboxA):
762 |     """Convert bboxA to square."""
763 |     h = bboxA[:,3]-bboxA[:,1]
764 |     w = bboxA[:,2]-bboxA[:,0]
765 |     l = np.maximum(w, h)
766 |     bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5
767 |     bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5
768 |     bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1)))
769 |     return bboxA
770 | 
771 | def imresample(img, sz):
772 |     im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA)  #@UndefinedVariable
773 |     return im_data
774 | 
775 | # This method is kept for debugging purposes
776 | # h=img.shape[0]
777 | # w=img.shape[1]
778 | # hs, ws = sz
779 | # dx = float(w) / ws
780 | # dy = float(h) / hs
781 | # im_data = np.zeros((hs,ws,3))
782 | # for a1 in range(0,hs):
783 | #     for a2 in range(0,ws):
784 | #         for a3 in range(0,3):
785 | #             im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3]
786 | # return im_data
787 | 
788 | 
--------------------------------------------------------------------------------
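
As a standalone illustration (this snippet is not part of the repository's files), the `nms` helper defined in detect_face.py can be exercised on its own. The sketch below assumes the package is importable as `facenet_code.detect_face`; it builds three hypothetical proposals in the `[x1, y1, x2, y2, score]` layout used throughout the file and keeps only the non-redundant ones under the 'Union' (IoU) criterion.

import numpy as np
from facenet_code.detect_face import nms

# Three candidate boxes in [x1, y1, x2, y2, score] order; the first two overlap heavily.
boxes = np.array([
    [10.0, 10.0, 60.0, 60.0, 0.95],
    [12.0, 12.0, 62.0, 62.0, 0.80],      # largely redundant with the first box
    [200.0, 200.0, 250.0, 250.0, 0.70],
])

keep = nms(boxes, 0.5, 'Union')   # indices of the proposals that survive suppression
print(keep)                       # expected: [0 2] -- the 0.80 box is suppressed

The same helper is applied throughout detect_face and bulk_detect_face: the 'Union' (IoU) criterion prunes candidates between stages, and the stricter 'Min' criterion is used once after the final O-Net stage.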