├── .gitignore
├── README.md
├── cal_128XVector_user_facenet.py
├── collect_frame_to_csv.py
├── data
│   └── data.csv
├── detect_face.py
├── face_detector_MTcnn.py
├── facenet.py
├── model
│   ├── det1.npy
│   ├── det2.npy
│   └── det3.npy
└── realtime_detect_face_and_recognition.py

/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# face-detect-MTcnn-faceNet

Environment:

- Windows
- TensorFlow
- PyCharm
- OpenCV

Overall functionality:

- MTCNN: face detection
- FaceNet: similarity computation


Step 1: face detection (MTCNN)

At first I used OpenCV's built-in cascade classifier for face detection, but after some reading it turned out that MTCNN gives noticeably better results, so I tried it instead.

The MTCNN framework is a cascade of three networks: PNet, RNet and ONet. My machine is too weak to train them, so I downloaded pretrained weights, which are stored in the model/ folder.

(1) detect_face.py implements the functions for MTCNN face detection.

(2) face_detector_MTcnn.py tests detect_face.py: it detects and localizes faces in a live video stream.


Step 2: computing the embedding distance between two images with FaceNet

(1) facenet.py is a file I downloaded as-is; it implements the required helper functions. I won't explain how FaceNet itself works here -- you can look it up yourself; I first learned about the method from Andrew Ng's videos. Before that I had only tried histogram comparison, which worked poorly, so the FaceNet approach is a big improvement.

(2) cal_128XVector_user_facenet.py uses the functions from facenet.py to compute the distance between two images:

1. build_facenet_model(modir='./model/20170512-110547.pb'): builds the FaceNet model. My machine is too weak (and good face data is hard to find) to train the model myself, so I downloaded the pretrained file 20170512-110547.pb; it can be downloaded from the official site.

2. cal_128_vector(): computes the 128-dimensional embedding of one image; the result is a [1,128] array.

3. cal_dist(): computes the Euclidean distance (the square root of the sum of squared differences) between two embeddings; this value is used to measure how similar two images are.

4. saver_data_to_csv(array, label='lijie2', csv_dir='./data/data.csv'): stores a collected embedding in the CSV file with `label` as its column name; the label is later used to tell whose face was recognized.

5. cal_dist_from_csv(): compares a freshly captured embedding with the data already stored in the CSV file; it returns the distance to the closest stored embedding together with that embedding's label, as used in the sketch below.
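
To make the two steps concrete, here is a minimal sketch that chains them to compare two photos. It assumes the pretrained files are already in ./model/ and that each photo contains at least one face; `img1.jpg` and `img2.jpg` are placeholder paths, and `first_face` is an ad-hoc helper written for this sketch (not part of the repo):

```python
import cv2
import tensorflow as tf
import detect_face
from cal_128XVector_user_facenet import build_facenet_model, cal_128_vector, cal_dist

# Step 1: load the MTCNN detector (PNet/RNet/ONet weights from ./model/)
with tf.Graph().as_default():
    sess = tf.Session()
    with sess.as_default():
        pnet, rnet, onet = detect_face.create_mtcnn(sess, './model/')

def first_face(path):
    """Detect faces in an image file and return the crop of the first one."""
    frame = cv2.imread(path)
    boxes, _ = detect_face.detect_face(frame, 20, pnet, rnet, onet, [0.6, 0.7, 0.7], 0.709)
    r = boxes[0].astype(int)  # rows are [x1, y1, x2, y2, score]; assumes at least one face
    return frame[r[1]:r[3], r[0]:r[2]]

# Step 2: embed both crops with FaceNet and compare them
sess1, images_ph, phase_ph, embeddings = build_facenet_model()
emb1 = cal_128_vector(first_face('img1.jpg'), sess1, images_ph, phase_ph, embeddings)
emb2 = cal_128_vector(first_face('img2.jpg'), sess1, images_ph, phase_ph, embeddings)
print('distance: %.3f' % cal_dist(emb1, emb2))  # smaller distance = more similar faces
```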

Step 3: collect_frame_to_csv.py detects faces in real time and saves the collected face data to the CSV file -- in other words, it stores the user's face information (FaceNet + OpenCV).

The implementation is commented in detail in the script, and it can be run as-is.


Step 4: realtime_detect_face_and_recognition.py

With the required files in the right places it can be run directly; it detects faces and classifies them by identity.


Notes:

Since 20170512-110547.pb is larger than 25 MB it could not be uploaded here; download it yourself and put it in the model folder.

(1) Put all the files into one project and run collect_frame_to_csv.py. It collects face data and prompts you for a label; if the label already exists, the old data is overwritten.

(2) After the data is collected, run realtime_detect_face_and_recognition.py. It asks whether you want to collect data; if you already did that in the previous step, enter n.

It then asks whether to detect; enter y to run face detection and labeling.
If a detected face cannot be matched against the CSV file, it is labeled "others".

--------------------------------------------------------------------------------
/cal_128XVector_user_facenet.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import cv2
import facenet
import pandas as pd

def build_facenet_model(modir='./model/20170512-110547.pb'):
    # note: a bare tf.Graph().as_default() is a no-op, so the model is loaded into the default graph
    sess = tf.Session()
    facenet.load_model(modir)

    images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
    embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
    phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")

    print('facenet embedding model ready')
    return sess, images_placeholder, phase_train_placeholder, embeddings

# compute an image's 128-D embedding with the FaceNet model
def cal_128_vector(frame, sess, images_placeholder, phase_train_placeholder, embeddings):
    scaled_reshape = []
    embeddings_size = embeddings.get_shape()[1]
    #frame = cv2.imread(frame)  # uncomment if `frame` is a file path
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    image = cv2.resize(frame, (200, 200))
    image = facenet.prewhiten(image)
    scaled_reshape.append(image.reshape(-1, 200, 200, 3))
    array = np.zeros((1, embeddings_size))
    array[0, :] = sess.run(embeddings, feed_dict={images_placeholder: scaled_reshape[0],
                                                  phase_train_placeholder: False})[0]

    return array

def cal_dist(array0, array1):
    # Euclidean distance; ravel first so both [1,128] arrays and flat 128-vectors are handled
    dist = np.sqrt(np.sum(np.square(np.ravel(array0) - np.ravel(array1))))
    return dist

# save a 128-D embedding to the CSV file
def saver_data_to_csv(array, label='lijie2', csv_dir='./data/data.csv'):
    # data1 = DataFrame(array, index=None, columns=[label])
    # data1.to_csv(csv_dir)
    array = array[0, :]
    info = pd.read_csv(csv_dir)
    #print(info.shape)
    info[label] = array
    #print(info.shape)
    info.to_csv(csv_dir, index=None)
    return info

# compare an embedding against every column stored in the CSV; return the
# smallest distance and the corresponding label
def cal_dist_from_csv(csv_dir, array):
    array1 = array[0, :]
    final_column = 'others'  # returned when no stored face is close enough
    pre_dist = 1             # distance threshold
    info = pd.read_csv(csv_dir)
    #print(info.head(0))
    for i, column in enumerate(info.head(0)):
        array2 = info[column]
        dist = cal_dist(array1, array2)
        if dist < pre_dist:  # (the dump truncates mid-file here; the tail is restored to match the call sites)
            pre_dist = dist
            final_column = column
    return pre_dist, final_column
--------------------------------------------------------------------------------
/detect_face.py:
--------------------------------------------------------------------------------
... (collect_frame_to_csv.py and the top of detect_face.py -- the license header, the network definitions and create_mtcnn() -- are missing from this dump; the text resumes inside detect_face(img, minsize, pnet, rnet, onet, threshold, factor), at the scale-pyramid loop) ...

    while minl>=12:
        scales += [m*np.power(factor, factor_count)]
        minl = minl*factor
        factor_count += 1

    # first stage
    for j in range(len(scales)):
        scale=scales[j]
        hs=int(np.ceil(h*scale))
        ws=int(np.ceil(w*scale))
        im_data = imresample(img, (hs, ws))
        im_data = (im_data-127.5)*0.0078125
        img_x = np.expand_dims(im_data, 0)
        img_y = np.transpose(img_x, (0,2,1,3))
        out = pnet(img_y)
        out0 = np.transpose(out[0], (0,2,1,3))
        out1 = np.transpose(out[1], (0,2,1,3))

        boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0])

        # inter-scale nms
        pick = nms(boxes.copy(), 0.5, 'Union')
        if boxes.size>0 and pick.size>0:
            boxes = boxes[pick,:]
            total_boxes = np.append(total_boxes, boxes, axis=0)

    numbox = total_boxes.shape[0]
    if numbox>0:
        pick = nms(total_boxes.copy(), 0.7, 'Union')
        total_boxes = total_boxes[pick,:]
        regw = total_boxes[:,2]-total_boxes[:,0]
        regh = total_boxes[:,3]-total_boxes[:,1]
        qq1 = total_boxes[:,0]+total_boxes[:,5]*regw
        qq2 = total_boxes[:,1]+total_boxes[:,6]*regh
        qq3 = total_boxes[:,2]+total_boxes[:,7]*regw
        qq4 = total_boxes[:,3]+total_boxes[:,8]*regh
        total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]]))
        total_boxes = rerec(total_boxes.copy())
        total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32)
        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)

    numbox = total_boxes.shape[0]
    if numbox>0:
        # second stage
        tempimg = np.zeros((24,24,3,numbox))
        for k in range(0,numbox):
            tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
            tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
            if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:
                tempimg[:,:,:,k] = imresample(tmp, (24, 24))
            else:
                return np.empty(0)  # np.empty() with no shape argument would raise a TypeError
        tempimg = (tempimg-127.5)*0.0078125
        tempimg1 = np.transpose(tempimg, (3,1,0,2))
        out = rnet(tempimg1)
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        score = out1[1,:]
        ipass = np.where(score>threshold[1])
        total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
        mv = out0[:,ipass[0]]
        if total_boxes.shape[0]>0:
            pick = nms(total_boxes, 0.7, 'Union')
            total_boxes = total_boxes[pick,:]
            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick]))
            total_boxes = rerec(total_boxes.copy())

    numbox = total_boxes.shape[0]
    if numbox>0:
        # third stage
        total_boxes = np.fix(total_boxes).astype(np.int32)
        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
        tempimg = np.zeros((48,48,3,numbox))
        for k in range(0,numbox):
            tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
            tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
            if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:
                tempimg[:,:,:,k] = imresample(tmp, (48, 48))
            else:
                return np.empty(0)  # np.empty() with no shape argument would raise a TypeError
        tempimg = (tempimg-127.5)*0.0078125
        tempimg1 = np.transpose(tempimg, (3,1,0,2))
        out = onet(tempimg1)
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        out2 = np.transpose(out[2])
        score = out2[1,:]
        points = out1
        ipass = np.where(score>threshold[2])
        points = points[:,ipass[0]]
        total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
        mv = out0[:,ipass[0]]

        w = total_boxes[:,2]-total_boxes[:,0]+1
        h = total_boxes[:,3]-total_boxes[:,1]+1
        points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1
        points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1
        if total_boxes.shape[0]>0:
            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv))
            pick = nms(total_boxes.copy(), 0.7, 'Min')
            total_boxes = total_boxes[pick,:]
            points = points[:,pick]

    return total_boxes, points


# function [boundingbox] = bbreg(boundingbox,reg)
def bbreg(boundingbox,reg):
    # calibrate bounding boxes
    if reg.shape[1]==1:
        reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))

    w = boundingbox[:,2]-boundingbox[:,0]+1
    h = boundingbox[:,3]-boundingbox[:,1]+1
    b1 = boundingbox[:,0]+reg[:,0]*w
    b2 = boundingbox[:,1]+reg[:,1]*h
    b3 = boundingbox[:,2]+reg[:,2]*w
    b4 = boundingbox[:,3]+reg[:,3]*h
    boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ]))
    return boundingbox

def generateBoundingBox(imap, reg, scale, t):
    # use heatmap to generate bounding boxes
    stride=2
    cellsize=12

    imap = np.transpose(imap)
    dx1 = np.transpose(reg[:,:,0])
    dy1 = np.transpose(reg[:,:,1])
    dx2 = np.transpose(reg[:,:,2])
    dy2 = np.transpose(reg[:,:,3])
    y, x = np.where(imap >= t)
    if y.shape[0]==1:
        dx1 = np.flipud(dx1)
        dy1 = np.flipud(dy1)
        dx2 = np.flipud(dx2)
        dy2 = np.flipud(dy2)
    score = imap[(y,x)]
    reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ]))
    if reg.size==0:
        reg = np.empty((0,3))
    bb = np.transpose(np.vstack([y,x]))
    q1 = np.fix((stride*bb+1)/scale)
    q2 = np.fix((stride*bb+cellsize-1+1)/scale)
    boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg])
    return boundingbox, reg

# function pick = nms(boxes,threshold,type)
def nms(boxes, threshold, method):
    if boxes.size==0:
        return np.empty((0,3))
    x1 = boxes[:,0]
    y1 = boxes[:,1]
    x2 = boxes[:,2]
    y2 = boxes[:,3]
    s = boxes[:,4]
    area = (x2-x1+1) * (y2-y1+1)
    I = np.argsort(s)
    pick = np.zeros_like(s, dtype=np.int16)
    counter = 0
    while I.size>0:
        i = I[-1]
        pick[counter] = i
        counter += 1
        idx = I[0:-1]
        xx1 = np.maximum(x1[i], x1[idx])
        yy1 = np.maximum(y1[i], y1[idx])
        xx2 = np.minimum(x2[i], x2[idx])
        yy2 = np.minimum(y2[i], y2[idx])
        w = np.maximum(0.0, xx2-xx1+1)
        h = np.maximum(0.0, yy2-yy1+1)
        inter = w * h
        if method == 'Min':  # string comparison with `is` is unreliable; use equality
            o = inter / np.minimum(area[i], area[idx])
        else:
            o = inter / (area[i] + area[idx] - inter)
        I = I[np.where(o<=threshold)]
    pick = pick[0:counter]
    return pick

# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h)
def pad(total_boxes, w, h):
    # compute the padding coordinates (pad the bounding boxes to square)
    tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32)
    tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32)
    numbox = total_boxes.shape[0]

    dx = np.ones((numbox), dtype=np.int32)
    dy = np.ones((numbox), dtype=np.int32)
    edx = tmpw.copy().astype(np.int32)
    edy = tmph.copy().astype(np.int32)

    x = total_boxes[:,0].copy().astype(np.int32)
    y = total_boxes[:,1].copy().astype(np.int32)
    ex = total_boxes[:,2].copy().astype(np.int32)
    ey = total_boxes[:,3].copy().astype(np.int32)

    tmp = np.where(ex>w)
    edx[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],0)
    ex[tmp] = w

    tmp = np.where(ey>h)
    edy[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],0)
    ey[tmp] = h

    tmp = np.where(x<1)
    dx[tmp] = np.expand_dims(2-x[tmp],0)
    x[tmp] = 1

    tmp = np.where(y<1)
    dy[tmp] = np.expand_dims(2-y[tmp],0)
    y[tmp] = 1

    return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph

# function [bboxA] = rerec(bboxA)
def rerec(bboxA):
    # convert bboxA to square
    h = bboxA[:,3]-bboxA[:,1]
    w = bboxA[:,2]-bboxA[:,0]
    l = np.maximum(w, h)
    bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5
    bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5
    bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1)))
    return bboxA

def imresample(img, sz):
    im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA) #pylint: disable=no-member
    return im_data

    # This method is kept for debugging purpose
#     h=img.shape[0]
#     w=img.shape[1]
#     hs, ws = sz
#     dx = float(w) / ws
#     dy = float(h) / hs
#     im_data = np.zeros((hs,ws,3))
#     for a1 in range(0,hs):
#         for a2 in range(0,ws):
#             for a3 in range(0,3):
#                 im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3]
#     return im_data

--------------------------------------------------------------------------------
/face_detector_MTcnn.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np
import cv2
import detect_face


if __name__ == "__main__":
    image_size = 200
    minsize = 20
    threshold = [0.6, 0.7, 0.7]
    factor = 0.709  # scale factor
    print("Creating MTCNN networks and loading parameters..")
    #########################build mtcnn########################
    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, './model/')

    capture = cv2.VideoCapture(0)
    while (capture.isOpened()):
        ret, frame = capture.read()
        bounding_box, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)

        nb_faces = bounding_box.shape[0]  # number of detected faces
        # annotate the faces
        for face_position in bounding_box:
            rect = face_position.astype(int)
            # bounding rectangle
            cv2.rectangle(frame, (rect[0], rect[1]), (rect[2], rect[3]), (0, 255, 255), 2, 1)
            cv2.putText(frame, "faces:%d" % (nb_faces), (10, 20), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 255), 4)

        cv2.imshow('Video', frame)
        if cv2.waitKey(1) & 0xff == 27:
            break
    capture.release()
    cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/facenet.py:
--------------------------------------------------------------------------------
"""Functions for building the face recognition network.
"""
# MIT License
#
# Copyright (c) 2016 David Sandberg
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# pylint: disable=missing-docstring
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
from subprocess import Popen, PIPE
import tensorflow as tf
from tensorflow.python.framework import ops
import numpy as np
from scipy import misc
from sklearn.model_selection import KFold
from scipy import interpolate
from tensorflow.python.training import training
import random
import re
from tensorflow.python.platform import gfile
from six import iteritems


def triplet_loss(anchor, positive, negative, alpha):
    """Calculate the triplet loss according to the FaceNet paper

    Args:
      anchor: the embeddings for the anchor images.
      positive: the embeddings for the positive images.
      negative: the embeddings for the negative images.

    Returns:
      the triplet loss according to the FaceNet paper as a float tensor.
    """
    with tf.variable_scope('triplet_loss'):
        pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), 1)
        neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), 1)

        basic_loss = tf.add(tf.subtract(pos_dist, neg_dist), alpha)
        loss = tf.reduce_mean(tf.maximum(basic_loss, 0.0), 0)

    return loss


def decov_loss(xs):
    """Decov loss as described in https://arxiv.org/pdf/1511.06068.pdf
    'Reducing Overfitting In Deep Networks by Decorrelating Representation'
    """
    x = tf.reshape(xs, [int(xs.get_shape()[0]), -1])
    m = tf.reduce_mean(x, 0, True)
    z = tf.expand_dims(x - m, 2)
    corr = tf.reduce_mean(tf.matmul(z, tf.transpose(z, perm=[0, 2, 1])), 0)
    corr_frob_sqr = tf.reduce_sum(tf.square(corr))
    corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr)))
    loss = 0.5 * (corr_frob_sqr - corr_diag_sqr)
    return loss


def center_loss(features, label, alfa, nrof_classes):
    """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition"
    (http://ydwen.github.io/papers/WenECCV16.pdf)
    """
    nrof_features = features.get_shape()[1]
    centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32,
                              initializer=tf.constant_initializer(0), trainable=False)
    label = tf.reshape(label, [-1])
    centers_batch = tf.gather(centers, label)
    diff = (1 - alfa) * (centers_batch - features)
    centers = tf.scatter_sub(centers, label, diff)
    loss = tf.reduce_mean(tf.square(features - centers_batch))
    return loss, centers


def get_image_paths_and_labels(dataset):
    image_paths_flat = []
    labels_flat = []
    for i in range(len(dataset)):
        image_paths_flat += dataset[i].image_paths
        labels_flat += [i] * len(dataset[i].image_paths)
    return image_paths_flat, labels_flat


def shuffle_examples(image_paths, labels):
    shuffle_list = list(zip(image_paths, labels))
    random.shuffle(shuffle_list)
    image_paths_shuff, labels_shuff = zip(*shuffle_list)
    return image_paths_shuff, labels_shuff


def read_images_from_disk(input_queue):
    """Consumes a single filename and label as a ' '-delimited string.
    Args:
      filename_and_label_tensor: A scalar string tensor.
    Returns:
      Two tensors: the decoded image, and the string label.
    """
    label = input_queue[1]
    file_contents = tf.read_file(input_queue[0])
    example = tf.image.decode_image(file_contents, channels=3)
    return example, label


def random_rotate_image(image):
    angle = np.random.uniform(low=-10.0, high=10.0)
    return misc.imrotate(image, angle, 'bicubic')


def read_and_augment_data(image_list, label_list, image_size, batch_size, max_nrof_epochs,
                          random_crop, random_flip, random_rotate, nrof_preprocess_threads, shuffle=True):
    images = ops.convert_to_tensor(image_list, dtype=tf.string)
    labels = ops.convert_to_tensor(label_list, dtype=tf.int32)

    # Makes an input queue
    input_queue = tf.train.slice_input_producer([images, labels],
                                                num_epochs=max_nrof_epochs, shuffle=shuffle)

    images_and_labels = []
    for _ in range(nrof_preprocess_threads):
        image, label = read_images_from_disk(input_queue)
        if random_rotate:
            image = tf.py_func(random_rotate_image, [image], tf.uint8)
        if random_crop:
            image = tf.random_crop(image, [image_size, image_size, 3])
        else:
            image = tf.image.resize_image_with_crop_or_pad(image, image_size, image_size)
        if random_flip:
            image = tf.image.random_flip_left_right(image)
        # pylint: disable=no-member
        image.set_shape((image_size, image_size, 3))
        image = tf.image.per_image_standardization(image)
        images_and_labels.append([image, label])

    image_batch, label_batch = tf.train.batch_join(
        images_and_labels, batch_size=batch_size,
        capacity=4 * nrof_preprocess_threads * batch_size,
        allow_smaller_final_batch=True)

    return image_batch, label_batch


def _add_loss_summaries(total_loss):
    """Add summaries for losses.

    Generates moving average for all losses and associated summaries for
    visualizing the performance of the network.

    Args:
      total_loss: Total loss from loss().
    Returns:
      loss_averages_op: op for generating moving averages of losses.
    """
    # Compute the moving average of all individual losses and the total loss.
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    losses = tf.get_collection('losses')
    loss_averages_op = loss_averages.apply(losses + [total_loss])

    # Attach a scalar summary to all individual losses and the total loss; do the
    # same for the averaged version of the losses.
    for l in losses + [total_loss]:
        # Name each loss as '(raw)' and name the moving average version of the loss
        # as the original loss name.
        tf.summary.scalar(l.op.name + ' (raw)', l)
        tf.summary.scalar(l.op.name, loss_averages.average(l))

    return loss_averages_op


def train(total_loss, global_step, optimizer, learning_rate, moving_average_decay, update_gradient_vars,
          log_histograms=True):
    # Generate moving averages of all losses and associated summaries.
    loss_averages_op = _add_loss_summaries(total_loss)

    # Compute gradients.
    with tf.control_dependencies([loss_averages_op]):
        if optimizer == 'ADAGRAD':
            opt = tf.train.AdagradOptimizer(learning_rate)
        elif optimizer == 'ADADELTA':
            opt = tf.train.AdadeltaOptimizer(learning_rate, rho=0.9, epsilon=1e-6)
        elif optimizer == 'ADAM':
            opt = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=0.1)
        elif optimizer == 'RMSPROP':
            opt = tf.train.RMSPropOptimizer(learning_rate, decay=0.9, momentum=0.9, epsilon=1.0)
        elif optimizer == 'MOM':
            opt = tf.train.MomentumOptimizer(learning_rate, 0.9, use_nesterov=True)
        else:
            raise ValueError('Invalid optimization algorithm')

        grads = opt.compute_gradients(total_loss, update_gradient_vars)

    # Apply gradients.
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Add histograms for trainable variables.
    if log_histograms:
        for var in tf.trainable_variables():
            tf.summary.histogram(var.op.name, var)

    # Add histograms for gradients.
    if log_histograms:
        for grad, var in grads:
            if grad is not None:
                tf.summary.histogram(var.op.name + '/gradients', grad)

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')

    return train_op


def prewhiten(x):
    mean = np.mean(x)
    std = np.std(x)
    std_adj = np.maximum(std, 1.0 / np.sqrt(x.size))
    y = np.multiply(np.subtract(x, mean), 1 / std_adj)
    return y


def crop(image, random_crop, image_size):
    if image.shape[1] > image_size:
        sz1 = int(image.shape[1] // 2)
        sz2 = int(image_size // 2)
        if random_crop:
            diff = sz1 - sz2
            (h, v) = (np.random.randint(-diff, diff + 1), np.random.randint(-diff, diff + 1))
        else:
            (h, v) = (0, 0)
        image = image[(sz1 - sz2 + v):(sz1 + sz2 + v), (sz1 - sz2 + h):(sz1 + sz2 + h), :]
    return image


def flip(image, random_flip):
    if random_flip and np.random.choice([True, False]):
        image = np.fliplr(image)
    return image


def to_rgb(img):
    w, h = img.shape
    ret = np.empty((w, h, 3), dtype=np.uint8)
    ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img
    return ret


def load_data(image_paths, do_random_crop, do_random_flip, image_size, do_prewhiten=True):
    nrof_samples = len(image_paths)
    images = np.zeros((nrof_samples, image_size, image_size, 3))
    for i in range(nrof_samples):
        img = misc.imread(image_paths[i])
        if img.ndim == 2:
            img = to_rgb(img)
        if do_prewhiten:
            img = prewhiten(img)
        img = crop(img, do_random_crop, image_size)
        img = flip(img, do_random_flip)
        images[i, :, :, :] = img
    return images


def get_label_batch(label_data, batch_size, batch_index):
    nrof_examples = np.size(label_data, 0)
    j = batch_index * batch_size % nrof_examples
    if j + batch_size <= nrof_examples:
        batch = label_data[j:j + batch_size]
    else:
        x1 = label_data[j:nrof_examples]
        x2 = label_data[0:nrof_examples - j]
        batch = np.vstack([x1, x2])
    batch_int = batch.astype(np.int64)
    return batch_int


def get_batch(image_data, batch_size, batch_index):
    nrof_examples = np.size(image_data, 0)
    j = batch_index * batch_size % nrof_examples
    if j + batch_size <= nrof_examples:
        batch = image_data[j:j + batch_size, :, :, :]
    else:
        x1 = image_data[j:nrof_examples, :, :, :]
        x2 = image_data[0:nrof_examples - j, :, :, :]
        batch = np.vstack([x1, x2])
    batch_float = batch.astype(np.float32)
    return batch_float


def get_triplet_batch(triplets, batch_index, batch_size):
    ax, px, nx = triplets
    a = get_batch(ax, int(batch_size / 3), batch_index)
    p = get_batch(px, int(batch_size / 3), batch_index)
    n = get_batch(nx, int(batch_size / 3), batch_index)
    batch = np.vstack([a, p, n])
    return batch


def get_learning_rate_from_file(filename, epoch):
    with open(filename, 'r') as f:
        for line in f.readlines():
            line = line.split('#', 1)[0]
            if line:
                par = line.strip().split(':')
                e = int(par[0])
                lr = float(par[1])
                if e <= epoch:
                    learning_rate = lr
                else:
                    return learning_rate


class ImageClass():
    "Stores the paths to images for a given class"

    def __init__(self, name, image_paths):
        self.name = name
        self.image_paths = image_paths

    def __str__(self):
        return self.name + ', ' + str(len(self.image_paths)) + ' images'

    def __len__(self):
        return len(self.image_paths)


def get_dataset(path, has_class_directories=True):
    dataset = []
    path_exp = os.path.expanduser(path)
    classes = os.listdir(path_exp)
    classes.sort()
    nrof_classes = len(classes)
    for i in range(nrof_classes):
        class_name = classes[i]
        facedir = os.path.join(path_exp, class_name)
        image_paths = get_image_paths(facedir)
        dataset.append(ImageClass(class_name, image_paths))

    return dataset


def get_image_paths(facedir):
    image_paths = []
    if os.path.isdir(facedir):
        images = os.listdir(facedir)
        image_paths = [os.path.join(facedir, img) for img in images]
    return image_paths


def split_dataset(dataset, split_ratio, mode):
    if mode == 'SPLIT_CLASSES':
        nrof_classes = len(dataset)
        class_indices = np.arange(nrof_classes)
        np.random.shuffle(class_indices)
        split = int(round(nrof_classes * split_ratio))
        train_set = [dataset[i] for i in class_indices[0:split]]
        test_set = [dataset[i] for i in class_indices[split:-1]]
    elif mode == 'SPLIT_IMAGES':
        train_set = []
        test_set = []
        min_nrof_images = 2
        for cls in dataset:
            paths = cls.image_paths
            np.random.shuffle(paths)
            split = int(round(len(paths) * split_ratio))
            if split < min_nrof_images:
                continue  # Not enough images for test set. Skip class...
            train_set.append(ImageClass(cls.name, paths[0:split]))
            test_set.append(ImageClass(cls.name, paths[split:-1]))
    else:
        raise ValueError('Invalid train/test split mode "%s"' % mode)
    return train_set, test_set


def load_model(model):
    # Check if the model is a model directory (containing a metagraph and a checkpoint file)
    # or if it is a protobuf file with a frozen graph
    model_exp = os.path.expanduser(model)
    if (os.path.isfile(model_exp)):
        print('Model filename: %s' % model_exp)
        with gfile.FastGFile(model_exp, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            tf.import_graph_def(graph_def, name='')
    else:
        print('Model directory: %s' % model_exp)
        meta_file, ckpt_file = get_model_filenames(model_exp)

        print('Metagraph file: %s' % meta_file)
        print('Checkpoint file: %s' % ckpt_file)

        saver = tf.train.import_meta_graph(os.path.join(model_exp, meta_file))
        saver.restore(tf.get_default_session(), os.path.join(model_exp, ckpt_file))


def get_model_filenames(model_dir):
    files = os.listdir(model_dir)
    meta_files = [s for s in files if s.endswith('.meta')]
    if len(meta_files) == 0:
        raise ValueError('No meta file found in the model directory (%s)' % model_dir)
    elif len(meta_files) > 1:
        raise ValueError('There should not be more than one meta file in the model directory (%s)' % model_dir)
    meta_file = meta_files[0]
    meta_files = [s for s in files if '.ckpt' in s]
    max_step = -1
    for f in files:
        step_str = re.match(r'(^model-[\w\- ]+.ckpt-(\d+))', f)
        if step_str is not None and len(step_str.groups()) >= 2:
            step = int(step_str.groups()[1])
            if step > max_step:
                max_step = step
                ckpt_file = step_str.groups()[0]
    return meta_file, ckpt_file


def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10):
    assert (embeddings1.shape[0] == embeddings2.shape[0])
    assert (embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)

    tprs = np.zeros((nrof_folds, nrof_thresholds))
    fprs = np.zeros((nrof_folds, nrof_thresholds))
    accuracy = np.zeros((nrof_folds))

    diff = np.subtract(embeddings1, embeddings2)
    dist = np.sum(np.square(diff), 1)
    indices = np.arange(nrof_pairs)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):

        # Find the best threshold for the fold
        acc_train = np.zeros((nrof_thresholds))
        for threshold_idx, threshold in enumerate(thresholds):
            _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set])
        best_threshold_index = np.argmax(acc_train)
        for threshold_idx, threshold in enumerate(thresholds):
            tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _ = calculate_accuracy(threshold,
                                                                                                 dist[test_set],
                                                                                                 actual_issame[test_set])
        _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set],
                                                      actual_issame[test_set])

    tpr = np.mean(tprs, 0)
    fpr = np.mean(fprs, 0)
    return tpr, fpr, accuracy


def calculate_accuracy(threshold, dist, actual_issame):
    predict_issame = np.less(dist, threshold)
    tp = np.sum(np.logical_and(predict_issame, actual_issame))
    fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
    tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame)))
    fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame))

    tpr = 0 if (tp + fn == 0) else float(tp) / float(tp + fn)
    fpr = 0 if (fp + tn == 0) else float(fp) / float(fp + tn)
    acc = float(tp + tn) / dist.size
    return tpr, fpr, acc


def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10):
    assert (embeddings1.shape[0] == embeddings2.shape[0])
    assert (embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)

    val = np.zeros(nrof_folds)
    far = np.zeros(nrof_folds)

    diff = np.subtract(embeddings1, embeddings2)
    dist = np.sum(np.square(diff), 1)
    indices = np.arange(nrof_pairs)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):

        # Find the threshold that gives FAR = far_target
        far_train = np.zeros(nrof_thresholds)
        for threshold_idx, threshold in enumerate(thresholds):
            _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set])
        if np.max(far_train) >= far_target:
            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
            threshold = f(far_target)
        else:
            threshold = 0.0

        val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set])

    val_mean = np.mean(val)
    far_mean = np.mean(far)
    val_std = np.std(val)
    return val_mean, val_std, far_mean


def calculate_val_far(threshold, dist, actual_issame):
    predict_issame = np.less(dist, threshold)
    true_accept = np.sum(np.logical_and(predict_issame, actual_issame))
    false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
    n_same = np.sum(actual_issame)
    n_diff = np.sum(np.logical_not(actual_issame))
    val = float(true_accept) / float(n_same)
    far = float(false_accept) / float(n_diff)
    return val, far


def store_revision_info(src_path, output_dir, arg_string):
    try:
        # Get git hash
        cmd = ['git', 'rev-parse', 'HEAD']
        gitproc = Popen(cmd, stdout=PIPE, cwd=src_path)
        (stdout, _) = gitproc.communicate()
        git_hash = stdout.strip()
    except OSError as e:
        git_hash = ' '.join(cmd) + ': ' + e.strerror

    try:
        # Get local changes
        cmd = ['git', 'diff', 'HEAD']
        gitproc = Popen(cmd, stdout=PIPE, cwd=src_path)
        (stdout, _) = gitproc.communicate()
        git_diff = stdout.strip()
    except OSError as e:
        git_diff = ' '.join(cmd) + ': ' + e.strerror

    # Store a text file in the log directory
    rev_info_filename = os.path.join(output_dir, 'revision_info.txt')
    with open(rev_info_filename, "w") as text_file:
        text_file.write('arguments: %s\n--------------------\n' % arg_string)
        text_file.write('tensorflow version: %s\n--------------------\n' % tf.__version__)  # @UndefinedVariable
        text_file.write('git hash: %s\n--------------------\n' % git_hash)
        text_file.write('%s' % git_diff)


def list_variables(filename):
    reader = training.NewCheckpointReader(filename)
    variable_map = reader.get_variable_to_shape_map()
    names = sorted(variable_map.keys())
    return names


def put_images_on_grid(images, shape=(16, 8)):
    nrof_images = images.shape[0]
    img_size = images.shape[1]
    bw = 3
    img = np.zeros((shape[1] * (img_size + bw) + bw, shape[0] * (img_size + bw) + bw, 3), np.float32)
    for i in range(shape[1]):
        x_start = i * (img_size + bw) + bw
        for j in range(shape[0]):
            img_index = i * shape[0] + j
            if img_index >= nrof_images:
                break
            y_start = j * (img_size + bw) + bw
            img[x_start:x_start + img_size, y_start:y_start + img_size, :] = images[img_index, :, :, :]
        if img_index >= nrof_images:
            break
    return img


def write_arguments_to_file(args, filename):
    with open(filename, 'w') as f:
        for key, value in iteritems(vars(args)):
            f.write('%s: %s\n' % (key, str(value)))
--------------------------------------------------------------------------------
/model/det1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/windylijie/face-detect-MTcnn-faceNet/dee60f8c14b5d5caaab4eb4d8d2d065014df2cc4/model/det1.npy
--------------------------------------------------------------------------------
/model/det2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/windylijie/face-detect-MTcnn-faceNet/dee60f8c14b5d5caaab4eb4d8d2d065014df2cc4/model/det2.npy
--------------------------------------------------------------------------------
/model/det3.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/windylijie/face-detect-MTcnn-faceNet/dee60f8c14b5d5caaab4eb4d8d2d065014df2cc4/model/det3.npy
--------------------------------------------------------------------------------
/realtime_detect_face_and_recognition.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from collect_frame_to_csv import collect_frame_to_csv
import detect_face
import cv2
from cal_128XVector_user_facenet import cal_128_vector, build_facenet_model, cal_dist_from_csv


if __name__ == "__main__":
    collect_frame_to_csv()

    detection = input("detect or not(y/n):")
    if detection == 'y':
        csv_dir = './data/data.csv'  # CSV holding the stored 128-D face embeddings
        # load the FaceNet model
        sess1, images_placeholder, phase_train_placeholder, embeddings = build_facenet_model()

        image_size = 200
        minsize = 20
        threshold = [0.6, 0.7, 0.7]
        factor = 0.709  # scale factor
        print("Creating MTCNN networks and loading parameters..")
        #########################build mtcnn########################
        with tf.Graph().as_default():
            sess = tf.Session()
            with sess.as_default():
                pnet, rnet, onet = detect_face.create_mtcnn(sess, './model/')

        capture = cv2.VideoCapture(0)
        while (capture.isOpened()):
            ret, frame = capture.read()
            bounding_box, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)

            nb_faces = bounding_box.shape[0]  # number of detected faces
            # annotate the faces
            for face_position in bounding_box:
                rect = face_position.astype(int)
                image = frame[rect[1]:rect[3], rect[0]:rect[2]]  # crop the face ROI
                array = cal_128_vector(image, sess1, images_placeholder, phase_train_placeholder, embeddings)  # the face's 128-D embedding
                dist, label = cal_dist_from_csv(csv_dir, array)
                # bounding rectangle
                cv2.rectangle(frame, (rect[0], rect[1]), (rect[2], rect[3]), (0, 255, 255), 2, 1)
                cv2.putText(frame, "faces:%d" % (nb_faces), (10, 20), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 255), 4)
                cv2.putText(frame, '%.2f' % (dist), (rect[0], rect[1] - 30), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 255), 4)
                cv2.putText(frame, label, (rect[0], rect[1]), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 255), 4)

            cv2.imshow('Video', frame)
            if cv2.waitKey(1) & 0xff == 27:
                break
        capture.release()
        cv2.destroyAllWindows()
    else:
        print('The End...')
--------------------------------------------------------------------------------