├── README.md
├── age_gender
│   ├── AgeGender.py
│   └── age_deploy.prototxt
├── face_dataset
│   ├── face1.jpg
│   └── face2.jpg
├── face_recognition
│   ├── LICENSE
│   ├── README.md
│   ├── face_recog.py
│   ├── model_data
│   │   ├── onet.h5
│   │   ├── pnet.h5
│   │   └── rnet.h5
│   ├── network.py
│   ├── pre_process.py
│   └── turn.py
├── predict.py
├── show.py
└── train.py

/README.md:
--------------------------------------------------------------------------------
# CV_SHU_Project
The goal of this project is to design and implement a facial emotion recognition system. The system uses a convolutional neural network (CNN) built on the TensorFlow deep learning framework to predict and classify facial emotions. A graphical interface exposes two functions: classifying the emotion in an image loaded from disk, and real-time emotion recognition on video.
--------------------------------------------------------------------------------
/age_gender/AgeGender.py:
--------------------------------------------------------------------------------
import cv2
import argparse
import time

def detect_and_draw_boxes(network, input_frame, threshold=0.7):
    # Run the OpenCV DNN face detector and draw a green box around each detection.
    frame_copy = input_frame.copy()
    height, width = frame_copy.shape[:2]
    blob = cv2.dnn.blobFromImage(frame_copy, 1.0, (300, 300), [104, 117, 123], True, False)
    network.setInput(blob)
    detected_objects = network.forward()
    bounding_boxes = []
    for i in range(detected_objects.shape[2]):
        conf = detected_objects[0, 0, i, 2]
        if conf > threshold:
            x_start = int(detected_objects[0, 0, i, 3] * width)
            y_start = int(detected_objects[0, 0, i, 4] * height)
            x_end = int(detected_objects[0, 0, i, 5] * width)
            y_end = int(detected_objects[0, 0, i, 6] * height)
            bounding_boxes.append([x_start, y_start, x_end, y_end])
            cv2.rectangle(frame_copy, (x_start, y_start), (x_end, y_end), (0, 255, 0), int(round(height / 150)), 8)
    return frame_copy, bounding_boxes

parser = argparse.ArgumentParser(description='Age and Gender')
parser.add_argument('--input')
parser.add_argument("--device", default="cpu")
args = parser.parse_args()

MEAN_VALUES = (78.4263377603, 87.7689143744, 114.895847746)
ages = ['(0-2)', '(4-6)', '(8-12)', '(15-20)', '(25-32)', '(38-43)', '(48-53)', '(60-100)']
genders = ['Male', 'Female']
faceProto, faceModel = "opencv_face_detector.pbtxt", "opencv_face_detector_uint8.pb"
ageProto, ageModel = "age_deploy.prototxt", "age_net.caffemodel"
genderProto, genderModel = "gender_deploy.prototxt", "gender_net.caffemodel"
genderNet = cv2.dnn.readNet(genderModel, genderProto)
faceNet = cv2.dnn.readNet(faceModel, faceProto)
ageNet = cv2.dnn.readNet(ageModel, ageProto)

video_capture = cv2.VideoCapture(args.input if args.input else 0)
padding = 20

while cv2.waitKey(1) < 0:
    start_time = time.time()
    ret, frame = video_capture.read()
    if not ret:
        cv2.waitKey()
        break
    frame_with_boxes, boxes = detect_and_draw_boxes(faceNet, frame)
    if not boxes:
        continue
    for box in boxes:
        # Crop the face with a small margin, clipped to the frame borders.
        face = frame[max(0, box[1] - padding):min(box[3] + padding, frame.shape[0] - 1),
                     max(0, box[0] - padding):min(box[2] + padding, frame.shape[1] - 1)]
        blob = cv2.dnn.blobFromImage(face, 1.0, (227, 227), MEAN_VALUES, swapRB=False)
        genderNet.setInput(blob)
        gender_predictions = genderNet.forward()
        gender = genders[gender_predictions[0].argmax()]
        print(f"Gender : {gender}, confidence = {round(gender_predictions[0].max(), 3)}")
        ageNet.setInput(blob)
        age_predictions = ageNet.forward()
        age = ages[age_predictions[0].argmax()]
        print(f"Age : {age}, confidence = {round(age_predictions[0].max(), 3)}")
        info = f"{gender},{age}"
        cv2.putText(frame_with_boxes, info, (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2, cv2.LINE_AA)
    cv2.imshow("Age-Gender", frame_with_boxes)
    print(f"Time : {round(time.time() - start_time, 3)}")
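Note: the script above is wired for a webcam or video stream. For a quick single-image check, a minimal sketch along these lines should work; the image path is an assumption, face detection is skipped (so the input should be roughly face-centred), and the gender/age model files must sit next to the script:

    import cv2

    MEAN_VALUES = (78.4263377603, 87.7689143744, 114.895847746)
    genders = ['Male', 'Female']
    ages = ['(0-2)', '(4-6)', '(8-12)', '(15-20)', '(25-32)', '(38-43)', '(48-53)', '(60-100)']

    genderNet = cv2.dnn.readNet("gender_net.caffemodel", "gender_deploy.prototxt")
    ageNet = cv2.dnn.readNet("age_net.caffemodel", "age_deploy.prototxt")

    face = cv2.imread("../face_dataset/face1.jpg")  # hypothetical, roughly face-centred image
    blob = cv2.dnn.blobFromImage(face, 1.0, (227, 227), MEAN_VALUES, swapRB=False)
    genderNet.setInput(blob)
    print("Gender:", genders[genderNet.forward()[0].argmax()])
    ageNet.setInput(blob)
    print("Age:", ages[ageNet.forward()[0].argmax()])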
--------------------------------------------------------------------------------
/age_gender/age_deploy.prototxt:
--------------------------------------------------------------------------------
name: "CaffeNet"
input: "data"
input_dim: 1
input_dim: 3
input_dim: 227
input_dim: 227
layers {
  name: "conv1"
  type: CONVOLUTION
  bottom: "data"
  top: "conv1"
  convolution_param {
    num_output: 96
    kernel_size: 7
    stride: 4
  }
}
layers {
  name: "relu1"
  type: RELU
  bottom: "conv1"
  top: "conv1"
}
layers {
  name: "pool1"
  type: POOLING
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layers {
  name: "norm1"
  type: LRN
  bottom: "pool1"
  top: "norm1"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
layers {
  name: "conv2"
  type: CONVOLUTION
  bottom: "norm1"
  top: "conv2"
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
  }
}
layers {
  name: "relu2"
  type: RELU
  bottom: "conv2"
  top: "conv2"
}
layers {
  name: "pool2"
  type: POOLING
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layers {
  name: "norm2"
  type: LRN
  bottom: "pool2"
  top: "norm2"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
layers {
  name: "conv3"
  type: CONVOLUTION
  bottom: "norm2"
  top: "conv3"
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
  }
}
layers {
  name: "relu3"
  type: RELU
  bottom: "conv3"
  top: "conv3"
}
layers {
  name: "pool5"
  type: POOLING
  bottom: "conv3"
  top: "pool5"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layers {
  name: "fc6"
  type: INNER_PRODUCT
  bottom: "pool5"
  top: "fc6"
  inner_product_param {
    num_output: 512
  }
}
layers {
  name: "relu6"
  type: RELU
  bottom: "fc6"
  top: "fc6"
}
layers {
  name: "drop6"
  type: DROPOUT
  bottom: "fc6"
  top: "fc6"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layers {
  name: "fc7"
  type: INNER_PRODUCT
  bottom: "fc6"
  top: "fc7"
  inner_product_param {
    num_output: 512
  }
}
layers {
  name: "relu7"
  type: RELU
  bottom: "fc7"
  top: "fc7"
}
layers {
  name: "drop7"
  type: DROPOUT
  bottom: "fc7"
  top: "fc7"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layers {
  name: "fc8"
  type: INNER_PRODUCT
  bottom: "fc7"
  top: "fc8"
  inner_product_param {
    num_output: 8
  }
}
layers {
  name: "prob"
  type: SOFTMAX
  bottom: "fc8"
  top: "prob"
}
--------------------------------------------------------------------------------
/face_dataset/face1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Anifalak-Lobelia/CV_SHU_Project/8420d512ec19578df2eeaa1424395e7bf4be2569/face_dataset/face1.jpg
--------------------------------------------------------------------------------
/face_dataset/face2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Anifalak-Lobelia/CV_SHU_Project/8420d512ec19578df2eeaa1424395e7bf4be2569/face_dataset/face2.jpg
--------------------------------------------------------------------------------
/face_recognition/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2020 JiaQi Xu

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/face_recognition/README.md:
--------------------------------------------------------------------------------
## Face-Recognition: a face recognition algorithm implemented in Keras
---

### Contents
1. [Environment](#environment)
2. [Download](#download)
3. [Usage](#usage)
4. [Performance](#performance)

## Environment
tensorflow-gpu==1.13.1
keras==2.1.5

## Download
The facenet_keras.h5 weights required for prediction can be downloaded from the Releases page,
or from Baidu Netdisk:
Link: https://pan.baidu.com/s/1A9jCJa_sQ4D3ejelgXX2RQ  Extraction code: tkhg

## Usage
1. Download the whole repository.
2. Unzip it and download the facenet_keras.h5 file.
3. Put facenet_keras.h5 into model_data.
4. Put the faces you want to recognize into face_dataset.
5. Run face_recog.py.
6. turn.py shows the effect of face alignment.

## Performance
Output of face_recog.py:
![result](/result/result.png)
--------------------------------------------------------------------------------
/face_recognition/face_recog.py:
--------------------------------------------------------------------------------
import os
import cv2
import numpy as np
import face_recognition.pre_process as u
from face_recognition.network import InceptionResNetV1
from face_recognition.network import mtcnn

class face_1():
    def __init__(self):
        dir_path = os.path.dirname(os.path.realpath(__file__))  # directory of this file
        pnet_model_path = os.path.join(dir_path, 'model_data', 'pnet.h5')  # path to the PNet weights
        rnet_model_path = os.path.join(dir_path, 'model_data', 'rnet.h5')  # path to the RNet weights
        onet_model_path = os.path.join(dir_path, 'model_data', 'onet.h5')  # path to the ONet weights
        self.m1 = mtcnn(pnet_model_path, rnet_model_path, onet_model_path)  # MTCNN instance for face detection
        self.t1 = [0.5, 0.6, 0.8]  # MTCNN stage thresholds
        self.f1 = InceptionResNetV1()  # InceptionResNetV1 instance for face recognition
        model_path = os.path.join(dir_path, 'model_data', 'facenet_keras.h5')  # pretrained facenet weights
        self.f1.load_weights(model_path)
        f_list = os.listdir("face_dataset")  # files in the face dataset directory
        self.known_face_encodings = []  # encodings of known faces
        self.known_face_names = []  # names of known faces
        for face in f_list:
            name = face.split(".")[0]  # use the file name as the person's name
            img = cv2.imread("./face_dataset/" + face)  # read the face image
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # convert to RGB
            rectangles = self.m1.detectFace(img, self.t1)  # detect face boxes with MTCNN
            rectangles = u.rect2square(np.array(rectangles))  # turn the boxes into squares
            rectangle = rectangles[0]  # keep the first face box
            landmark = np.reshape(rectangle[5:15], (5, 2)) - np.array([int(rectangle[0]), int(rectangle[1])])  # landmark coordinates relative to the crop
            crop_img = img[int(rectangle[1]):int(rectangle[3]), int(rectangle[0]):int(rectangle[2])]  # crop the face
            crop_img, _ = u.Alignment_1(crop_img, landmark)  # align the cropped face
            crop_img = np.expand_dims(cv2.resize(crop_img, (160, 160)), 0)  # resize to 160x160 and add a batch axis

            face_encoding = u.calc_128_vec(self.f1, crop_img)  # compute the 128-d face encoding

            self.known_face_encodings.append(face_encoding)  # store the known encoding
            self.known_face_names.append(name)  # store the known name

    def recog(self, draw):
        height, width, _ = np.shape(draw)  # frame size
        draw_rgb = cv2.cvtColor(draw, cv2.COLOR_BGR2RGB)  # convert the frame to RGB
        rectangles = self.m1.detectFace(draw_rgb, self.t1)  # detect face boxes with MTCNN
        if len(rectangles) == 0:
            return
        rectangles = u.rect2square(np.array(rectangles, dtype=np.int32))  # turn the boxes into squares
        rectangles[:, [0, 2]] = np.clip(rectangles[:, [0, 2]], 0, width)  # clip x coordinates to the image
        rectangles[:, [1, 3]] = np.clip(rectangles[:, [1, 3]], 0, height)  # clip y coordinates to the image
        face_encodings = []
        for rectangle in rectangles:
            landmark = np.reshape(rectangle[5:15], (5, 2)) - np.array([int(rectangle[0]), int(rectangle[1])])  # landmark coordinates relative to the crop
            crop_img = draw_rgb[int(rectangle[1]):int(rectangle[3]), int(rectangle[0]):int(rectangle[2])]  # crop the face

            crop_img, _ = u.Alignment_1(crop_img, landmark)  # align the cropped face
            crop_img = np.expand_dims(cv2.resize(crop_img, (160, 160)), 0)  # resize to 160x160 and add a batch axis

            face_encoding = u.calc_128_vec(self.f1, crop_img)  # compute the 128-d face encoding
            face_encodings.append(face_encoding)  # store the encodings found in this frame
        face_names = []
        for face_encoding in face_encodings:
            matches = u.compare_faces(self.known_face_encodings, face_encoding, tolerance=0.9)  # compare against the known encodings
            name = "Unknown"  # default to unknown

            face_distances = u.face_distance(self.known_face_encodings, face_encoding)  # Euclidean distances to the known encodings

            best_match_index = np.argmin(face_distances)  # index of the smallest distance
            if matches[best_match_index]:  # accept only if within tolerance
                name = self.known_face_names[best_match_index]  # name of the matched face
            face_names.append(name)  # one name per detected face

        rectangles = rectangles[:, 0:4]  # keep only the box coordinates

        for (left, top, right, bottom), name in zip(rectangles, face_names):
            cv2.rectangle(draw, (left, top), (right, bottom), (0, 0, 255), 2)  # draw the face box
            font = cv2.FONT_HERSHEY_SIMPLEX
            cv2.putText(draw, name, (left, bottom - 15), font, 0.75, (255, 255, 255), 2)  # draw the name next to the box
        return draw

    def open_camera(self):
        self.video_capture = cv2.VideoCapture(0)  # open the camera

        while True:
            ret, draw = self.video_capture.read()  # read a frame
            if not ret:
                break
            self.recog(draw)  # run face recognition
            cv2.imshow('Video', draw)  # show the frame
            if cv2.waitKey(20) & 0xFF == ord('q'):  # press q to quit
                break

        self.video_capture.release()  # release the camera
        cv2.destroyAllWindows()  # close the window


if __name__ == "__main__":
    dudu = face_1()  # build the recognizer
    dudu.open_camera()  # run the camera loop
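Note: a minimal single-image run of the recognizer above; the image path and output file are assumptions, and it expects to run from the repository root so that face_dataset/ resolves, with facenet_keras.h5 already placed in face_recognition/model_data:

    import cv2
    from face_recognition.face_recog import face_1

    recognizer = face_1()
    img = cv2.imread("face_dataset/face1.jpg")
    result = recognizer.recog(img)            # draws boxes and names in place; None if no face
    if result is not None:
        cv2.imwrite("recognized.jpg", result)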
--------------------------------------------------------------------------------
/face_recognition/model_data/onet.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Anifalak-Lobelia/CV_SHU_Project/8420d512ec19578df2eeaa1424395e7bf4be2569/face_recognition/model_data/onet.h5
--------------------------------------------------------------------------------
/face_recognition/model_data/pnet.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Anifalak-Lobelia/CV_SHU_Project/8420d512ec19578df2eeaa1424395e7bf4be2569/face_recognition/model_data/pnet.h5
--------------------------------------------------------------------------------
/face_recognition/model_data/rnet.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Anifalak-Lobelia/CV_SHU_Project/8420d512ec19578df2eeaa1424395e7bf4be2569/face_recognition/model_data/rnet.h5
--------------------------------------------------------------------------------
/face_recognition/network.py:
--------------------------------------------------------------------------------
from functools import partial

import cv2
import numpy as np
from keras import backend as K
from keras.layers import (Activation, BatchNormalization, Concatenate, Conv2D,
                          Dense, Dropout, Flatten, GlobalAveragePooling2D,
                          Input, Lambda, MaxPool2D, MaxPooling2D, Permute,
                          PReLU, add)
from keras.models import Model

import face_recognition.pre_process as pre_p

def create_Pnet(weight_path):
    # PNet: the proposal network, fully convolutional
    inputs = Input(shape=[None, None, 3])
    x = Conv2D(10, (3, 3), strides=1, padding='valid', name='conv1')(inputs)
    x = PReLU(shared_axes=[1, 2], name='PReLU1')(x)
    x = MaxPool2D(pool_size=2)(x)
    x = Conv2D(16, (3, 3), strides=1, padding='valid', name='conv2')(x)
    x = PReLU(shared_axes=[1, 2], name='PReLU2')(x)
    x = Conv2D(32, (3, 3), strides=1, padding='valid', name='conv3')(x)
    x = PReLU(shared_axes=[1, 2], name='PReLU3')(x)
    classifier = Conv2D(2, (1, 1), activation='softmax', name='conv4-1')(x)
    bbox_regress = Conv2D(4, (1, 1), name='conv4-2')(x)
    model = Model([inputs], [classifier, bbox_regress])
    model.load_weights(weight_path, by_name=True)
    return model

def create_Rnet(weight_path):
    # RNet: the refinement network
    inputs = Input(shape=[24, 24, 3])
    x = Conv2D(28, (3, 3), strides=1, padding='valid', name='conv1')(inputs)
    x = PReLU(shared_axes=[1, 2], name='prelu1')(x)
    x = MaxPool2D(pool_size=3, strides=2, padding='same')(x)
    x = Conv2D(48, (3, 3), strides=1, padding='valid', name='conv2')(x)
    x = PReLU(shared_axes=[1, 2], name='prelu2')(x)
    x = MaxPool2D(pool_size=3, strides=2)(x)
    x = Conv2D(64, (2, 2), strides=1, padding='valid', name='conv3')(x)
    x = PReLU(shared_axes=[1, 2], name='prelu3')(x)
    x = Permute((3, 2, 1))(x)
    x = Flatten()(x)
    x = Dense(128, name='conv4')(x)
    x = PReLU(name='prelu4')(x)
    classifier = Dense(2, activation='softmax', name='conv5-1')(x)
    bbox_regress = Dense(4, name='conv5-2')(x)
    model = Model([inputs], [classifier, bbox_regress])
    model.load_weights(weight_path, by_name=True)
    return model

def create_Onet(weight_path):
    # ONet: the output network, which also predicts the five facial landmarks
    inputs = Input(shape=[48, 48, 3])
    x = Conv2D(32, (3, 3), strides=1, padding='valid', name='conv1')(inputs)
    x = PReLU(shared_axes=[1, 2], name='prelu1')(x)
    x = MaxPool2D(pool_size=3, strides=2, padding='same')(x)
    x = Conv2D(64, (3, 3), strides=1, padding='valid', name='conv2')(x)
    x = PReLU(shared_axes=[1, 2], name='prelu2')(x)
    x = MaxPool2D(pool_size=3, strides=2)(x)
    x = Conv2D(64, (3, 3), strides=1, padding='valid', name='conv3')(x)
    x = PReLU(shared_axes=[1, 2], name='prelu3')(x)
    x = MaxPool2D(pool_size=2)(x)
    x = Conv2D(128, (2, 2), strides=1, padding='valid', name='conv4')(x)
    x = PReLU(shared_axes=[1, 2], name='prelu4')(x)
    x = Permute((3, 2, 1))(x)
    x = Flatten()(x)
    x = Dense(256, name='conv5')(x)
    x = PReLU(name='prelu5')(x)
    classifier = Dense(2, activation='softmax', name='conv6-1')(x)
    bbox_regress = Dense(4, name='conv6-2')(x)
    landmark_regress = Dense(10, name='conv6-3')(x)
    model = Model([inputs], [classifier, bbox_regress, landmark_regress])
    model.load_weights(weight_path, by_name=True)
    return model

class mtcnn():
    def __init__(self, pnet_path='face_recognition/model_data/pnet.h5',
                 rnet_path='face_recognition/model_data/rnet.h5',
                 onet_path='face_recognition/model_data/onet.h5'):
        # Initialisation: build the three cascaded networks
        self.Pnet = create_Pnet(pnet_path)
        self.Rnet = create_Rnet(rnet_path)
        self.Onet = create_Onet(onet_path)

    def detectFace(self, img, threshold):
        # Detect the faces in an image
        copy_img = (img.copy() - 127.5) / 127.5  # normalise pixels to [-1, 1]
        origin_h, origin_w, _ = copy_img.shape
        scales = pre_p.calculateScales(img)
        out = []
        for scale in scales:
            hs = int(origin_h * scale)
            ws = int(origin_w * scale)
            scale_img = cv2.resize(copy_img, (ws, hs))
            inputs = np.expand_dims(scale_img, 0)
            output = self.Pnet.predict(inputs)
            output = [output[0][0], output[1][0]]
            out.append(output)
        rectangles = []
        for i in range(len(scales)):
            cls_prob = out[i][0][:, :, 1]
            roi = out[i][1]
            out_h, out_w = cls_prob.shape
            out_side = max(out_h, out_w)
            rectangle = pre_p.detect_face_12net(cls_prob, roi, out_side, 1 / scales[i], origin_w, origin_h, threshold[0])
            rectangles.extend(rectangle)
        rectangles = np.array(pre_p.NMS(rectangles, 0.7))
        if len(rectangles) == 0:
            return rectangles
        predict_24_batch = []
        for rectangle in rectangles:
            crop_img = copy_img[int(rectangle[1]):int(rectangle[3]), int(rectangle[0]):int(rectangle[2])]
            scale_img = cv2.resize(crop_img, (24, 24))
            predict_24_batch.append(scale_img)
        cls_prob, roi_prob = self.Rnet.predict(np.array(predict_24_batch))
        rectangles = pre_p.filter_face_24net(cls_prob, roi_prob, rectangles, origin_w, origin_h, threshold[1])
        if len(rectangles) == 0:
            return rectangles
        predict_batch = []
        for rectangle in rectangles:
            crop_img = copy_img[int(rectangle[1]):int(rectangle[3]), int(rectangle[0]):int(rectangle[2])]
            scale_img = cv2.resize(crop_img, (48, 48))
            predict_batch.append(scale_img)
        cls_prob, roi_prob, pts_prob = self.Onet.predict(np.array(predict_batch))
        rectangles = pre_p.filter_face_48net(cls_prob, roi_prob, pts_prob, rectangles, origin_w, origin_h, threshold[2])
        return rectangles


def scaling(x, scale):
    # Scale the tensor by a constant factor
    return x * scale


def _generate_layer_name(name, branch_idx=None, prefix=None):
    if prefix is None:
        return None
    if branch_idx is None:
        return '_'.join((prefix, name))
    return '_'.join((prefix, 'Branch', str(branch_idx), name))


def conv2d_bn(x, filters, kernel_size, strides=1, padding='same', activation='relu', use_bias=False, name=None):
    # Convolution followed by batch normalisation and an activation
    x = Conv2D(filters,
               kernel_size,
               strides=strides,
               padding=padding,
               use_bias=use_bias,
               name=name)(x)
    if not use_bias:
        x = BatchNormalization(axis=3, momentum=0.995, epsilon=0.001,
                               scale=False, name=_generate_layer_name('BatchNorm', prefix=name))(x)
    if activation is not None:
        x = Activation(activation, name=_generate_layer_name('Activation', prefix=name))(x)
    return x


def _inception_resnet_block(x, scale, block_type, block_idx, activation='relu'):
    # Inception-ResNet block: parallel branches plus a scaled residual connection
    channel_axis = 3
    if block_idx is None:
        prefix = None
    else:
        prefix = '_'.join((block_type, str(block_idx)))

    name_fmt = partial(_generate_layer_name, prefix=prefix)

    if block_type == 'Block35':
        branch_0 = conv2d_bn(x, 32, 1, name=name_fmt('Conv2d_1x1', 0))
        branch_1 = conv2d_bn(x, 32, 1, name=name_fmt('Conv2d_0a_1x1', 1))
        branch_1 = conv2d_bn(branch_1, 32, 3, name=name_fmt('Conv2d_0b_3x3', 1))
        branch_2 = conv2d_bn(x, 32, 1, name=name_fmt('Conv2d_0a_1x1', 2))
        branch_2 = conv2d_bn(branch_2, 32, 3, name=name_fmt('Conv2d_0b_3x3', 2))
        branch_2 = conv2d_bn(branch_2, 32, 3, name=name_fmt('Conv2d_0c_3x3', 2))
        branches = [branch_0, branch_1, branch_2]
    elif block_type == 'Block17':
        branch_0 = conv2d_bn(x, 128, 1, name=name_fmt('Conv2d_1x1', 0))
        branch_1 = conv2d_bn(x, 128, 1, name=name_fmt('Conv2d_0a_1x1', 1))
        branch_1 = conv2d_bn(branch_1, 128, [1, 7], name=name_fmt('Conv2d_0b_1x7', 1))
        branch_1 = conv2d_bn(branch_1, 128, [7, 1], name=name_fmt('Conv2d_0c_7x1', 1))
        branches = [branch_0, branch_1]
    elif block_type == 'Block8':
        branch_0 = conv2d_bn(x, 192, 1, name=name_fmt('Conv2d_1x1', 0))
        branch_1 = conv2d_bn(x, 192, 1, name=name_fmt('Conv2d_0a_1x1', 1))
        branch_1 = conv2d_bn(branch_1, 192, [1, 3], name=name_fmt('Conv2d_0b_1x3', 1))
        branch_1 = conv2d_bn(branch_1, 192, [3, 1], name=name_fmt('Conv2d_0c_3x1', 1))
        branches = [branch_0, branch_1]

    mixed = Concatenate(axis=channel_axis, name=name_fmt('Concatenate'))(branches)
    up = conv2d_bn(mixed, K.int_shape(x)[channel_axis], 1, activation=None, use_bias=True,
                   name=name_fmt('Conv2d_1x1'))
    up = Lambda(scaling,
                output_shape=K.int_shape(up)[1:],
                arguments={'scale': scale})(up)
    x = add([x, up])
    if activation is not None:
        x = Activation(activation, name=name_fmt('Activation'))(x)
    return x


def InceptionResNetV1(input_shape=(160, 160, 3),
                      classes=128,
                      dropout_keep_prob=0.8):
    # Build the InceptionResNetV1 model
    channel_axis = 3
    inputs = Input(shape=input_shape)
    x = conv2d_bn(inputs, 32, 3, strides=2, padding='valid', name='Conv2d_1a_3x3')
    x = conv2d_bn(x, 32, 3, padding='valid', name='Conv2d_2a_3x3')
    x = conv2d_bn(x, 64, 3, name='Conv2d_2b_3x3')
    x = MaxPooling2D(3, strides=2, name='MaxPool_3a_3x3')(x)

    x = conv2d_bn(x, 80, 1, padding='valid', name='Conv2d_3b_1x1')
    x = conv2d_bn(x, 192, 3, padding='valid', name='Conv2d_4a_3x3')
    x = conv2d_bn(x, 256, 3, strides=2, padding='valid', name='Conv2d_4b_3x3')

    for block_idx in range(1, 6):
        x = _inception_resnet_block(x, scale=0.17, block_type='Block35', block_idx=block_idx)

    name_fmt = partial(_generate_layer_name, prefix='Mixed_6a')
    branch_0 = conv2d_bn(x, 384, 3, strides=2, padding='valid', name=name_fmt('Conv2d_1a_3x3', 0))
    branch_1 = conv2d_bn(x, 192, 1, name=name_fmt('Conv2d_0a_1x1', 1))
    branch_1 = conv2d_bn(branch_1, 192, 3, name=name_fmt('Conv2d_0b_3x3', 1))
    branch_1 = conv2d_bn(branch_1, 256, 3, strides=2, padding='valid', name=name_fmt('Conv2d_1a_3x3', 1))
    branch_pool = MaxPooling2D(3, strides=2, padding='valid', name=name_fmt('MaxPool_1a_3x3', 2))(x)
    branches = [branch_0, branch_1, branch_pool]
    x = Concatenate(axis=channel_axis, name='Mixed_6a')(branches)

    for block_idx in range(1, 11):
        x = _inception_resnet_block(x,
                                    scale=0.1,
                                    block_type='Block17',
                                    block_idx=block_idx)

    name_fmt = partial(_generate_layer_name, prefix='Mixed_7a')
    branch_0 = conv2d_bn(x, 256, 1, name=name_fmt('Conv2d_0a_1x1', 0))
    branch_0 = conv2d_bn(branch_0, 384, 3, strides=2, padding='valid', name=name_fmt('Conv2d_1a_3x3', 0))
    branch_1 = conv2d_bn(x, 256, 1, name=name_fmt('Conv2d_0a_1x1', 1))
    branch_1 = conv2d_bn(branch_1, 256, 3, strides=2, padding='valid', name=name_fmt('Conv2d_1a_3x3', 1))
    branch_2 = conv2d_bn(x, 256, 1, name=name_fmt('Conv2d_0a_1x1', 2))
    branch_2 = conv2d_bn(branch_2, 256, 3, name=name_fmt('Conv2d_0b_3x3', 2))
    branch_2 = conv2d_bn(branch_2, 256, 3, strides=2, padding='valid', name=name_fmt('Conv2d_1a_3x3', 2))
    branch_pool = MaxPooling2D(3, strides=2, padding='valid', name=name_fmt('MaxPool_1a_3x3', 3))(x)
    branches = [branch_0, branch_1, branch_2, branch_pool]
    x = Concatenate(axis=channel_axis, name='Mixed_7a')(branches)

    for block_idx in range(1, 6):
        x = _inception_resnet_block(x,
                                    scale=0.2,
                                    block_type='Block8',
                                    block_idx=block_idx)
    x = _inception_resnet_block(x, scale=1., activation=None, block_type='Block8', block_idx=6)

    x = GlobalAveragePooling2D(name='AvgPool')(x)
    x = Dropout(1.0 - dropout_keep_prob, name='Dropout')(x)
    x = Dense(classes, use_bias=False, name='Bottleneck')(x)
    bn_name = _generate_layer_name('BatchNorm', prefix='Bottleneck')
    x = BatchNormalization(momentum=0.995, epsilon=0.001, scale=False,
                           name=bn_name)(x)

    model = Model(inputs, x, name='inception_resnet_v1')

    return model
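Note: a minimal detection run against the networks above; the image path and the [0.5, 0.6, 0.7] thresholds are assumptions, and it expects to run from the repository root so the weight paths resolve:

    import cv2
    import numpy as np
    from face_recognition.network import mtcnn

    detector = mtcnn('face_recognition/model_data/pnet.h5',
                     'face_recognition/model_data/rnet.h5',
                     'face_recognition/model_data/onet.h5')
    img = cv2.cvtColor(cv2.imread('face_dataset/face1.jpg'), cv2.COLOR_BGR2RGB)
    boxes = detector.detectFace(img, [0.5, 0.6, 0.7])
    # one row per face: x1, y1, x2, y2, score, then 10 landmark values
    print(np.array(boxes).shape)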
--------------------------------------------------------------------------------
/face_recognition/pre_process.py:
--------------------------------------------------------------------------------
import math

import cv2
import numpy as np


#-----------------------------#
#   Compute the scaling ratios
#   applied to the input image
#-----------------------------#
def calculateScales(img):
    pr_scale = 1.0
    h, w, _ = img.shape

    #--------------------------------------------#
    #   Pin down the overall image size:
    #   if the short side is above 500, scale it down to 500;
    #   if the long side is below 500, scale it up to 500
    #--------------------------------------------#
    if min(w, h) > 500:
        pr_scale = 500.0 / min(h, w)
        w = int(w * pr_scale)
        h = int(h * pr_scale)
    elif max(w, h) < 500:
        pr_scale = 500.0 / max(h, w)
        w = int(w * pr_scale)
        h = int(h * pr_scale)
    #------------------------------------------------#
    #   Build the image pyramid scales, keeping every
    #   level at least 12 pixels on its short side
    #------------------------------------------------#
    scales = []
    factor = 0.709
    factor_count = 0
    minl = min(h, w)
    while minl >= 12:
        scales.append(pr_scale * pow(factor, factor_count))
        minl *= factor
        factor_count += 1
    return scales

#-----------------------------#
#   Turn rectangles into squares
#-----------------------------#
def rect2square(rectangles):
    w = rectangles[:, 2] - rectangles[:, 0]
    h = rectangles[:, 3] - rectangles[:, 1]
    l = np.maximum(w, h).T
    rectangles[:, 0] = rectangles[:, 0] + w * 0.5 - l * 0.5
    rectangles[:, 1] = rectangles[:, 1] + h * 0.5 - l * 0.5
    rectangles[:, 2:4] = rectangles[:, 0:2] + np.repeat([l], 2, axis=0).T
    return rectangles

#-------------------------------------#
#   Non-maximum suppression
#-------------------------------------#
def NMS(rectangles, threshold):
    if len(rectangles) == 0:
        return rectangles
    boxes = np.array(rectangles)
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    s = boxes[:, 4]
    area = np.multiply(x2 - x1 + 1, y2 - y1 + 1)
    I = np.array(s.argsort())
    pick = []
    while len(I) > 0:
        xx1 = np.maximum(x1[I[-1]], x1[I[0:-1]])  # I[-1] has the highest score, I[0:-1] are the others
        yy1 = np.maximum(y1[I[-1]], y1[I[0:-1]])
        xx2 = np.minimum(x2[I[-1]], x2[I[0:-1]])
        yy2 = np.minimum(y2[I[-1]], y2[I[0:-1]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        o = inter / (area[I[-1]] + area[I[0:-1]] - inter)
        pick.append(I[-1])
        I = I[np.where(o <= threshold)[0]]
    result_rectangle = boxes[pick].tolist()
    return result_rectangle
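#--------------------------------------------------------------#
#   Worked example (illustrative only, not executed at import):
#   NMS([[10, 10, 60, 60, 0.9],
#        [12, 12, 62, 62, 0.8],
#        [100, 100, 150, 150, 0.7]], 0.5)
#   The first two boxes overlap with IoU = 2401/2801 ≈ 0.86,
#   so the 0.8 box is suppressed and the result keeps the
#   0.9 and 0.7 boxes.
#--------------------------------------------------------------#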
#-------------------------------------#
#   Post-process the PNet outputs.
#   Refactored for readability; it
#   differs noticeably from the video
#   version.
#-------------------------------------#
def detect_face_12net(cls_prob, roi, out_side, scale, width, height, threshold):
    #-------------------------------------#
    #   Stride between feature-map cells
    #-------------------------------------#
    stride = 0
    if out_side != 1:
        stride = float(2 * out_side - 1) / (out_side - 1)

    #-------------------------------------#
    #   Coordinates of the cells whose
    #   score passes the threshold
    #-------------------------------------#
    (y, x) = np.where(cls_prob >= threshold)

    #-----------------------------------------#
    #   Scores of those cells;
    #   the resulting score has shape [num_box, 1]
    #-----------------------------------------#
    score = np.expand_dims(cls_prob[y, x], -1)

    #-------------------------------------------------------#
    #   Map the cell coordinates back to prior boxes on the
    #   original image, then adjust the corners with the
    #   regression outputs to obtain rough predicted boxes.
    #   The resulting boundingbox has shape [num_box, 4]
    #-------------------------------------------------------#
    boundingbox = np.concatenate([np.expand_dims(x, -1), np.expand_dims(y, -1)], axis=-1)
    top_left = np.fix(stride * boundingbox + 0)
    bottom_right = np.fix(stride * boundingbox + 11)
    boundingbox = np.concatenate((top_left, bottom_right), axis=-1)
    boundingbox = (boundingbox + roi[y, x] * 12.0) * scale

    #-------------------------------------------------------#
    #   Stack the boxes with their scores and square them up.
    #   The resulting rectangles has shape [num_box, 5]
    #-------------------------------------------------------#
    rectangles = np.concatenate((boundingbox, score), axis=-1)
    rectangles = rect2square(rectangles)

    rectangles[:, [1, 3]] = np.clip(rectangles[:, [1, 3]], 0, height)
    rectangles[:, [0, 2]] = np.clip(rectangles[:, [0, 2]], 0, width)
    return rectangles

#-------------------------------------#
#   Post-process the RNet outputs.
#   Refactored for readability.
#-------------------------------------#
def filter_face_24net(cls_prob, roi, rectangles, width, height, threshold):
    #-------------------------------------#
    #   Filter by score
    #-------------------------------------#
    pick = cls_prob[:, 1] >= threshold

    score = cls_prob[pick, 1:2]
    rectangles = rectangles[pick, :4]
    roi = roi[pick, :]

    #-------------------------------------------------------#
    #   Refine the rough boxes with the RNet regression.
    #   Vertical offsets are scaled by h (the boxes are
    #   square after rect2square, so w == h).
    #   The resulting rectangles has shape [num_box, 4]
    #-------------------------------------------------------#
    w = np.expand_dims(rectangles[:, 2] - rectangles[:, 0], -1)
    h = np.expand_dims(rectangles[:, 3] - rectangles[:, 1], -1)
    rectangles[:, [0, 2]] = rectangles[:, [0, 2]] + roi[:, [0, 2]] * w
    rectangles[:, [1, 3]] = rectangles[:, [1, 3]] + roi[:, [1, 3]] * h

    #-------------------------------------------------------#
    #   Stack the boxes with scores and square them up.
    #   The resulting rectangles has shape [num_box, 5]
    #-------------------------------------------------------#
    rectangles = np.concatenate((rectangles, score), axis=-1)
    rectangles = rect2square(rectangles)

    rectangles[:, [1, 3]] = np.clip(rectangles[:, [1, 3]], 0, height)
    rectangles[:, [0, 2]] = np.clip(rectangles[:, [0, 2]], 0, width)
    return np.array(NMS(rectangles, 0.7))

#-------------------------------------#
#   Post-process the ONet outputs.
#   Refactored for readability.
#-------------------------------------#
def filter_face_48net(cls_prob, roi, pts, rectangles, width, height, threshold):
    #-------------------------------------#
    #   Filter by score
    #-------------------------------------#
    pick = cls_prob[:, 1] >= threshold

    score = cls_prob[pick, 1:2]
    rectangles = rectangles[pick, :4]
    pts = pts[pick, :]
    roi = roi[pick, :]

    w = np.expand_dims(rectangles[:, 2] - rectangles[:, 0], -1)
    h = np.expand_dims(rectangles[:, 3] - rectangles[:, 1], -1)
    #-------------------------------------------------------#
    #   Refine the boxes with the ONet regression and decode
    #   the facial landmarks relative to each box; vertical
    #   offsets are scaled by h (boxes are square, so w == h).
    #   face_marks has shape [num_box, 10],
    #   rectangles has shape [num_box, 4]
    #-------------------------------------------------------#
    face_marks = np.zeros_like(pts)
    face_marks[:, [0, 2, 4, 6, 8]] = w * pts[:, [0, 1, 2, 3, 4]] + rectangles[:, 0:1]
    face_marks[:, [1, 3, 5, 7, 9]] = h * pts[:, [5, 6, 7, 8, 9]] + rectangles[:, 1:2]
    rectangles[:, [0, 2]] = rectangles[:, [0, 2]] + roi[:, [0, 2]] * w
    rectangles[:, [1, 3]] = rectangles[:, [1, 3]] + roi[:, [1, 3]] * h
    #-------------------------------------------------------#
    #   Stack the boxes with scores and landmarks.
    #   The resulting rectangles has shape [num_box, 15]
    #-------------------------------------------------------#
    rectangles = np.concatenate((rectangles, score, face_marks), axis=-1)

    rectangles[:, [1, 3]] = np.clip(rectangles[:, [1, 3]], 0, height)
    rectangles[:, [0, 2]] = np.clip(rectangles[:, [0, 2]], 0, width)
    return np.array(NMS(rectangles, 0.3))
#-------------------------------------#
#   Face alignment
#-------------------------------------#
def Alignment_1(img, landmark):
    if landmark.shape[0] == 68:
        x = landmark[36, 0] - landmark[45, 0]
        y = landmark[36, 1] - landmark[45, 1]
    elif landmark.shape[0] == 5:
        x = landmark[0, 0] - landmark[1, 0]
        y = landmark[0, 1] - landmark[1, 1]

    if x == 0:
        angle = 0
    else:
        angle = math.atan(y / x) * 180 / math.pi

    center = (img.shape[1] // 2, img.shape[0] // 2)

    RotationMatrix = cv2.getRotationMatrix2D(center, angle, 1)
    new_img = cv2.warpAffine(img, RotationMatrix, (img.shape[1], img.shape[0]))

    RotationMatrix = np.array(RotationMatrix)
    new_landmark = []
    for i in range(landmark.shape[0]):
        pts = []
        pts.append(RotationMatrix[0, 0] * landmark[i, 0] + RotationMatrix[0, 1] * landmark[i, 1] + RotationMatrix[0, 2])
        pts.append(RotationMatrix[1, 0] * landmark[i, 0] + RotationMatrix[1, 1] * landmark[i, 1] + RotationMatrix[1, 2])
        new_landmark.append(pts)

    new_landmark = np.array(new_landmark)

    return new_img, new_landmark

def Alignment_2(img, std_landmark, landmark):
    def Transformation(std_landmark, landmark):
        std_landmark = np.matrix(std_landmark).astype(np.float64)
        landmark = np.matrix(landmark).astype(np.float64)

        c1 = np.mean(std_landmark, axis=0)
        c2 = np.mean(landmark, axis=0)
        std_landmark -= c1
        landmark -= c2

        s1 = np.std(std_landmark)
        s2 = np.std(landmark)
        std_landmark /= s1
        landmark /= s2

        U, S, Vt = np.linalg.svd(std_landmark.T * landmark)
        R = (U * Vt).T

        return np.vstack([np.hstack(((s2 / s1) * R, c2.T - (s2 / s1) * R * c1.T)), np.matrix([0., 0., 1.])])

    Trans_Matrix = Transformation(std_landmark, landmark)  # shape: 3 x 3
    Trans_Matrix = Trans_Matrix[:2]
    Trans_Matrix = cv2.invertAffineTransform(Trans_Matrix)
    new_img = cv2.warpAffine(img, Trans_Matrix, (img.shape[1], img.shape[0]))

    Trans_Matrix = np.array(Trans_Matrix)
    new_landmark = []
    for i in range(landmark.shape[0]):
        pts = []
        pts.append(Trans_Matrix[0, 0] * landmark[i, 0] + Trans_Matrix[0, 1] * landmark[i, 1] + Trans_Matrix[0, 2])
        pts.append(Trans_Matrix[1, 0] * landmark[i, 0] + Trans_Matrix[1, 1] * landmark[i, 1] + Trans_Matrix[1, 2])
        new_landmark.append(pts)

    new_landmark = np.array(new_landmark)

    return new_img, new_landmark

#---------------------------------#
#   Image pre-processing:
#   per-image standardisation
#---------------------------------#
def pre_process(x):
    if x.ndim == 4:
        axis = (1, 2, 3)
        size = x[0].size
    elif x.ndim == 3:
        axis = (0, 1, 2)
        size = x.size
    else:
        raise ValueError('Dimension should be 3 or 4')

    mean = np.mean(x, axis=axis, keepdims=True)
    std = np.std(x, axis=axis, keepdims=True)
    std_adj = np.maximum(std, 1.0 / np.sqrt(size))
    y = (x - mean) / std_adj
    return y

#---------------------------------#
#   L2 normalisation
#---------------------------------#
def l2_normalize(x, axis=-1, epsilon=1e-10):
    output = x / np.sqrt(np.maximum(np.sum(np.square(x), axis=axis, keepdims=True), epsilon))
    return output

#---------------------------------#
#   Compute the 128-d embedding
#---------------------------------#
def calc_128_vec(model, img):
    face_img = pre_process(img)
    pre = model.predict(face_img)
    pre = l2_normalize(np.concatenate(pre))
    pre = np.reshape(pre, [128])
    return pre
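#--------------------------------------------------------------#
#   Worked example (illustrative only): for x = np.array([3.0, 4.0]),
#   l2_normalize(x) returns [0.6, 0.8], since sqrt(3^2 + 4^2) = 5.
#--------------------------------------------------------------#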
#---------------------------------#
#   Distances between face encodings
#---------------------------------#
def face_distance(face_encodings, face_to_compare):
    if len(face_encodings) == 0:
        return np.empty((0))
    return np.linalg.norm(face_encodings - face_to_compare, axis=1)

#---------------------------------#
#   Compare faces
#---------------------------------#
def compare_faces(known_face_encodings, face_encoding_to_check, tolerance=0.6):
    dis = face_distance(known_face_encodings, face_encoding_to_check)
    return list(dis <= tolerance)
--------------------------------------------------------------------------------
/face_recognition/turn.py:
--------------------------------------------------------------------------------
import os
import cv2
import numpy as np
import face_recognition.pre_process as utils
from face_recognition.network import InceptionResNetV1
from face_recognition.network import mtcnn

class FaceTurner:
    def __init__(self, file_path):
        self.file_path = file_path
        self.threshold = [0.75, 0.75, 0.75]
        dir_path = os.path.dirname(os.path.realpath(__file__))  # directory of this file
        pnet_model_path = os.path.join(dir_path, 'model_data', 'pnet.h5')  # path to the PNet weights
        rnet_model_path = os.path.join(dir_path, 'model_data', 'rnet.h5')  # path to the RNet weights
        onet_model_path = os.path.join(dir_path, 'model_data', 'onet.h5')  # path to the ONet weights
        self.mtcnn_model = mtcnn(pnet_model_path, rnet_model_path, onet_model_path)  # MTCNN instance for detection
        model_path = os.path.join(dir_path, 'model_data', 'facenet_keras.h5')  # path to the facenet weights
        self.facenet_model = InceptionResNetV1()
        self.facenet_model.load_weights(model_path)

    def turn_face(self):
        img = cv2.imread(self.file_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        rectangles = self.mtcnn_model.detectFace(img, self.threshold)
        if len(rectangles) == 0:  # no face found
            return None

        rectangles = utils.rect2square(np.array(rectangles))
        final_img = None

        for rectangle in rectangles:
            landmark = np.reshape(rectangle[5:15], (5, 2)) - np.array([int(rectangle[0]), int(rectangle[1])])
            crop_img = img[int(rectangle[1]):int(rectangle[3]), int(rectangle[0]):int(rectangle[2])]

            # Resize the cropped face to a fixed display size
            display_img1 = cv2.resize(cv2.cvtColor(crop_img, cv2.COLOR_RGB2BGR), (400, 400))
            cv2.imshow('before rotation', display_img1)

            crop_img, _ = utils.Alignment_1(crop_img, landmark)
            crop_img = cv2.resize(crop_img, (160, 160))
            feature1 = utils.calc_128_vec(self.facenet_model, np.expand_dims(crop_img, 0))
            print(feature1)

            final_img = cv2.cvtColor(crop_img, cv2.COLOR_RGB2BGR)

            # Resize the aligned face to a fixed display size
            display_img2 = cv2.resize(final_img, (400, 400))
            cv2.imshow('after rotation', display_img2)

            cv2.waitKey(0)
        cv2.destroyAllWindows()
        return final_img
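Note: a minimal driver for turn.py; the image path is an assumption, and it expects to run from the repository root so the imports and model paths resolve:

    from face_recognition.turn import FaceTurner

    aligned = FaceTurner('face_dataset/face1.jpg').turn_face()
    # aligned is the last aligned face crop in BGR, or None if no face was found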
--------------------------------------------------------------------------------
/predict.py:
--------------------------------------------------------------------------------
import sys
from tensorflow import keras
import cv2
import numpy as npy

# Emotion recognition class
class emotion_recog():
    def __init__(s):
        # Load the pretrained model
        s.model = keras.models.load_model('E:/data_sets/model.h5')
        s.h = 48
        s.w = 48
        s.batch_size = 64
        # Class labels
        s.class_names = ['anger', 'contempt', 'disgust', 'fear', 'happiness', 'neutral', 'sadness', 'surprise']
        s.predict_class = []  # initialise predict_class

    # Classify the emotion of the images in ./predict
    def emotion(s):
        try:
            # Create an ImageDataGenerator
            s.datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1. / 255)
            # Read the images from the folder
            s.generator = s.datagen.flow_from_directory('./predict', target_size=(s.h, s.w), batch_size=s.batch_size, seed=11, shuffle=False, class_mode='categorical')
            # Predict the emotions
            s.predict = s.model.predict(s.generator)
            print('predict_img:', s.predict)  # raw model outputs
            s.predict_indices = npy.argmax(s.predict, axis=1)
            print('predict_class_indices:', s.predict_indices)  # predicted class indices
            s.predict_class = [s.class_names[index] for index in s.predict_indices]
            print(s.predict_class)
        except Exception as e:
            print('Error in emotion method:', e)  # report the exception

    # Video recognition
    def video_recog(s):
        s.cap = cv2.VideoCapture(0)
        while True:
            ret, picture = s.cap.read()
            # Detect the face and classify the emotion
            s.recog(picture)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        s.cap.release()
        cv2.destroyAllWindows()

    # Face detection plus emotion recognition
    def recog(s, picture):
        grey = cv2.cvtColor(picture, cv2.COLOR_BGR2GRAY)
        xml = cv2.CascadeClassifier("haarcascade_frontalface_alt2.xml")
        xml.load('haarcascade_frontalface_alt2.xml')
        face = xml.detectMultiScale(grey, scaleFactor=1.2, minNeighbors=3, minSize=(50, 50))
        if len(face):
            for Rect in face:
                x, y, w, h = Rect
                # Draw the face box
                picture = cv2.rectangle(picture, (x, y), (x + w, y + h), (0, 255, 0), 2)
                f = cv2.resize(grey[y:(y + h), x:(x + w)], (48, 48))
                cv2.imwrite('./predict/img/1.jpg', f)
                # Classify the emotion
                s.emotion()
                # Draw the result on the frame
                cv2.putText(picture, str(s.predict_class), (x, y), cv2.FONT_HERSHEY_PLAIN, 2, (0, 0, 255), 2)
        # Show the frame
        cv2.imshow("Image", picture)
        # Quit on q
        if cv2.waitKey(1) & 0xFF == ord('q'):
            sys.exit(0)

    # Emotion recognition on a single image
    def img_recog(s, img_path):
        # Read the image
        picture = cv2.imread(img_path, 1)
        # Detect the face and classify the emotion
        s.recog(picture)
        return s.predict_class
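Note: a minimal single-image run of the class above; the test image is an assumption, and the model path hard-coded in emotion_recog plus the ./predict/img folder must exist as in the code:

    from predict import emotion_recog

    recognizer = emotion_recog()
    print(recognizer.img_recog('face_dataset/face1.jpg'))  # e.g. ['happiness']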
--------------------------------------------------------------------------------
/show.py:
--------------------------------------------------------------------------------
import tkinter as tk
from tkinter import filedialog
from PIL import ImageTk, Image
from predict import emotion_recog  # the recogniser class shipped in predict.py

class Application(tk.Frame):
    def __init__(self, master=None):
        super().__init__(master)
        self.master = master
        self.pack()
        self.create_widgets()

    def create_widgets(self):
        self.upload_button = tk.Button(self, text="Upload image", command=self.upload_img)
        self.upload_button.pack(side="top")

        self.video_button = tk.Button(self, text="Open camera", command=self.open_video)
        self.video_button.pack(side="top")

        self.quit = tk.Button(self, text="Quit", fg="red", command=self.master.destroy)
        self.quit.pack(side="bottom")

    def upload_img(self):
        file_path = filedialog.askopenfilename()
        if file_path:
            emotion = emotion_recog()
            emotion_result = emotion.img_recog(file_path)
            self.upload_button.config(text=str(emotion_result))
            img = Image.open(file_path)
            img = ImageTk.PhotoImage(img)
            panel = tk.Label(root, image=img)
            panel.image = img
            panel.pack(side="bottom", fill="both", expand="yes")

    def open_video(self):
        emotion = emotion_recog()
        emotion.video_recog()


root = tk.Tk()
app = Application(master=root)
app.mainloop()
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
from tensorflow import keras

cls_names = ['anger', 'disgust', 'fear', 'happy', 'normal', 'sad', 'surprised']
train_dir = '../my/fer2013/train'
val_dir = '../my/fer2013/val'
test_dir = '../my/fer2013/test'
h, w, ch = 48, 48, 1
bs = 64
n_cls = len(cls_names)  # 7 classes; must match the number of class folders

# Data augmentation
train_gen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,          # scale pixel values to the [0, 1] range
    rotation_range=20,       # random rotations of up to 20 degrees
    width_shift_range=0.3,   # random horizontal shifts (up to 30% of the width)
    height_shift_range=0.3,  # random vertical shifts (up to 30% of the height)
    shear_range=0.3,         # random shearing
    zoom_range=0.3,          # random zoom
    horizontal_flip=True,    # random horizontal flips
    fill_mode='nearest'      # fill newly created pixels with the nearest value
)
train_it = train_gen.flow_from_directory(
    train_dir,
    target_size=(h, w),
    batch_size=bs,
    seed=11,
    shuffle=True,
    class_mode='categorical')

# Validation
val_gen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
val_it = val_gen.flow_from_directory(
    val_dir,
    target_size=(h, w),
    batch_size=bs,
    seed=11,
    shuffle=False,
    class_mode='categorical'
)

# Test
test_gen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
test_it = test_gen.flow_from_directory(
    test_dir,
    target_size=(h, w),
    batch_size=bs,
    seed=11,
    shuffle=False,
    class_mode='categorical'
)

# Numbers of training and validation samples
train_num = train_it.samples
val_num = val_it.samples
print(train_num, val_num)

# Build the model
model = keras.models.Sequential()
model.add(keras.layers.Conv2D(filters=32, kernel_size=3, padding='same', activation='relu', input_shape=(w, h, 3)))
model.add(keras.layers.Conv2D(filters=32, kernel_size=3, padding='same', activation='relu'))
model.add(keras.layers.MaxPool2D(pool_size=2))

model.add(keras.layers.Conv2D(filters=64, kernel_size=3, padding='same', activation='relu'))
model.add(keras.layers.Conv2D(filters=64, kernel_size=3, padding='same', activation='relu'))
model.add(keras.layers.MaxPool2D(pool_size=2))

model.add(keras.layers.Conv2D(filters=64, kernel_size=3, padding='same', activation='relu'))
model.add(keras.layers.Conv2D(filters=64, kernel_size=3, padding='same', activation='relu'))
model.add(keras.layers.MaxPool2D(pool_size=2))

model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(64, activation='relu'))
model.add(keras.layers.Dropout(0.4))
model.add(keras.layers.Dense(n_cls, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Train the model
epochs = 40
history = model.fit_generator(
    train_it,
    steps_per_epoch=train_num // bs,
    epochs=epochs,
    validation_data=val_it,
    validation_steps=val_num // bs
)
# Save the model
model.save('model.h5')
--------------------------------------------------------------------------------
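Note: train.py builds test_it but never uses it. A minimal evaluation sketch under the same directory layout (fit_generator/evaluate_generator match the Keras versions pinned in face_recognition/README.md):

    from tensorflow import keras

    model = keras.models.load_model('model.h5')
    test_gen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
    test_it = test_gen.flow_from_directory('../my/fer2013/test', target_size=(48, 48),
                                           batch_size=64, shuffle=False,
                                           class_mode='categorical')
    loss, acc = model.evaluate_generator(test_it, steps=test_it.samples // 64)
    print('test loss: %.4f, test accuracy: %.4f' % (loss, acc))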