├── .gitignore
├── README.md
├── cal_128XVector_user_facenet.py
├── collect_frame_to_csv.py
├── data
│   └── data.csv
├── detect_face.py
├── face_detector_MTcnn.py
├── facenet.py
├── model
│   ├── det1.npy
│   ├── det2.npy
│   └── det3.npy
└── realtime_detect_face_and_recognition.py

/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# face-detect-MTcnn-faceNet

Environment:

- Windows
- TensorFlow
- PyCharm
- OpenCV

Overall functionality:

- MTCNN: face detection
- FaceNet: similarity computation


Step 1: face detection (MTCNN)

At first I used OpenCV's built-in cascade classifier for face detection, but after some reading it turned out that MTCNN gives noticeably better results, so I tried it instead.

The MTCNN framework is a cascade of three networks: PNet, RNet and ONet. My machine is too weak to train them, so I downloaded pretrained weights, which are stored in the model/ folder.

(1) detect_face.py implements the functions for MTCNN face detection.

(2) face_detector_MTcnn.py tests detect_face.py: it detects and localizes faces in a live video stream.


Step 2: computing the embedding distance between two images with FaceNet

(1) facenet.py is a file I downloaded as-is; it implements the required helper functions. I won't explain how FaceNet itself works here -- you can look it up yourself; I first learned about the method from Andrew Ng's videos. Before that I had only tried histogram comparison, which worked poorly, so the FaceNet approach is a big improvement.

(2) cal_128XVector_user_facenet.py uses the functions from facenet.py to compute the distance between two images:

1. build_facenet_model(modir='./model/20170512-110547.pb'): builds the FaceNet model. My machine is too weak (and good face data is hard to find) to train the model myself, so I downloaded the pretrained file 20170512-110547.pb; it can be downloaded from the official site.

2. cal_128_vector(): computes the 128-dimensional embedding of one image; the result is a [1,128] array.

3. cal_dist(): computes the Euclidean distance (the square root of the sum of squared differences) between two embeddings; this value is used to measure how similar two images are.

4. saver_data_to_csv(array, label='lijie2', csv_dir='./data/data.csv'): stores a collected embedding in the CSV file with `label` as its column name; the label is later used to tell whose face was recognized.

5. cal_dist_from_csv(): compares a freshly captured embedding with the data already stored in the CSV file; it returns the distance to the closest stored embedding together with that embedding's label, as used in the sketch below.
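
To make the two steps concrete, here is a minimal sketch that chains them to compare two photos. It assumes the pretrained files are already in ./model/ and that each photo contains at least one face; `img1.jpg` and `img2.jpg` are placeholder paths, and `first_face` is an ad-hoc helper written for this sketch (not part of the repo):

```python
import cv2
import tensorflow as tf
import detect_face
from cal_128XVector_user_facenet import build_facenet_model, cal_128_vector, cal_dist

# Step 1: load the MTCNN detector (PNet/RNet/ONet weights from ./model/)
with tf.Graph().as_default():
    sess = tf.Session()
    with sess.as_default():
        pnet, rnet, onet = detect_face.create_mtcnn(sess, './model/')

def first_face(path):
    """Detect faces in an image file and return the crop of the first one."""
    frame = cv2.imread(path)
    boxes, _ = detect_face.detect_face(frame, 20, pnet, rnet, onet, [0.6, 0.7, 0.7], 0.709)
    r = boxes[0].astype(int)  # rows are [x1, y1, x2, y2, score]; assumes at least one face
    return frame[r[1]:r[3], r[0]:r[2]]

# Step 2: embed both crops with FaceNet and compare them
sess1, images_ph, phase_ph, embeddings = build_facenet_model()
emb1 = cal_128_vector(first_face('img1.jpg'), sess1, images_ph, phase_ph, embeddings)
emb2 = cal_128_vector(first_face('img2.jpg'), sess1, images_ph, phase_ph, embeddings)
print('distance: %.3f' % cal_dist(emb1, emb2))  # smaller distance = more similar faces
```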

Step 3: collect_frame_to_csv.py detects faces in real time and saves the collected face data to the CSV file -- in other words, it stores the user's face information (FaceNet + OpenCV).

The implementation is commented in detail in the script, and it can be run as-is.


Step 4: realtime_detect_face_and_recognition.py

With the required files in the right places it can be run directly; it detects faces and classifies them by identity.


Notes:

Since 20170512-110547.pb is larger than 25 MB it could not be uploaded here; download it yourself and put it in the model folder.

(1) Put all the files into one project and run collect_frame_to_csv.py. It collects face data and prompts you for a label; if the label already exists, the old data is overwritten.

(2) After the data is collected, run realtime_detect_face_and_recognition.py. It asks whether you want to collect data; if you already did that in the previous step, enter n.

It then asks whether to detect; enter y to run face detection and labeling.
If a detected face cannot be matched against the CSV file, it is labeled "others".

--------------------------------------------------------------------------------
/cal_128XVector_user_facenet.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import cv2
import facenet
import pandas as pd

def build_facenet_model(modir='./model/20170512-110547.pb'):
    # note: a bare tf.Graph().as_default() is a no-op, so the model is loaded into the default graph
    sess = tf.Session()
    facenet.load_model(modir)

    images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
    embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
    phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")

    print('facenet embedding model ready')
    return sess, images_placeholder, phase_train_placeholder, embeddings

# compute an image's 128-D embedding with the FaceNet model
def cal_128_vector(frame, sess, images_placeholder, phase_train_placeholder, embeddings):
    scaled_reshape = []
    embeddings_size = embeddings.get_shape()[1]
    #frame = cv2.imread(frame)  # uncomment if `frame` is a file path
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    image = cv2.resize(frame, (200, 200))
    image = facenet.prewhiten(image)
    scaled_reshape.append(image.reshape(-1, 200, 200, 3))
    array = np.zeros((1, embeddings_size))
    array[0, :] = sess.run(embeddings, feed_dict={images_placeholder: scaled_reshape[0],
                                                  phase_train_placeholder: False})[0]

    return array

def cal_dist(array0, array1):
    # Euclidean distance; ravel first so both [1,128] arrays and flat 128-vectors are handled
    dist = np.sqrt(np.sum(np.square(np.ravel(array0) - np.ravel(array1))))
    return dist

# save a 128-D embedding to the CSV file
def saver_data_to_csv(array, label='lijie2', csv_dir='./data/data.csv'):
    # data1 = DataFrame(array, index=None, columns=[label])
    # data1.to_csv(csv_dir)
    array = array[0, :]
    info = pd.read_csv(csv_dir)
    #print(info.shape)
    info[label] = array
    #print(info.shape)
    info.to_csv(csv_dir, index=None)
    return info

# compare an embedding against every column stored in the CSV; return the
# smallest distance and the corresponding label
def cal_dist_from_csv(csv_dir, array):
    array1 = array[0, :]
    final_column = 'others'  # returned when no stored face is close enough
    pre_dist = 1             # distance threshold
    info = pd.read_csv(csv_dir)
    #print(info.head(0))
    for i, column in enumerate(info.head(0)):
        array2 = info[column]
        dist = cal_dist(array1, array2)
        if dist < pre_dist:  # (the dump truncates mid-file here; the tail is restored to match the call sites)
            pre_dist = dist
            final_column = column
    return pre_dist, final_column
--------------------------------------------------------------------------------
/detect_face.py:
--------------------------------------------------------------------------------
... (collect_frame_to_csv.py and the top of detect_face.py -- the license header, the network definitions and create_mtcnn() -- are missing from this dump; the text resumes inside detect_face(img, minsize, pnet, rnet, onet, threshold, factor), at the scale-pyramid loop) ...

    while minl>=12:
        scales += [m*np.power(factor, factor_count)]
        minl = minl*factor
        factor_count += 1

    # first stage
    for j in range(len(scales)):
        scale=scales[j]
        hs=int(np.ceil(h*scale))
        ws=int(np.ceil(w*scale))
        im_data = imresample(img, (hs, ws))
        im_data = (im_data-127.5)*0.0078125
        img_x = np.expand_dims(im_data, 0)
        img_y = np.transpose(img_x, (0,2,1,3))
        out = pnet(img_y)
        out0 = np.transpose(out[0], (0,2,1,3))
        out1 = np.transpose(out[1], (0,2,1,3))

        boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0])

        # inter-scale nms
        pick = nms(boxes.copy(), 0.5, 'Union')
        if boxes.size>0 and pick.size>0:
            boxes = boxes[pick,:]
            total_boxes = np.append(total_boxes, boxes, axis=0)

    numbox = total_boxes.shape[0]
    if numbox>0:
        pick = nms(total_boxes.copy(), 0.7, 'Union')
        total_boxes = total_boxes[pick,:]
        regw = total_boxes[:,2]-total_boxes[:,0]
        regh = total_boxes[:,3]-total_boxes[:,1]
        qq1 = total_boxes[:,0]+total_boxes[:,5]*regw
        qq2 = total_boxes[:,1]+total_boxes[:,6]*regh
        qq3 = total_boxes[:,2]+total_boxes[:,7]*regw
        qq4 = total_boxes[:,3]+total_boxes[:,8]*regh
        total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]]))
        total_boxes = rerec(total_boxes.copy())
        total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32)
        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)

    numbox = total_boxes.shape[0]
    if numbox>0:
        # second stage
        tempimg = np.zeros((24,24,3,numbox))
        for k in range(0,numbox):
            tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
            tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
            if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:
                tempimg[:,:,:,k] = imresample(tmp, (24, 24))
            else:
                return np.empty(0)  # np.empty() with no shape argument would raise a TypeError
        tempimg = (tempimg-127.5)*0.0078125
        tempimg1 = np.transpose(tempimg, (3,1,0,2))
        out = rnet(tempimg1)
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        score = out1[1,:]
        ipass = np.where(score>threshold[1])
        total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
        mv = out0[:,ipass[0]]
        if total_boxes.shape[0]>0:
            pick = nms(total_boxes, 0.7, 'Union')
            total_boxes = total_boxes[pick,:]
            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick]))
            total_boxes = rerec(total_boxes.copy())

    numbox = total_boxes.shape[0]
    if numbox>0:
        # third stage
        total_boxes = np.fix(total_boxes).astype(np.int32)
        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
        tempimg = np.zeros((48,48,3,numbox))
        for k in range(0,numbox):
            tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
            tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
            if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:
                tempimg[:,:,:,k] = imresample(tmp, (48, 48))
            else:
                return np.empty(0)  # np.empty() with no shape argument would raise a TypeError
        tempimg = (tempimg-127.5)*0.0078125
        tempimg1 = np.transpose(tempimg, (3,1,0,2))
        out = onet(tempimg1)
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        out2 = np.transpose(out[2])
        score = out2[1,:]
        points = out1
        ipass = np.where(score>threshold[2])
        points = points[:,ipass[0]]
        total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
        mv = out0[:,ipass[0]]

        w = total_boxes[:,2]-total_boxes[:,0]+1
        h = total_boxes[:,3]-total_boxes[:,1]+1
        points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1
        points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1
        if total_boxes.shape[0]>0:
            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv))
            pick = nms(total_boxes.copy(), 0.7, 'Min')
            total_boxes = total_boxes[pick,:]
            points = points[:,pick]

    return total_boxes, points


# function [boundingbox] = bbreg(boundingbox,reg)
def bbreg(boundingbox,reg):
    # calibrate bounding boxes
    if reg.shape[1]==1:
        reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))

    w = boundingbox[:,2]-boundingbox[:,0]+1
    h = boundingbox[:,3]-boundingbox[:,1]+1
    b1 = boundingbox[:,0]+reg[:,0]*w
    b2 = boundingbox[:,1]+reg[:,1]*h
    b3 = boundingbox[:,2]+reg[:,2]*w
    b4 = boundingbox[:,3]+reg[:,3]*h
    boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ]))
    return boundingbox

def generateBoundingBox(imap, reg, scale, t):
    # use heatmap to generate bounding boxes
    stride=2
    cellsize=12

    imap = np.transpose(imap)
    dx1 = np.transpose(reg[:,:,0])
    dy1 = np.transpose(reg[:,:,1])
    dx2 = np.transpose(reg[:,:,2])
    dy2 = np.transpose(reg[:,:,3])
    y, x = np.where(imap >= t)
    if y.shape[0]==1:
        dx1 = np.flipud(dx1)
        dy1 = np.flipud(dy1)
        dx2 = np.flipud(dx2)
        dy2 = np.flipud(dy2)
    score = imap[(y,x)]
    reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ]))
    if reg.size==0:
        reg = np.empty((0,3))
    bb = np.transpose(np.vstack([y,x]))
    q1 = np.fix((stride*bb+1)/scale)
    q2 = np.fix((stride*bb+cellsize-1+1)/scale)
    boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg])
    return boundingbox, reg

# function pick = nms(boxes,threshold,type)
def nms(boxes, threshold, method):
    if boxes.size==0:
        return np.empty((0,3))
    x1 = boxes[:,0]
    y1 = boxes[:,1]
    x2 = boxes[:,2]
    y2 = boxes[:,3]
    s = boxes[:,4]
    area = (x2-x1+1) * (y2-y1+1)
    I = np.argsort(s)
    pick = np.zeros_like(s, dtype=np.int16)
    counter = 0
    while I.size>0:
        i = I[-1]
        pick[counter] = i
        counter += 1
        idx = I[0:-1]
        xx1 = np.maximum(x1[i], x1[idx])
        yy1 = np.maximum(y1[i], y1[idx])
        xx2 = np.minimum(x2[i], x2[idx])
        yy2 = np.minimum(y2[i], y2[idx])
        w = np.maximum(0.0, xx2-xx1+1)
        h = np.maximum(0.0, yy2-yy1+1)
        inter = w * h
        if method == 'Min':  # string comparison with `is` is unreliable; use equality
            o = inter / np.minimum(area[i], area[idx])
        else:
            o = inter / (area[i] + area[idx] - inter)
        I = I[np.where(o<=threshold)]
    pick = pick[0:counter]
    return pick

# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h)
def pad(total_boxes, w, h):
    # compute the padding coordinates (pad the bounding boxes to square)
    tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32)
    tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32)
    numbox = total_boxes.shape[0]

    dx = np.ones((numbox), dtype=np.int32)
    dy = np.ones((numbox), dtype=np.int32)
    edx = tmpw.copy().astype(np.int32)
    edy = tmph.copy().astype(np.int32)

    x = total_boxes[:,0].copy().astype(np.int32)
    y = total_boxes[:,1].copy().astype(np.int32)
    ex = total_boxes[:,2].copy().astype(np.int32)
    ey = total_boxes[:,3].copy().astype(np.int32)

    tmp = np.where(ex>w)
    edx[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],0)
    ex[tmp] = w

    tmp = np.where(ey>h)
    edy[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],0)
    ey[tmp] = h

    tmp = np.where(x<1)
    dx[tmp] = np.expand_dims(2-x[tmp],0)
    x[tmp] = 1

    tmp = np.where(y<1)
    dy[tmp] = np.expand_dims(2-y[tmp],0)
    y[tmp] = 1

    return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph

# function [bboxA] = rerec(bboxA)
def rerec(bboxA):
    # convert bboxA to square
    h = bboxA[:,3]-bboxA[:,1]
    w = bboxA[:,2]-bboxA[:,0]
    l = np.maximum(w, h)
    bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5
    bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5
    bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1)))
    return bboxA

def imresample(img, sz):
    im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA) #pylint: disable=no-member
    return im_data

    # This method is kept for debugging purpose
#     h=img.shape[0]
#     w=img.shape[1]
#     hs, ws = sz
#     dx = float(w) / ws
#     dy = float(h) / hs
#     im_data = np.zeros((hs,ws,3))
#     for a1 in range(0,hs):
#         for a2 in range(0,ws):
#             for a3 in range(0,3):
#                 im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3]
#     return im_data

--------------------------------------------------------------------------------
/face_detector_MTcnn.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np
import cv2
import detect_face


if __name__ == "__main__":
    image_size = 200
    minsize = 20
    threshold = [0.6, 0.7, 0.7]
    factor = 0.709  # scale factor
    print("Creating MTCNN networks and loading parameters..")
    #########################build mtcnn########################
    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, './model/')

    capture = cv2.VideoCapture(0)
    while (capture.isOpened()):
        ret, frame = capture.read()
        bounding_box, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)

        nb_faces = bounding_box.shape[0]  # number of detected faces
        # annotate the faces
        for face_position in bounding_box:
            rect = face_position.astype(int)
            # bounding rectangle
            cv2.rectangle(frame, (rect[0], rect[1]), (rect[2], rect[3]), (0, 255, 255), 2, 1)
            cv2.putText(frame, "faces:%d" % (nb_faces), (10, 20), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 255), 4)

        cv2.imshow('Video', frame)
        if cv2.waitKey(1) & 0xff == 27:
            break
    capture.release()
    cv2.destroyAllWindows()

--------------------------------------------------------------------------------
/facenet.py:
--------------------------------------------------------------------------------
"""Functions for building the face recognition network.
"""
# MIT License
#
# Copyright (c) 2016 David Sandberg
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# pylint: disable=missing-docstring
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
from subprocess import Popen, PIPE
import tensorflow as tf
from tensorflow.python.framework import ops
import numpy as np
from scipy import misc
from sklearn.model_selection import KFold
from scipy import interpolate
from tensorflow.python.training import training
import random
import re
from tensorflow.python.platform import gfile
from six import iteritems


def triplet_loss(anchor, positive, negative, alpha):
    """Calculate the triplet loss according to the FaceNet paper

    Args:
      anchor: the embeddings for the anchor images.
      positive: the embeddings for the positive images.
      negative: the embeddings for the negative images.

    Returns:
      the triplet loss according to the FaceNet paper as a float tensor.
    """
    with tf.variable_scope('triplet_loss'):
        pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), 1)
        neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), 1)

        basic_loss = tf.add(tf.subtract(pos_dist, neg_dist), alpha)
        loss = tf.reduce_mean(tf.maximum(basic_loss, 0.0), 0)

    return loss


def decov_loss(xs):
    """Decov loss as described in https://arxiv.org/pdf/1511.06068.pdf
    'Reducing Overfitting In Deep Networks by Decorrelating Representation'
    """
    x = tf.reshape(xs, [int(xs.get_shape()[0]), -1])
    m = tf.reduce_mean(x, 0, True)
    z = tf.expand_dims(x - m, 2)
    corr = tf.reduce_mean(tf.matmul(z, tf.transpose(z, perm=[0, 2, 1])), 0)
    corr_frob_sqr = tf.reduce_sum(tf.square(corr))
    corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr)))
    loss = 0.5 * (corr_frob_sqr - corr_diag_sqr)
    return loss


def center_loss(features, label, alfa, nrof_classes):
    """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition"
    (http://ydwen.github.io/papers/WenECCV16.pdf)
    """
    nrof_features = features.get_shape()[1]
    centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32,
                              initializer=tf.constant_initializer(0), trainable=False)
    label = tf.reshape(label, [-1])
    centers_batch = tf.gather(centers, label)
    diff = (1 - alfa) * (centers_batch - features)
    centers = tf.scatter_sub(centers, label, diff)
    loss = tf.reduce_mean(tf.square(features - centers_batch))
    return loss, centers


def get_image_paths_and_labels(dataset):
    image_paths_flat = []
    labels_flat = []
    for i in range(len(dataset)):
        image_paths_flat += dataset[i].image_paths
        labels_flat += [i] * len(dataset[i].image_paths)
    return image_paths_flat, labels_flat


def shuffle_examples(image_paths, labels):
    shuffle_list = list(zip(image_paths, labels))
    random.shuffle(shuffle_list)
    image_paths_shuff, labels_shuff = zip(*shuffle_list)
    return image_paths_shuff, labels_shuff


def read_images_from_disk(input_queue):
    """Consumes a single filename and label as a ' '-delimited string.
    Args:
      filename_and_label_tensor: A scalar string tensor.
    Returns:
      Two tensors: the decoded image, and the string label.
    """
    label = input_queue[1]
    file_contents = tf.read_file(input_queue[0])
    example = tf.image.decode_image(file_contents, channels=3)
    return example, label


def random_rotate_image(image):
    angle = np.random.uniform(low=-10.0, high=10.0)
    return misc.imrotate(image, angle, 'bicubic')


def read_and_augment_data(image_list, label_list, image_size, batch_size, max_nrof_epochs,
                          random_crop, random_flip, random_rotate, nrof_preprocess_threads, shuffle=True):
    images = ops.convert_to_tensor(image_list, dtype=tf.string)
    labels = ops.convert_to_tensor(label_list, dtype=tf.int32)

    # Makes an input queue
    input_queue = tf.train.slice_input_producer([images, labels],
                                                num_epochs=max_nrof_epochs, shuffle=shuffle)

    images_and_labels = []
    for _ in range(nrof_preprocess_threads):
        image, label = read_images_from_disk(input_queue)
        if random_rotate:
            image = tf.py_func(random_rotate_image, [image], tf.uint8)
        if random_crop:
            image = tf.random_crop(image, [image_size, image_size, 3])
        else:
            image = tf.image.resize_image_with_crop_or_pad(image, image_size, image_size)
        if random_flip:
            image = tf.image.random_flip_left_right(image)
        # pylint: disable=no-member
        image.set_shape((image_size, image_size, 3))
        image = tf.image.per_image_standardization(image)
        images_and_labels.append([image, label])

    image_batch, label_batch = tf.train.batch_join(
        images_and_labels, batch_size=batch_size,
        capacity=4 * nrof_preprocess_threads * batch_size,
        allow_smaller_final_batch=True)

    return image_batch, label_batch


def _add_loss_summaries(total_loss):
    """Add summaries for losses.

    Generates moving average for all losses and associated summaries for
    visualizing the performance of the network.

    Args:
      total_loss: Total loss from loss().
    Returns:
      loss_averages_op: op for generating moving averages of losses.
    """
    # Compute the moving average of all individual losses and the total loss.
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    losses = tf.get_collection('losses')
    loss_averages_op = loss_averages.apply(losses + [total_loss])

    # Attach a scalar summary to all individual losses and the total loss; do the
    # same for the averaged version of the losses.
    for l in losses + [total_loss]:
        # Name each loss as '(raw)' and name the moving average version of the loss
        # as the original loss name.
        tf.summary.scalar(l.op.name + ' (raw)', l)
        tf.summary.scalar(l.op.name, loss_averages.average(l))

    return loss_averages_op


def train(total_loss, global_step, optimizer, learning_rate, moving_average_decay, update_gradient_vars,
          log_histograms=True):
    # Generate moving averages of all losses and associated summaries.
    loss_averages_op = _add_loss_summaries(total_loss)

    # Compute gradients.
    with tf.control_dependencies([loss_averages_op]):
        if optimizer == 'ADAGRAD':
            opt = tf.train.AdagradOptimizer(learning_rate)
        elif optimizer == 'ADADELTA':
            opt = tf.train.AdadeltaOptimizer(learning_rate, rho=0.9, epsilon=1e-6)
        elif optimizer == 'ADAM':
            opt = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=0.1)
        elif optimizer == 'RMSPROP':
            opt = tf.train.RMSPropOptimizer(learning_rate, decay=0.9, momentum=0.9, epsilon=1.0)
        elif optimizer == 'MOM':
            opt = tf.train.MomentumOptimizer(learning_rate, 0.9, use_nesterov=True)
        else:
            raise ValueError('Invalid optimization algorithm')

        grads = opt.compute_gradients(total_loss, update_gradient_vars)

    # Apply gradients.
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Add histograms for trainable variables.
    if log_histograms:
        for var in tf.trainable_variables():
            tf.summary.histogram(var.op.name, var)

    # Add histograms for gradients.
    if log_histograms:
        for grad, var in grads:
            if grad is not None:
                tf.summary.histogram(var.op.name + '/gradients', grad)

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')

    return train_op


def prewhiten(x):
    mean = np.mean(x)
    std = np.std(x)
    std_adj = np.maximum(std, 1.0 / np.sqrt(x.size))
    y = np.multiply(np.subtract(x, mean), 1 / std_adj)
    return y


def crop(image, random_crop, image_size):
    if image.shape[1] > image_size:
        sz1 = int(image.shape[1] // 2)
        sz2 = int(image_size // 2)
        if random_crop:
            diff = sz1 - sz2
            (h, v) = (np.random.randint(-diff, diff + 1), np.random.randint(-diff, diff + 1))
        else:
            (h, v) = (0, 0)
        image = image[(sz1 - sz2 + v):(sz1 + sz2 + v), (sz1 - sz2 + h):(sz1 + sz2 + h), :]
    return image


def flip(image, random_flip):
    if random_flip and np.random.choice([True, False]):
        image = np.fliplr(image)
    return image


def to_rgb(img):
    w, h = img.shape
    ret = np.empty((w, h, 3), dtype=np.uint8)
    ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img
    return ret


def load_data(image_paths, do_random_crop, do_random_flip, image_size, do_prewhiten=True):
    nrof_samples = len(image_paths)
    images = np.zeros((nrof_samples, image_size, image_size, 3))
    for i in range(nrof_samples):
        img = misc.imread(image_paths[i])
        if img.ndim == 2:
            img = to_rgb(img)
        if do_prewhiten:
            img = prewhiten(img)
        img = crop(img, do_random_crop, image_size)
        img = flip(img, do_random_flip)
        images[i, :, :, :] = img
    return images


def get_label_batch(label_data, batch_size, batch_index):
    nrof_examples = np.size(label_data, 0)
    j = batch_index * batch_size % nrof_examples
    if j + batch_size <= nrof_examples:
        batch = label_data[j:j + batch_size]
    else:
        x1 = label_data[j:nrof_examples]
        x2 = label_data[0:nrof_examples - j]
        batch = np.vstack([x1, x2])
    batch_int = batch.astype(np.int64)
    return batch_int


def get_batch(image_data, batch_size, batch_index):
    nrof_examples = np.size(image_data, 0)
    j = batch_index * batch_size % nrof_examples
    if j + batch_size <= nrof_examples:
        batch = image_data[j:j + batch_size, :, :, :]
    else:
        x1 = image_data[j:nrof_examples, :, :, :]
        x2 = image_data[0:nrof_examples - j, :, :, :]
        batch = np.vstack([x1, x2])
    batch_float = batch.astype(np.float32)
    return batch_float


def get_triplet_batch(triplets, batch_index, batch_size):
    ax, px, nx = triplets
    a = get_batch(ax, int(batch_size / 3), batch_index)
    p = get_batch(px, int(batch_size / 3), batch_index)
    n = get_batch(nx, int(batch_size / 3), batch_index)
    batch = np.vstack([a, p, n])
    return batch


def get_learning_rate_from_file(filename, epoch):
    with open(filename, 'r') as f:
        for line in f.readlines():
            line = line.split('#', 1)[0]
            if line:
                par = line.strip().split(':')
                e = int(par[0])
                lr = float(par[1])
                if e <= epoch:
                    learning_rate = lr
                else:
                    return learning_rate


class ImageClass():
    "Stores the paths to images for a given class"

    def __init__(self, name, image_paths):
        self.name = name
        self.image_paths = image_paths

    def __str__(self):
        return self.name + ', ' + str(len(self.image_paths)) + ' images'

    def __len__(self):
        return len(self.image_paths)


def get_dataset(path, has_class_directories=True):
    dataset = []
    path_exp = os.path.expanduser(path)
    classes = os.listdir(path_exp)
    classes.sort()
    nrof_classes = len(classes)
    for i in range(nrof_classes):
        class_name = classes[i]
        facedir = os.path.join(path_exp, class_name)
        image_paths = get_image_paths(facedir)
        dataset.append(ImageClass(class_name, image_paths))

    return dataset


def get_image_paths(facedir):
    image_paths = []
    if os.path.isdir(facedir):
        images = os.listdir(facedir)
        image_paths = [os.path.join(facedir, img) for img in images]
    return image_paths


def split_dataset(dataset, split_ratio, mode):
    if mode == 'SPLIT_CLASSES':
        nrof_classes = len(dataset)
        class_indices = np.arange(nrof_classes)
        np.random.shuffle(class_indices)
        split = int(round(nrof_classes * split_ratio))
        train_set = [dataset[i] for i in class_indices[0:split]]
        test_set = [dataset[i] for i in class_indices[split:-1]]
    elif mode == 'SPLIT_IMAGES':
        train_set = []
        test_set = []
        min_nrof_images = 2
        for cls in dataset:
            paths = cls.image_paths
            np.random.shuffle(paths)
            split = int(round(len(paths) * split_ratio))
            if split < min_nrof_images:
                continue  # Not enough images for test set. Skip class...
            train_set.append(ImageClass(cls.name, paths[0:split]))
            test_set.append(ImageClass(cls.name, paths[split:-1]))
    else:
        raise ValueError('Invalid train/test split mode "%s"' % mode)
    return train_set, test_set


def load_model(model):
    # Check if the model is a model directory (containing a metagraph and a checkpoint file)
    # or if it is a protobuf file with a frozen graph
    model_exp = os.path.expanduser(model)
    if (os.path.isfile(model_exp)):
        print('Model filename: %s' % model_exp)
        with gfile.FastGFile(model_exp, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            tf.import_graph_def(graph_def, name='')
    else:
        print('Model directory: %s' % model_exp)
        meta_file, ckpt_file = get_model_filenames(model_exp)

        print('Metagraph file: %s' % meta_file)
        print('Checkpoint file: %s' % ckpt_file)

        saver = tf.train.import_meta_graph(os.path.join(model_exp, meta_file))
        saver.restore(tf.get_default_session(), os.path.join(model_exp, ckpt_file))


def get_model_filenames(model_dir):
    files = os.listdir(model_dir)
    meta_files = [s for s in files if s.endswith('.meta')]
    if len(meta_files) == 0:
        raise ValueError('No meta file found in the model directory (%s)' % model_dir)
    elif len(meta_files) > 1:
        raise ValueError('There should not be more than one meta file in the model directory (%s)' % model_dir)
    meta_file = meta_files[0]
    meta_files = [s for s in files if '.ckpt' in s]
    max_step = -1
    for f in files:
        step_str = re.match(r'(^model-[\w\- ]+.ckpt-(\d+))', f)
        if step_str is not None and len(step_str.groups()) >= 2:
            step = int(step_str.groups()[1])
            if step > max_step:
                max_step = step
                ckpt_file = step_str.groups()[0]
    return meta_file, ckpt_file


def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10):
    assert (embeddings1.shape[0] == embeddings2.shape[0])
    assert (embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)

    tprs = np.zeros((nrof_folds, nrof_thresholds))
    fprs = np.zeros((nrof_folds, nrof_thresholds))
    accuracy = np.zeros((nrof_folds))

    diff = np.subtract(embeddings1, embeddings2)
    dist = np.sum(np.square(diff), 1)
    indices = np.arange(nrof_pairs)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):

        # Find the best threshold for the fold
        acc_train = np.zeros((nrof_thresholds))
        for threshold_idx, threshold in enumerate(thresholds):
            _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set])
        best_threshold_index = np.argmax(acc_train)
        for threshold_idx, threshold in enumerate(thresholds):
            tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _ = calculate_accuracy(threshold,
                                                                                                 dist[test_set],
                                                                                                 actual_issame[test_set])
        _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set],
                                                      actual_issame[test_set])

    tpr = np.mean(tprs, 0)
    fpr = np.mean(fprs, 0)
    return tpr, fpr, accuracy


def calculate_accuracy(threshold, dist, actual_issame):
    predict_issame = np.less(dist, threshold)
    tp = np.sum(np.logical_and(predict_issame, actual_issame))
    fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
    tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame)))
    fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame))

    tpr = 0 if (tp + fn == 0) else float(tp) / float(tp + fn)
    fpr = 0 if (fp + tn == 0) else float(fp) / float(fp + tn)
    acc = float(tp + tn) / dist.size
    return tpr, fpr, acc


def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10):
    assert (embeddings1.shape[0] == embeddings2.shape[0])
    assert (embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)

    val = np.zeros(nrof_folds)
    far = np.zeros(nrof_folds)

    diff = np.subtract(embeddings1, embeddings2)
    dist = np.sum(np.square(diff), 1)
    indices = np.arange(nrof_pairs)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):

        # Find the threshold that gives FAR = far_target
        far_train = np.zeros(nrof_thresholds)
        for threshold_idx, threshold in enumerate(thresholds):
            _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set])
        if np.max(far_train) >= far_target:
            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
            threshold = f(far_target)
        else:
            threshold = 0.0

        val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set])

    val_mean = np.mean(val)
    far_mean = np.mean(far)
    val_std = np.std(val)
    return val_mean, val_std, far_mean


def calculate_val_far(threshold, dist, actual_issame):
    predict_issame = np.less(dist, threshold)
    true_accept = np.sum(np.logical_and(predict_issame, actual_issame))
    false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
    n_same = np.sum(actual_issame)
    n_diff = np.sum(np.logical_not(actual_issame))
    val = float(true_accept) / float(n_same)
    far = float(false_accept) / float(n_diff)
    return val, far


def store_revision_info(src_path, output_dir, arg_string):
    try:
        # Get git hash
        cmd = ['git', 'rev-parse', 'HEAD']
        gitproc = Popen(cmd, stdout=PIPE, cwd=src_path)
        (stdout, _) = gitproc.communicate()
        git_hash = stdout.strip()
    except OSError as e:
        git_hash = ' '.join(cmd) + ': ' + e.strerror

    try:
        # Get local changes
        cmd = ['git', 'diff', 'HEAD']
        gitproc = Popen(cmd, stdout=PIPE, cwd=src_path)
        (stdout, _) = gitproc.communicate()
        git_diff = stdout.strip()
    except OSError as e:
        git_diff = ' '.join(cmd) + ': ' + e.strerror

    # Store a text file in the log directory
    rev_info_filename = os.path.join(output_dir, 'revision_info.txt')
    with open(rev_info_filename, "w") as text_file:
        text_file.write('arguments: %s\n--------------------\n' % arg_string)
        text_file.write('tensorflow version: %s\n--------------------\n' % tf.__version__)  # @UndefinedVariable
        text_file.write('git hash: %s\n--------------------\n' % git_hash)
        text_file.write('%s' % git_diff)


def list_variables(filename):
    reader = training.NewCheckpointReader(filename)
    variable_map = reader.get_variable_to_shape_map()
    names = sorted(variable_map.keys())
    return names


def put_images_on_grid(images, shape=(16, 8)):
    nrof_images = images.shape[0]
    img_size = images.shape[1]
    bw = 3
    img = np.zeros((shape[1] * (img_size + bw) + bw, shape[0] * (img_size + bw) + bw, 3), np.float32)
    for i in range(shape[1]):
        x_start = i * (img_size + bw) + bw
        for j in range(shape[0]):
            img_index = i * shape[0] + j
            if img_index >= nrof_images:
                break
            y_start = j * (img_size + bw) + bw
            img[x_start:x_start + img_size, y_start:y_start + img_size, :] = images[img_index, :, :, :]
        if img_index >= nrof_images:
            break
    return img


def write_arguments_to_file(args, filename):
    with open(filename, 'w') as f:
        for key, value in iteritems(vars(args)):
            f.write('%s: %s\n' % (key, str(value)))
--------------------------------------------------------------------------------
/model/det1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/windylijie/face-detect-MTcnn-faceNet/dee60f8c14b5d5caaab4eb4d8d2d065014df2cc4/model/det1.npy
--------------------------------------------------------------------------------
/model/det2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/windylijie/face-detect-MTcnn-faceNet/dee60f8c14b5d5caaab4eb4d8d2d065014df2cc4/model/det2.npy
--------------------------------------------------------------------------------
/model/det3.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/windylijie/face-detect-MTcnn-faceNet/dee60f8c14b5d5caaab4eb4d8d2d065014df2cc4/model/det3.npy
--------------------------------------------------------------------------------
/realtime_detect_face_and_recognition.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from collect_frame_to_csv import collect_frame_to_csv
import detect_face
import cv2
from cal_128XVector_user_facenet import cal_128_vector, build_facenet_model, cal_dist_from_csv


if __name__ == "__main__":
    collect_frame_to_csv()

    detection = input("detect or not(y/n):")
    if detection == 'y':
        csv_dir = './data/data.csv'  # CSV holding the stored 128-D face embeddings
        # load the FaceNet model
        sess1, images_placeholder, phase_train_placeholder, embeddings = build_facenet_model()

        image_size = 200
        minsize = 20
        threshold = [0.6, 0.7, 0.7]
        factor = 0.709  # scale factor
        print("Creating MTCNN networks and loading parameters..")
        #########################build mtcnn########################
        with tf.Graph().as_default():
            sess = tf.Session()
            with sess.as_default():
                pnet, rnet, onet = detect_face.create_mtcnn(sess, './model/')

        capture = cv2.VideoCapture(0)
        while (capture.isOpened()):
            ret, frame = capture.read()
            bounding_box, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)

            nb_faces = bounding_box.shape[0]  # number of detected faces
            # annotate the faces
            for face_position in bounding_box:
                rect = face_position.astype(int)
                image = frame[rect[1]:rect[3], rect[0]:rect[2]]  # crop the face ROI
                array = cal_128_vector(image, sess1, images_placeholder, phase_train_placeholder, embeddings)  # the face's 128-D embedding
                dist, label = cal_dist_from_csv(csv_dir, array)
                # bounding rectangle
                cv2.rectangle(frame, (rect[0], rect[1]), (rect[2], rect[3]), (0, 255, 255), 2, 1)
                cv2.putText(frame, "faces:%d" % (nb_faces), (10, 20), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 255), 4)
                cv2.putText(frame, '%.2f' % (dist), (rect[0], rect[1] - 30), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 255), 4)
                cv2.putText(frame, label, (rect[0], rect[1]), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 255), 4)

            cv2.imshow('Video', frame)
            if cv2.waitKey(1) & 0xff == 27:
                break
        capture.release()
        cv2.destroyAllWindows()
    else:
        print('The End...')
--------------------------------------------------------------------------------