├── README.md ├── img ├── 1.jpg ├── 2.jpg ├── 2007_007763.jpg ├── 2008_001009.jpg ├── 2008_001322.jpg ├── 2008_002079.jpg ├── 2008_002470.jpg ├── 2008_002506.jpg ├── 2008_004176.jpg ├── 2008_007676.jpg ├── 2009_004587.jpg ├── 3.jpg ├── 4.jpg ├── 5.jpg ├── 6.jpg ├── 7.jpg ├── image_003_1.jpg ├── image_004_1.jpg ├── image_019_1.jpg ├── image_020_1.jpg └── image_09.jpg ├── landmarkPredict.py ├── landmarkPredict_webcam.py ├── model ├── VGG_mean.binaryproto └── deploy.prototxt ├── result ├── 1.png ├── 2.png └── 3.png ├── testList.txt └── train ├── deploy.prototxt ├── solver.prototxt └── train_val.prototxt /README.md: -------------------------------------------------------------------------------- 1 | # face-landmark-localization 2 | This project predicts face landmarks (68 points) and head pose (3D pose: yaw, roll, pitch). 3 | 4 | 5 | ## Install 6 | - [caffe](https://github.com/BVLC/caffe) 7 | - [dlib face detector](http://dlib.net/)

8 | You can download [dlib 18.17](http://pan.baidu.com/s/1gey9Wd1)

9 | cd your dlib folder

10 | cd python_examples

11 | ./compile_dlib_python_module.bat

12 | Add the generated dlib.so to your Python path

13 | If you are using dlib 18.18, you can follow the [official instructions](http://dlib.net/) 14 | - opencv
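Before running anything, it may help to confirm that all three dependencies resolve from a single Python interpreter. A minimal sanity check (plain Python; nothing here is specific to this repo beyond the modules the scripts below import):

```python
# Environment check: these are exactly the modules landmarkPredict.py imports.
# If any import fails, fix that installation before going further.
import caffe   # pycaffe must be on PYTHONPATH
import cv2     # OpenCV Python bindings
import dlib    # the compiled dlib.so must be on PYTHONPATH

# Instantiating the detector confirms the dlib build actually works.
detector = dlib.get_frontal_face_detector()
print('caffe, cv2 and dlib imported; dlib face detector ready.')
```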

15 | 16 | ## Usage 17 | 18 | - Command: `python landmarkPredict.py predictImage testList.txt`
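testList.txt is a plain-text list with one image path per line, resolved relative to the directory you run the command from. The bundled list starts like this:

```
img/1.jpg
img/2.jpg
img/3.jpg
```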

19 | (Each listed image is run through the dlib face detector, then the 68 landmarks and the head pose are predicted and displayed.) 20 | 21 | 22 | ## Model 23 | 24 | - You can download the pre-trained model from [dropbox](https://www.dropbox.com/s/z89prwhg0jpgp47/68point_dlib_with_pose.caffemodel?dl=0) or [baidu yun](http://pan.baidu.com/s/1c14aFyK) 25 | 26 | ## Result 27 | ![](result/1.png) 28 | ![](result/2.png) 29 | ![](result/3.png) 30 | --------------------------------------------------------------------------------

/img/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/1.jpg -------------------------------------------------------------------------------- /img/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/2.jpg -------------------------------------------------------------------------------- /img/2007_007763.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/2007_007763.jpg -------------------------------------------------------------------------------- /img/2008_001009.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/2008_001009.jpg -------------------------------------------------------------------------------- /img/2008_001322.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/2008_001322.jpg -------------------------------------------------------------------------------- /img/2008_002079.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/2008_002079.jpg -------------------------------------------------------------------------------- /img/2008_002470.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/2008_002470.jpg -------------------------------------------------------------------------------- /img/2008_002506.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/2008_002506.jpg -------------------------------------------------------------------------------- /img/2008_004176.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/2008_004176.jpg -------------------------------------------------------------------------------- /img/2008_007676.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/2008_007676.jpg -------------------------------------------------------------------------------- /img/2009_004587.jpg:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/2009_004587.jpg -------------------------------------------------------------------------------- /img/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/3.jpg -------------------------------------------------------------------------------- /img/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/4.jpg -------------------------------------------------------------------------------- /img/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/5.jpg -------------------------------------------------------------------------------- /img/6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/6.jpg -------------------------------------------------------------------------------- /img/7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/7.jpg -------------------------------------------------------------------------------- /img/image_003_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/image_003_1.jpg -------------------------------------------------------------------------------- /img/image_004_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/image_004_1.jpg -------------------------------------------------------------------------------- /img/image_019_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/image_019_1.jpg -------------------------------------------------------------------------------- /img/image_020_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/image_020_1.jpg -------------------------------------------------------------------------------- /img/image_09.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/image_09.jpg -------------------------------------------------------------------------------- /landmarkPredict.py: -------------------------------------------------------------------------------- 1 | #usage :python landmarkPredict.py predictImage testList.txt 2 | 3 | import os 4 | import sys 5 | import numpy as np 6 | 
import cv2 7 | import caffe 8 | import dlib 9 | import matplotlib.pyplot as plt 10 | 11 | system_height = 650 12 | system_width = 1280 13 | channels = 1 14 | test_num = 1 15 | pointNum = 68 16 | 17 | S0_width = 60 18 | S0_height = 60 19 | vgg_height = 224 20 | vgg_width = 224 21 | M_left = -0.15 22 | M_right = +1.15 23 | M_top = -0.10 24 | M_bottom = +1.25 25 | pose_name = ['Pitch', 'Yaw', 'Roll'] # respect to ['head down','out of plane left','in plane right'] 26 | 27 | def recover_coordinate(largetBBox, facepoint, width, height): 28 | point = np.zeros(np.shape(facepoint)) 29 | cut_width = largetBBox[1] - largetBBox[0] 30 | cut_height = largetBBox[3] - largetBBox[2] 31 | scale_x = cut_width*1.0/width; 32 | scale_y = cut_height*1.0/height; 33 | point[0::2]=[float(j * scale_x + largetBBox[0]) for j in facepoint[0::2]] 34 | point[1::2]=[float(j * scale_y + largetBBox[2]) for j in facepoint[1::2]] 35 | return point 36 | 37 | def show_image(img, facepoint, bboxs, headpose): 38 | plt.figure(figsize=(20,10)) 39 | for faceNum in range(0,facepoint.shape[0]): 40 | cv2.rectangle(img, (int(bboxs[faceNum,0]), int(bboxs[faceNum,2])), (int(bboxs[faceNum,1]), int(bboxs[faceNum,3])), (0,0,255), 2) 41 | for p in range(0,3): 42 | plt.text(int(bboxs[faceNum,0]), int(bboxs[faceNum,2])-p*30, 43 | '{:s} {:.2f}'.format(pose_name[p], headpose[faceNum,p]), 44 | bbox=dict(facecolor='blue', alpha=0.5), 45 | fontsize=12, color='white') 46 | for i in range(0,facepoint.shape[1]/2): 47 | cv2.circle(img,(int(round(facepoint[faceNum,i*2])),int(round(facepoint[faceNum,i*2+1]))),1,(0,255,0),2) 48 | height = img.shape[0] 49 | width = img.shape[1] 50 | if height > system_height or width > system_width: 51 | height_radius = system_height*1.0/height 52 | width_radius = system_width*1.0/width 53 | radius = min(height_radius,width_radius) 54 | img = cv2.resize(img, (0,0), fx=radius, fy=radius) 55 | 56 | img = img[:,:,[2,1,0]] 57 | plt.imshow(img) 58 | plt.show() 59 | 60 | 61 | def recoverPart(point,bbox,left,right,top,bottom,img_height,img_width,height,width): 62 | largeBBox = getCutSize(bbox,left,right,top,bottom) 63 | retiBBox = retifyBBoxSize(img_height,img_width,largeBBox) 64 | recover = recover_coordinate(retiBBox,point,height,width) 65 | recover=recover.astype('float32') 66 | return recover 67 | 68 | 69 | def getRGBTestPart(bbox,left,right,top,bottom,img,height,width): 70 | largeBBox = getCutSize(bbox,left,right,top,bottom) 71 | retiBBox = retifyBBox(img,largeBBox) 72 | # cv2.rectangle(img, (int(retiBBox[0]), int(retiBBox[2])), (int(retiBBox[1]), int(retiBBox[3])), (0,0,255), 2) 73 | # cv2.imshow('f',img) 74 | # cv2.waitKey(0) 75 | face = img[int(retiBBox[2]):int(retiBBox[3]), int(retiBBox[0]):int(retiBBox[1]), :] 76 | face = cv2.resize(face,(height,width),interpolation = cv2.INTER_AREA) 77 | face=face.astype('float32') 78 | return face 79 | 80 | def batchRecoverPart(predictPoint,totalBBox,totalSize,left,right,top,bottom,height,width): 81 | recoverPoint = np.zeros(predictPoint.shape) 82 | for i in range(0,predictPoint.shape[0]): 83 | recoverPoint[i] = recoverPart(predictPoint[i],totalBBox[i],left,right,top,bottom,totalSize[i,0],totalSize[i,1],height,width) 84 | return recoverPoint 85 | 86 | 87 | 88 | def retifyBBox(img,bbox): 89 | img_height = np.shape(img)[0] - 1 90 | img_width = np.shape(img)[1] - 1 91 | if bbox[0] <0: 92 | bbox[0] = 0 93 | if bbox[1] <0: 94 | bbox[1] = 0 95 | if bbox[2] <0: 96 | bbox[2] = 0 97 | if bbox[3] <0: 98 | bbox[3] = 0 99 | if bbox[0] > img_width: 100 | bbox[0] = img_width 101 | if bbox[1] > 
img_width: 102 | bbox[1] = img_width 103 | if bbox[2] > img_height: 104 | bbox[2] = img_height 105 | if bbox[3] > img_height: 106 | bbox[3] = img_height 107 | return bbox 108 | 109 | def retifyBBoxSize(img_height,img_width,bbox): 110 | if bbox[0] <0: 111 | bbox[0] = 0 112 | if bbox[1] <0: 113 | bbox[1] = 0 114 | if bbox[2] <0: 115 | bbox[2] = 0 116 | if bbox[3] <0: 117 | bbox[3] = 0 118 | if bbox[0] > img_width: 119 | bbox[0] = img_width 120 | if bbox[1] > img_width: 121 | bbox[1] = img_width 122 | if bbox[2] > img_height: 123 | bbox[2] = img_height 124 | if bbox[3] > img_height: 125 | bbox[3] = img_height 126 | return bbox 127 | 128 | def getCutSize(bbox,left,right,top,bottom): #left, right, top, and bottom 129 | 130 | box_width = bbox[1] - bbox[0] 131 | box_height = bbox[3] - bbox[2] 132 | cut_size=np.zeros((4)) 133 | cut_size[0] = bbox[0] + left * box_width 134 | cut_size[1] = bbox[1] + (right - 1) * box_width 135 | cut_size[2] = bbox[2] + top * box_height 136 | cut_size[3] = bbox[3] + (bottom-1) * box_height 137 | return cut_size 138 | 139 | 140 | def detectFace(img): 141 | detector = dlib.get_frontal_face_detector() 142 | dets = detector(img,1) 143 | bboxs = np.zeros((len(dets),4)) 144 | for i, d in enumerate(dets): 145 | bboxs[i,0] = d.left(); 146 | bboxs[i,1] = d.right(); 147 | bboxs[i,2] = d.top(); 148 | bboxs[i,3] = d.bottom(); 149 | return bboxs; 150 | 151 | 152 | def predictImage(filename): 153 | vgg_point_MODEL_FILE = 'model/deploy.prototxt' 154 | vgg_point_PRETRAINED = 'model/68point_dlib_with_pose.caffemodel' 155 | mean_filename='model/VGG_mean.binaryproto' 156 | vgg_point_net=caffe.Net(vgg_point_MODEL_FILE,vgg_point_PRETRAINED,caffe.TEST) 157 | # caffe.set_mode_cpu() 158 | caffe.set_mode_gpu() 159 | caffe.set_device(0) 160 | f = open(filename) 161 | line = f.readline() 162 | index = 0 163 | proto_data = open(mean_filename, "rb").read() 164 | a = caffe.io.caffe_pb2.BlobProto.FromString(proto_data) 165 | mean = caffe.io.blobproto_to_array(a)[0] 166 | 167 | while line: 168 | print index 169 | line = line.strip() 170 | info = line.split(' ') 171 | imgPath = info[0] 172 | print imgPath 173 | num = 1 174 | colorImage = cv2.imread(imgPath) 175 | bboxs = detectFace(colorImage) 176 | faceNum = bboxs.shape[0] 177 | faces = np.zeros((1,3,vgg_height,vgg_width)) 178 | predictpoints = np.zeros((faceNum,pointNum*2)) 179 | predictpose = np.zeros((faceNum,3)) 180 | imgsize = np.zeros((2)) 181 | imgsize[0] = colorImage.shape[0]-1 182 | imgsize[1] = colorImage.shape[1]-1 183 | TotalSize = np.zeros((faceNum,2)) 184 | for i in range(0,faceNum): 185 | TotalSize[i] = imgsize 186 | for i in range(0,faceNum): 187 | bbox = bboxs[i] 188 | colorface = getRGBTestPart(bbox,M_left,M_right,M_top,M_bottom,colorImage,vgg_height,vgg_width) 189 | normalface = np.zeros(mean.shape) 190 | normalface[0] = colorface[:,:,0] 191 | normalface[1] = colorface[:,:,1] 192 | normalface[2] = colorface[:,:,2] 193 | normalface = normalface - mean 194 | faces[0] = normalface 195 | 196 | blobName = '68point' 197 | data4DL = np.zeros([faces.shape[0],1,1,1]) 198 | vgg_point_net.set_input_arrays(faces.astype(np.float32),data4DL.astype(np.float32)) 199 | vgg_point_net.forward() 200 | predictpoints[i] = vgg_point_net.blobs[blobName].data[0] 201 | 202 | blobName = 'poselayer' 203 | pose_prediction = vgg_point_net.blobs[blobName].data 204 | predictpose[i] = pose_prediction * 50 205 | 206 | predictpoints = predictpoints * vgg_height/2 + vgg_width/2 207 | level1Point = 
batchRecoverPart(predictpoints,bboxs,TotalSize,M_left,M_right,M_top,M_bottom,vgg_height,vgg_width) 208 | 209 | show_image(colorImage, level1Point, bboxs, predictpose) 210 | line = f.readline() 211 | index = index + 1 212 | 213 | 214 | if __name__ == '__main__': 215 | if len(sys.argv) < 2: 216 | print(__doc__) 217 | else: 218 | func = globals()[sys.argv[1]] 219 | func(*sys.argv[2:]) -------------------------------------------------------------------------------- /landmarkPredict_webcam.py: -------------------------------------------------------------------------------- 1 | # usage: python landmarkPredict_webcam.py 2 | 3 | import os 4 | import sys 5 | import numpy as np 6 | import cv2 7 | import caffe 8 | import dlib 9 | import matplotlib.pyplot as plt 10 | 11 | system_height = 650 12 | system_width = 1280 13 | channels = 1 14 | test_num = 1 15 | pointNum = 68 16 | 17 | S0_width = 60 18 | S0_height = 60 19 | vgg_height = 224 20 | vgg_width = 224 21 | M_left = -0.15 22 | M_right = +1.15 23 | M_top = -0.10 24 | M_bottom = +1.25 25 | pose_name = ['Pitch', 'Yaw', 'Roll'] # respect to ['head down','out of plane left','in plane right'] 26 | 27 | def recover_coordinate(largetBBox, facepoint, width, height): 28 | point = np.zeros(np.shape(facepoint)) 29 | cut_width = largetBBox[1] - largetBBox[0] 30 | cut_height = largetBBox[3] - largetBBox[2] 31 | scale_x = cut_width*1.0/width; 32 | scale_y = cut_height*1.0/height; 33 | point[0::2]=[float(j * scale_x + largetBBox[0]) for j in facepoint[0::2]] 34 | point[1::2]=[float(j * scale_y + largetBBox[2]) for j in facepoint[1::2]] 35 | return point 36 | 37 | def show_image(img, facepoint, bboxs, headpose): 38 | plt.figure(figsize=(20,10)) 39 | for faceNum in range(0,facepoint.shape[0]): 40 | cv2.rectangle(img, (int(bboxs[faceNum,0]), int(bboxs[faceNum,2])), (int(bboxs[faceNum,1]), int(bboxs[faceNum,3])), (0,0,255), 2) 41 | for p in range(0,3): 42 | plt.text(int(bboxs[faceNum,0]), int(bboxs[faceNum,2])-p*30, 43 | '{:s} {:.2f}'.format(pose_name[p], headpose[faceNum,p]), 44 | bbox=dict(facecolor='blue', alpha=0.5), 45 | fontsize=12, color='white') 46 | for i in range(0,facepoint.shape[1]/2): 47 | cv2.circle(img,(int(round(facepoint[faceNum,i*2])),int(round(facepoint[faceNum,i*2+1]))),1,(0,255,0),2) 48 | height = img.shape[0] 49 | width = img.shape[1] 50 | if height > system_height or width > system_width: 51 | height_radius = system_height*1.0/height 52 | width_radius = system_width*1.0/width 53 | radius = min(height_radius,width_radius) 54 | img = cv2.resize(img, (0,0), fx=radius, fy=radius) 55 | 56 | img = img[:,:,[2,1,0]] 57 | cv2.imshow('frame',img) 58 | 59 | 60 | 61 | def recoverPart(point,bbox,left,right,top,bottom,img_height,img_width,height,width): 62 | largeBBox = getCutSize(bbox,left,right,top,bottom) 63 | retiBBox = retifyBBoxSize(img_height,img_width,largeBBox) 64 | recover = recover_coordinate(retiBBox,point,height,width) 65 | recover=recover.astype('float32') 66 | return recover 67 | 68 | 69 | def getRGBTestPart(bbox,left,right,top,bottom,img,height,width): 70 | largeBBox = getCutSize(bbox,left,right,top,bottom) 71 | retiBBox = retifyBBox(img,largeBBox) 72 | # cv2.rectangle(img, (int(retiBBox[0]), int(retiBBox[2])), (int(retiBBox[1]), int(retiBBox[3])), (0,0,255), 2) 73 | # cv2.imshow('f',img) 74 | # cv2.waitKey(0) 75 | face = img[int(retiBBox[2]):int(retiBBox[3]), int(retiBBox[0]):int(retiBBox[1]), :] 76 | face = cv2.resize(face,(height,width),interpolation = cv2.INTER_AREA) 77 | face=face.astype('float32') 78 | return face 79 |
80 | def batchRecoverPart(predictPoint,totalBBox,totalSize,left,right,top,bottom,height,width): 81 | recoverPoint = np.zeros(predictPoint.shape) 82 | for i in range(0,predictPoint.shape[0]): 83 | recoverPoint[i] = recoverPart(predictPoint[i],totalBBox[i],left,right,top,bottom,totalSize[i,0],totalSize[i,1],height,width) 84 | return recoverPoint 85 | 86 | 87 | 88 | def retifyBBox(img,bbox): 89 | img_height = np.shape(img)[0] - 1 90 | img_width = np.shape(img)[1] - 1 91 | if bbox[0] <0: 92 | bbox[0] = 0 93 | if bbox[1] <0: 94 | bbox[1] = 0 95 | if bbox[2] <0: 96 | bbox[2] = 0 97 | if bbox[3] <0: 98 | bbox[3] = 0 99 | if bbox[0] > img_width: 100 | bbox[0] = img_width 101 | if bbox[1] > img_width: 102 | bbox[1] = img_width 103 | if bbox[2] > img_height: 104 | bbox[2] = img_height 105 | if bbox[3] > img_height: 106 | bbox[3] = img_height 107 | return bbox 108 | 109 | def retifyBBoxSize(img_height,img_width,bbox): 110 | if bbox[0] <0: 111 | bbox[0] = 0 112 | if bbox[1] <0: 113 | bbox[1] = 0 114 | if bbox[2] <0: 115 | bbox[2] = 0 116 | if bbox[3] <0: 117 | bbox[3] = 0 118 | if bbox[0] > img_width: 119 | bbox[0] = img_width 120 | if bbox[1] > img_width: 121 | bbox[1] = img_width 122 | if bbox[2] > img_height: 123 | bbox[2] = img_height 124 | if bbox[3] > img_height: 125 | bbox[3] = img_height 126 | return bbox 127 | 128 | def getCutSize(bbox,left,right,top,bottom): #left, right, top, and bottom 129 | 130 | box_width = bbox[1] - bbox[0] 131 | box_height = bbox[3] - bbox[2] 132 | cut_size=np.zeros((4)) 133 | cut_size[0] = bbox[0] + left * box_width 134 | cut_size[1] = bbox[1] + (right - 1) * box_width 135 | cut_size[2] = bbox[2] + top * box_height 136 | cut_size[3] = bbox[3] + (bottom-1) * box_height 137 | return cut_size 138 | 139 | 140 | def detectFace(img): 141 | detector = dlib.get_frontal_face_detector() 142 | dets = detector(img,1) 143 | bboxs = np.zeros((len(dets),4)) 144 | for i, d in enumerate(dets): 145 | bboxs[i,0] = d.left(); 146 | bboxs[i,1] = d.right(); 147 | bboxs[i,2] = d.top(); 148 | bboxs[i,3] = d.bottom(); 149 | return bboxs; 150 | 151 | 152 | def predict_image_webcam(): 153 | vgg_point_MODEL_FILE = 'model/deploy.prototxt' 154 | vgg_point_PRETRAINED = 'model/68point_dlib_with_pose.caffemodel' 155 | mean_filename='model/VGG_mean.binaryproto' 156 | vgg_point_net=caffe.Net(vgg_point_MODEL_FILE,vgg_point_PRETRAINED,caffe.TEST) 157 | # caffe.set_mode_cpu() 158 | caffe.set_mode_gpu() 159 | caffe.set_device(0) 160 | proto_data = open(mean_filename, "rb").read() 161 | a = caffe.io.caffe_pb2.BlobProto.FromString(proto_data) 162 | mean = caffe.io.blobproto_to_array(a)[0] 163 | cap = cv2.VideoCapture(0) 164 | 165 | while True: 166 | ret, colorImage = cap.read() 167 | bboxs = detectFace(colorImage) 168 | faceNum = bboxs.shape[0] 169 | faces = np.zeros((1,3,vgg_height,vgg_width)) 170 | predictpoints = np.zeros((faceNum,pointNum*2)) 171 | predictpose = np.zeros((faceNum,3)) 172 | imgsize = np.zeros((2)) 173 | imgsize[0] = colorImage.shape[0]-1 174 | imgsize[1] = colorImage.shape[1]-1 175 | TotalSize = np.zeros((faceNum,2)) 176 | for i in range(0,faceNum): 177 | TotalSize[i] = imgsize 178 | for i in range(0,faceNum): 179 | bbox = bboxs[i] 180 | colorface = getRGBTestPart(bbox,M_left,M_right,M_top,M_bottom,colorImage,vgg_height,vgg_width) 181 | normalface = np.zeros(mean.shape) 182 | normalface[0] = colorface[:,:,0] 183 | normalface[1] = colorface[:,:,1] 184 | normalface[2] = colorface[:,:,2] 185 | normalface = normalface - mean 186 | faces[0] = normalface 187 | 188 | blobName = '68point' 189 | 
data4DL = np.zeros([faces.shape[0],1,1,1]) 190 | vgg_point_net.set_input_arrays(faces.astype(np.float32),data4DL.astype(np.float32)) 191 | vgg_point_net.forward() 192 | predictpoints[i] = vgg_point_net.blobs[blobName].data[0] 193 | 194 | blobName = 'poselayer' 195 | pose_prediction = vgg_point_net.blobs[blobName].data 196 | predictpose[i] = pose_prediction * 50 197 | 198 | predictpoints = predictpoints * vgg_height/2 + vgg_width/2 199 | level1Point = batchRecoverPart(predictpoints,bboxs,TotalSize,M_left,M_right,M_top,M_bottom,vgg_height,vgg_width) 200 | 201 | show_image(colorImage, level1Point, bboxs, predictpose) 202 | if cv2.waitKey(1) & 0xFF == ord('q'): 203 | break 204 | 205 | if __name__ == '__main__': 206 | predict_image_webcam() 207 | -------------------------------------------------------------------------------- /model/VGG_mean.binaryproto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/model/VGG_mean.binaryproto -------------------------------------------------------------------------------- /model/deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "dlib_vgg" 2 | layers { 3 | name: "data" 4 | type: MEMORY_DATA 5 | top: "data" 6 | top: "label" 7 | memory_data_param { 8 | batch_size: 1 # batch size: how many predictions to run at once; 1 is simplest, but a larger batch gives better throughput 9 | channels: 3 10 | height: 224 11 | width: 224 12 | 13 | } 14 | } 15 | 16 | 17 | layers { 18 | bottom: "data" 19 | top: "conv1" 20 | name: "conv1" 21 | type: CONVOLUTION 22 | convolution_param { 23 | num_output: 96 24 | kernel_size: 7 25 | stride: 2 26 | } 27 | } 28 | layers { 29 | bottom: "conv1" 30 | top: "conv1" 31 | name: "relu1" 32 | type: RELU 33 | } 34 | layers { 35 | bottom: "conv1" 36 | top: "norm1" 37 | name: "norm1" 38 | type: LRN 39 | lrn_param { 40 | local_size: 5 41 | alpha: 0.0005 42 | beta: 0.75 43 | k: 2 44 | } 45 | } 46 | layers { 47 | bottom: "norm1" 48 | top: "pool1" 49 | name: "pool1" 50 | type: POOLING 51 | pooling_param { 52 | pool: MAX 53 | kernel_size: 3 54 | stride: 3 55 | } 56 | } 57 | layers { 58 | bottom: "pool1" 59 | top: "conv2" 60 | name: "conv2" 61 | type: CONVOLUTION 62 | convolution_param { 63 | num_output: 256 64 | kernel_size: 5 65 | } 66 | } 67 | layers { 68 | bottom: "conv2" 69 | top: "conv2" 70 | name: "relu2" 71 | type: RELU 72 | } 73 | layers { 74 | bottom: "conv2" 75 | top: "pool2" 76 | name: "pool2" 77 | type: POOLING 78 | pooling_param { 79 | pool: MAX 80 | kernel_size: 2 81 | stride: 2 82 | } 83 | } 84 | layers { 85 | bottom: "pool2" 86 | top: "conv3" 87 | name: "conv3" 88 | type: CONVOLUTION 89 | convolution_param { 90 | num_output: 512 91 | pad: 1 92 | kernel_size: 3 93 | } 94 | } 95 | layers { 96 | bottom: "conv3" 97 | top: "conv3" 98 | name: "relu3" 99 | type: RELU 100 | } 101 | layers { 102 | bottom: "conv3" 103 | top: "conv4" 104 | name: "conv4" 105 | type: CONVOLUTION 106 | convolution_param { 107 | num_output: 512 108 | pad: 1 109 | kernel_size: 3 110 | } 111 | } 112 | layers { 113 | bottom: "conv4" 114 | top: "conv4" 115 | name: "relu4" 116 | type: RELU 117 | } 118 | layers { 119 | bottom: "conv4" 120 | top: "conv5" 121 | name: "conv5" 122 | type: CONVOLUTION 123 | convolution_param { 124 | num_output: 512 125 | pad: 1 126 | kernel_size: 3 127 | } 128 | } 129 | layers { 130 | bottom: "conv5" 131 | top: "conv5" 132 | name: "relu5" 133
| type: RELU 134 | } 135 | layers { 136 | bottom: "conv5" 137 | top: "pool5" 138 | name: "pool5" 139 | type: POOLING 140 | pooling_param { 141 | pool: MAX 142 | kernel_size: 3 143 | stride: 3 144 | } 145 | } 146 | layers { 147 | bottom: "pool5" 148 | top: "fc6" 149 | name: "fc6" 150 | type: INNER_PRODUCT 151 | inner_product_param { 152 | num_output: 4096 153 | } 154 | } 155 | layers { 156 | bottom: "fc6" 157 | top: "fc6" 158 | name: "relu6" 159 | type: RELU 160 | } 161 | layers { 162 | bottom: "fc6" 163 | top: "fc6" 164 | name: "drop6" 165 | type: DROPOUT 166 | dropout_param { 167 | dropout_ratio: 0.5 168 | } 169 | } 170 | layers { 171 | bottom: "fc6" 172 | top: "fc7" 173 | name: "fc7" 174 | type: INNER_PRODUCT 175 | inner_product_param { 176 | num_output: 4096 177 | } 178 | } 179 | layers { 180 | bottom: "fc7" 181 | top: "fc7" 182 | name: "relu7" 183 | type: RELU 184 | } 185 | layers { 186 | bottom: "fc7" 187 | top: "fc7" 188 | name: "drop7" 189 | type: DROPOUT 190 | dropout_param { 191 | dropout_ratio: 0.5 192 | } 193 | } 194 | layers { 195 | bottom: "fc7" 196 | top: "68point" 197 | name: "68point" 198 | type: INNER_PRODUCT 199 | inner_product_param { 200 | num_output: 136 201 | } 202 | } 203 | 204 | 205 | layers { 206 | bottom: "fc7" 207 | top: "poselayer" 208 | name: "poselayer" 209 | type: INNER_PRODUCT 210 | inner_product_param { 211 | num_output: 3 212 | } 213 | } 214 | 215 | 216 | 217 | --------------------------------------------------------------------------------
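Both prediction scripts drive this deploy network through its MEMORY_DATA layer: that is why they call `set_input_arrays` with the face crops plus a dummy label blob (`data4DL`) instead of reading data from disk. A condensed sketch of that calling pattern, assuming the model files sit at the paths used in this repo and that the crop has already been resized to 224x224 and mean-subtracted:

```python
import numpy as np
import caffe

# Hypothetical standalone forward pass mirroring landmarkPredict.py.
net = caffe.Net('model/deploy.prototxt',
                'model/68point_dlib_with_pose.caffemodel', caffe.TEST)

face = np.zeros((1, 3, 224, 224), dtype=np.float32)   # mean-subtracted BGR crop
dummy = np.zeros((1, 1, 1, 1), dtype=np.float32)      # MEMORY_DATA requires a label blob

net.set_input_arrays(face, dummy)
net.forward()

points = net.blobs['68point'].data[0]        # 136 outputs: x0, y0, ..., x67, y67
pose = net.blobs['poselayer'].data[0] * 50   # the scripts scale pose by 50 (pitch, yaw, roll)
# The scripts then map points back to crop pixels as p * 224/2 + 224/2.
```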
/result/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/result/1.png -------------------------------------------------------------------------------- /result/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/result/2.png -------------------------------------------------------------------------------- /result/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/result/3.png -------------------------------------------------------------------------------- /testList.txt: -------------------------------------------------------------------------------- 1 | img/1.jpg 2 | img/2.jpg 3 | img/3.jpg 4 | img/4.jpg 5 | img/5.jpg 6 | img/6.jpg 7 | img/7.jpg 8 | img/2008_007676.jpg 9 | img/2009_004587.jpg 10 | img/2007_007763.jpg 11 | img/2008_001322.jpg 12 | img/2008_001009.jpg 13 | img/2008_002079.jpg 14 | img/2008_002470.jpg 15 | img/2008_002506.jpg 16 | img/2008_004176.jpg 17 | img/2008_007676.jpg 18 | img/2009_004587.jpg 19 | img/image_004_1.jpg 20 | img/image_003_1.jpg 21 | img/image_09.jpg 22 | img/image_019_1.jpg 23 | img/image_020_1.jpg 24 | -------------------------------------------------------------------------------- /train/deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "with_pose" 2 | 3 | layer { 4 | name: "data" 5 | type: "MemoryData" 6 | top: "data" 7 | top: "label" 8 | memory_data_param { 9 | batch_size: 1 # batch size: how many predictions to run at once; 1 is simplest, but a larger batch gives better throughput
10 | channels: 3 11 | height: 224 12 | width: 224 13 | 14 | } 15 | } 16 | 17 | 18 | 19 | 20 | layer { 21 | bottom: "data" 22 | top: "conv1" 23 | name: "conv1" 24 | type: "Convolution" 25 | convolution_param { 26 | num_output: 96 27 | kernel_size: 7 28 | stride: 2 29 | } 30 | } 31 | layer { 32 | bottom: "conv1" 33 | top: "conv1" 34 | name: "relu1" 35 | type: "ReLU" 36 | } 37 | layer { 38 | bottom: "conv1" 39 | top: "norm1" 40 | name: "norm1" 41 | type: "LRN" 42 | lrn_param { 43 | local_size: 5 44 | alpha: 0.0005 45 | beta: 0.75 46 | k: 2 47 | } 48 | } 49 | layer { 50 | bottom: "norm1" 51 | top: "pool1" 52 | name: "pool1" 53 | type: "Pooling" 54 | pooling_param { 55 | pool: MAX 56 | kernel_size: 3 57 | stride: 3 58 | } 59 | } 60 | layer { 61 | bottom: "pool1" 62 | top: "conv2" 63 | name: "conv2" 64 | type: "Convolution" 65 | convolution_param { 66 | num_output: 256 67 | kernel_size: 5 68 | } 69 | } 70 | layer { 71 | bottom: "conv2" 72 | top: "conv2" 73 | name: "relu2" 74 | type: "ReLU" 75 | } 76 | layer { 77 | bottom: "conv2" 78 | top: "pool2" 79 | name: "pool2" 80 | type: "Pooling" 81 | pooling_param { 82 | pool: MAX 83 | kernel_size: 2 84 | stride: 2 85 | } 86 | } 87 | layer { 88 | bottom: "pool2" 89 | top: "conv3" 90 | name: "conv3" 91 | type: "Convolution" 92 | convolution_param { 93 | num_output: 512 94 | pad: 1 95 | kernel_size: 3 96 | } 97 | } 98 | layer { 99 | bottom: "conv3" 100 | top: "conv3" 101 | name: "relu3" 102 | type: "ReLU" 103 | } 104 | layer { 105 | bottom: "conv3" 106 | top: "conv4" 107 | name: "conv4" 108 | type: "Convolution" 109 | convolution_param { 110 | num_output: 512 111 | pad: 1 112 | kernel_size: 3 113 | } 114 | } 115 | layer { 116 | bottom: "conv4" 117 | top: "conv4" 118 | name: "relu4" 119 | type: "ReLU" 120 | } 121 | 122 | layer { 123 | bottom: "conv4" 124 | top: "conv5" 125 | name: "conv5" 126 | type: "Convolution" 127 | 128 | convolution_param { 129 | num_output: 512 130 | pad: 1 131 | kernel_size: 3 132 | } 133 | } 134 | layer { 135 | bottom: "conv5" 136 | top: "conv5" 137 | name: "relu5" 138 | type: "ReLU" 139 | } 140 | layer { 141 | bottom: "conv5" 142 | top: "pool5" 143 | name: "pool5" 144 | type: "Pooling" 145 | pooling_param { 146 | pool: MAX 147 | kernel_size: 3 148 | stride: 3 149 | } 150 | } 151 | layer { 152 | bottom: "pool5" 153 | top: "fc6" 154 | name: "fc6" 155 | type: "InnerProduct" 156 | 157 | inner_product_param { 158 | num_output: 4096 159 | } 160 | } 161 | layer { 162 | bottom: "fc6" 163 | top: "fc6" 164 | name: "relu6" 165 | type: "ReLU" 166 | } 167 | layer { 168 | bottom: "fc6" 169 | top: "fc6" 170 | name: "drop6" 171 | type: "Dropout" 172 | dropout_param { 173 | dropout_ratio: 0.5 174 | } 175 | } 176 | layer { 177 | bottom: "fc6" 178 | top: "fc7" 179 | name: "fc7" 180 | type: "InnerProduct" 181 | 182 | inner_product_param { 183 | num_output: 4096 184 | } 185 | } 186 | layer { 187 | bottom: "fc7" 188 | top: "fc7" 189 | name: "relu7" 190 | type: "ReLU" 191 | } 192 | layer { 193 | bottom: "fc7" 194 | top: "fc7" 195 | name: "drop7" 196 | type: "Dropout" 197 | dropout_param { 198 | dropout_ratio: 0.5 199 | } 200 | } 201 | layer { 202 | bottom: "fc7" 203 | top: "68point" 204 | name: "68point" 205 | type: "InnerProduct" 206 | inner_product_param { 207 | num_output: 136 208 | } 209 | } 210 | 211 | 212 | 213 | layer { 214 | bottom: "conv4" 215 | top: "conv5_b" 216 | name: "conv5_b" 217 | 218 | type: "Convolution" 219 | convolution_param { 220 | num_output: 512 221 | pad: 1 222 | kernel_size:
3 223 | } 224 | } 225 | layer { 226 | bottom: "conv5_b" 227 | top: "conv5_b" 228 | name: "relu5_b" 229 | type: "ReLU" 230 | } 231 | layer { 232 | bottom: "conv5_b" 233 | top: "pool5_b" 234 | name: "pool5_b" 235 | type: "Pooling" 236 | pooling_param { 237 | pool: MAX 238 | kernel_size: 3 239 | stride: 3 240 | } 241 | } 242 | layer { 243 | bottom: "pool5_b" 244 | top: "fc6_b" 245 | name: "fc6_b" 246 | type: "InnerProduct" 247 | 248 | inner_product_param { 249 | num_output: 4096 250 | } 251 | } 252 | layer { 253 | bottom: "fc6_b" 254 | top: "fc6_b" 255 | name: "relu6_b" 256 | type: "ReLU" 257 | } 258 | layer { 259 | bottom: "fc6_b" 260 | top: "fc6_b" 261 | name: "drop6_b" 262 | type: "Dropout" 263 | dropout_param { 264 | dropout_ratio: 0.5 265 | } 266 | } 267 | layer { 268 | bottom: "fc6_b" 269 | top: "fc7_b" 270 | name: "fc7_b" 271 | type: "InnerProduct" 272 | 273 | inner_product_param { 274 | num_output: 4096 275 | } 276 | } 277 | layer { 278 | bottom: "fc7_b" 279 | top: "fc7_b" 280 | name: "relu7_b" 281 | type: "ReLU" 282 | } 283 | layer { 284 | bottom: "fc7_b" 285 | top: "fc7_b" 286 | name: "drop7_b" 287 | type: "Dropout" 288 | dropout_param { 289 | dropout_ratio: 0.5 290 | } 291 | } 292 | layer { 293 | bottom: "fc7_b" 294 | top: "poselayer" 295 | name: "poselayer" 296 | type: "InnerProduct" 297 | inner_product_param { 298 | num_output: 3 299 | } 300 | } 301 | -------------------------------------------------------------------------------- /train/solver.prototxt: -------------------------------------------------------------------------------- 1 | # The training protocol buffer definition 2 | net: "train_val.prototxt" 3 | # The testing protocol buffer definition 4 | # test_iter specifies how many forward passes the test should carry out. 5 | # With the test batch size of 20 set in train_val.prototxt, 34 iterations 6 | # evaluate 680 images per test pass. 7 | test_iter: 34 8 | # Carry out testing every 1000 training iterations. 9 | test_interval: 1000 10 | # The base learning rate, momentum and the weight decay of the network.
11 | base_lr: 0.0001 12 | weight_decay: 0.0005 13 | solver_type: NESTEROV 14 | momentum: 0.9 15 | # The learning rate policy 16 | lr_policy: "fixed" 17 | gamma: 0.0001 18 | power: 0.75 19 | stepsize: 50000 20 | # Display every 200 iterations 21 | display: 200 22 | # The maximum number of iterations 23 | max_iter: 1700000 24 | # snapshot intermediate results 25 | snapshot: 10000 26 | snapshot_prefix: "../model/with_pose/" 27 | # solver mode: CPU or GPU 28 | solver_mode: GPU 29 | 30 | -------------------------------------------------------------------------------- /train/train_val.prototxt: -------------------------------------------------------------------------------- 1 | name: "sub2_vgg" 2 | layer { 3 | name: "MyData" 4 | type: "HDF5Data" 5 | top: "data" 6 | top: "label" 7 | top: "pose" 8 | hdf5_data_param { 9 | source: "/home/hkk/DATACENTER/hdf5/box_train_bgr_data_list.txt" 10 | batch_size: 70 11 | shuffle: true 12 | } 13 | include: { phase: TRAIN } 14 | } 15 | layer { 16 | name: "MyData" 17 | type: "HDF5Data" 18 | top: "data" 19 | top: "label" 20 | top: "pose" 21 | hdf5_data_param { 22 | source: "/home/hkk/DATACENTER/hdf5/box_train_bgr_data_list.txt" 23 | batch_size: 20 24 | } 25 | include: { phase: TEST } 26 | } 27 | 28 | layer { 29 | bottom: "data" 30 | top: "conv1" 31 | name: "conv1" 32 | type: "Convolution" 33 | convolution_param { 34 | num_output: 96 35 | kernel_size: 7 36 | stride: 2 37 | } 38 | } 39 | layer { 40 | bottom: "conv1" 41 | top: "conv1" 42 | name: "relu1" 43 | type: "ReLU" 44 | } 45 | layer { 46 | bottom: "conv1" 47 | top: "norm1" 48 | name: "norm1" 49 | type: "LRN" 50 | lrn_param { 51 | local_size: 5 52 | alpha: 0.0005 53 | beta: 0.75 54 | k: 2 55 | } 56 | } 57 | layer { 58 | bottom: "norm1" 59 | top: "pool1" 60 | name: "pool1" 61 | type: "Pooling" 62 | pooling_param { 63 | pool: MAX 64 | kernel_size: 3 65 | stride: 3 66 | } 67 | } 68 | layer { 69 | bottom: "pool1" 70 | top: "conv2" 71 | name: "conv2" 72 | type: "Convolution" 73 | convolution_param { 74 | num_output: 256 75 | kernel_size: 5 76 | } 77 | } 78 | layer { 79 | bottom: "conv2" 80 | top: "conv2" 81 | name: "relu2" 82 | type: "ReLU" 83 | } 84 | layer { 85 | bottom: "conv2" 86 | top: "pool2" 87 | name: "pool2" 88 | type: "Pooling" 89 | pooling_param { 90 | pool: MAX 91 | kernel_size: 2 92 | stride: 2 93 | } 94 | } 95 | layer { 96 | bottom: "pool2" 97 | top: "conv3" 98 | name: "conv3" 99 | type: "Convolution" 100 | convolution_param { 101 | num_output: 512 102 | pad: 1 103 | kernel_size: 3 104 | } 105 | } 106 | layer { 107 | bottom: "conv3" 108 | top: "conv3" 109 | name: "relu3" 110 | type: "ReLU" 111 | } 112 | layer { 113 | bottom: "conv3" 114 | top: "conv4" 115 | name: "conv4" 116 | type: "Convolution" 117 | convolution_param { 118 | num_output: 512 119 | pad: 1 120 | kernel_size: 3 121 | } 122 | } 123 | layer { 124 | bottom: "conv4" 125 | top: "conv4" 126 | name: "relu4" 127 | type: "ReLU" 128 | } 129 | 130 | layer { 131 | bottom: "conv4" 132 | top: "conv5" 133 | name: "conv5" 134 | type: "Convolution" 135 | 136 | convolution_param { 137 | num_output: 512 138 | pad: 1 139 | kernel_size: 3 140 | } 141 | } 142 | layer { 143 | bottom: "conv5" 144 | top: "conv5" 145 | name: "relu5" 146 | type: "ReLU" 147 | } 148 | layer { 149 | bottom: "conv5" 150 | top: "pool5" 151 | name: "pool5" 152 | type: "Pooling" 153 | pooling_param { 154 | pool: MAX 155 | kernel_size: 3 156 | stride: 3 157 | } 158 | } 159 | layer { 160 | bottom: "pool5" 161 | top: "fc6" 162 | name: "fc6" 163 | type: "InnerProduct" 164 | 165 |
inner_product_param { 166 | num_output: 4096 167 | } 168 | } 169 | layer { 170 | bottom: "fc6" 171 | top: "fc6" 172 | name: "relu6" 173 | type: "ReLU" 174 | } 175 | layer { 176 | bottom: "fc6" 177 | top: "fc6" 178 | name: "drop6" 179 | type: "Dropout" 180 | dropout_param { 181 | dropout_ratio: 0.5 182 | } 183 | } 184 | layer { 185 | bottom: "fc6" 186 | top: "fc7" 187 | name: "fc7" 188 | type: "InnerProduct" 189 | 190 | inner_product_param { 191 | num_output: 4096 192 | } 193 | } 194 | layer { 195 | bottom: "fc7" 196 | top: "fc7" 197 | name: "relu7" 198 | type: "ReLU" 199 | } 200 | layer { 201 | bottom: "fc7" 202 | top: "fc7" 203 | name: "drop7" 204 | type: "Dropout" 205 | dropout_param { 206 | dropout_ratio: 0.5 207 | } 208 | } 209 | layer { 210 | bottom: "fc7" 211 | top: "68point" 212 | name: "68point" 213 | type: "InnerProduct" 214 | inner_product_param { 215 | num_output: 136 216 | } 217 | } 218 | layer { 219 | name: "loss" 220 | type: "EuclideanLoss" 221 | bottom: "68point" 222 | bottom: "label" 223 | top: "loss" 224 | loss_weight: 1 225 | } 226 | 227 | 228 | layer { 229 | bottom: "conv4" 230 | top: "conv5_b" 231 | name: "conv5_b" 232 | 233 | type: "Convolution" 234 | convolution_param { 235 | num_output: 512 236 | pad: 1 237 | kernel_size: 3 238 | } 239 | } 240 | layer { 241 | bottom: "conv5_b" 242 | top: "conv5_b" 243 | name: "relu5_b" 244 | type: "ReLU" 245 | } 246 | layer { 247 | bottom: "conv5_b" 248 | top: "pool5_b" 249 | name: "pool5_b" 250 | type: "Pooling" 251 | pooling_param { 252 | pool: MAX 253 | kernel_size: 3 254 | stride: 3 255 | } 256 | } 257 | layer { 258 | bottom: "pool5_b" 259 | top: "fc6_b" 260 | name: "fc6_b" 261 | type: "InnerProduct" 262 | 263 | inner_product_param { 264 | num_output: 4096 265 | } 266 | } 267 | layer { 268 | bottom: "fc6_b" 269 | top: "fc6_b" 270 | name: "relu6_b" 271 | type: "ReLU" 272 | } 273 | layer { 274 | bottom: "fc6_b" 275 | top: "fc6_b" 276 | name: "drop6_b" 277 | type: "Dropout" 278 | dropout_param { 279 | dropout_ratio: 0.5 280 | } 281 | } 282 | layer { 283 | bottom: "fc6_b" 284 | top: "fc7_b" 285 | name: "fc7_b" 286 | type: "InnerProduct" 287 | 288 | inner_product_param { 289 | num_output: 4096 290 | } 291 | } 292 | layer { 293 | bottom: "fc7_b" 294 | top: "fc7_b" 295 | name: "relu7_b" 296 | type: "ReLU" 297 | } 298 | layer { 299 | bottom: "fc7_b" 300 | top: "fc7_b" 301 | name: "drop7_b" 302 | type: "Dropout" 303 | dropout_param { 304 | dropout_ratio: 0.5 305 | } 306 | } 307 | layer { 308 | bottom: "fc7_b" 309 | top: "poselayer" 310 | name: "poselayer" 311 | type: "InnerProduct" 312 | inner_product_param { 313 | num_output: 3 314 | } 315 | } 316 | layer { 317 | name: "poseLoss" 318 | type: "EuclideanLoss" 319 | bottom: "poselayer" 320 | bottom: "pose" 321 | top: "poseLoss" 322 | loss_weight: 3 323 | } 324 | --------------------------------------------------------------------------------
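The HDF5Data layers in train_val.prototxt expect each .h5 file named in the `source` list to carry three datasets, `data`, `label`, and `pose`, matching the layer tops. A hedged sketch of writing one such file with h5py: the shapes follow the network (3x224x224 input, 136 landmark targets, 3 pose targets), while the exact normalization is an assumption inverted from the predict scripts (which recover crop pixels as p * 112 + 112 and scale pose outputs by 50).

```python
import h5py
import numpy as np

N = 100  # hypothetical number of training crops
data = np.zeros((N, 3, 224, 224), dtype=np.float32)  # mean-subtracted BGR crops
label = np.zeros((N, 136), dtype=np.float32)         # assumed (pixel - 112) / 112 per coordinate
pose = np.zeros((N, 3), dtype=np.float32)            # assumed pitch/yaw/roll divided by 50

with h5py.File('box_train_bgr_data.h5', 'w') as f:
    f.create_dataset('data', data=data)
    f.create_dataset('label', data=label)
    f.create_dataset('pose', data=pose)

# hdf5_data_param.source points at a text file listing .h5 paths, one per line:
with open('box_train_bgr_data_list.txt', 'w') as f:
    f.write('box_train_bgr_data.h5\n')
```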