├── README.md ├── img ├── 1.jpg ├── 2.jpg ├── 2007_007763.jpg ├── 2008_001009.jpg ├── 2008_001322.jpg ├── 2008_002079.jpg ├── 2008_002470.jpg ├── 2008_002506.jpg ├── 2008_004176.jpg ├── 2008_007676.jpg ├── 2009_004587.jpg ├── 3.jpg ├── 4.jpg ├── 5.jpg ├── 6.jpg ├── 7.jpg ├── image_003_1.jpg ├── image_004_1.jpg ├── image_019_1.jpg ├── image_020_1.jpg └── image_09.jpg ├── landmarkPredict.py ├── landmarkPredict_webcam.py ├── model ├── VGG_mean.binaryproto └── deploy.prototxt ├── result ├── 1.png ├── 2.png └── 3.png ├── testList.txt └── train ├── deploy.prototxt ├── solver.prototxt └── train_val.prototxt /README.md: -------------------------------------------------------------------------------- 1 | # face-landmark-localization 2 | This project predicts face landmarks (68 points) and head pose (3D pose: yaw, roll, pitch). 3 | 4 | 5 | ## Install 6 | - [caffe](https://github.com/BVLC/caffe) 7 | - [dlib face detector](http://dlib.net/)

8 | You can download [dlib 18.17](http://pan.baidu.com/s/1gey9Wd1)

9 | cd your dlib folder

10 | cd python_examples

11 | ./compile_dlib_python_module.bat

12 | Add the generated dlib.so to your Python path

13 | If you are using dlib 18.18, you can follow the [official instructions](http://dlib.net/) 14 | - opencv
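Before running anything, it may help to confirm that all three dependencies resolve from a single Python interpreter. A minimal sanity check (plain Python; nothing here is specific to this repo beyond the modules the scripts below import):

```python
# Environment check: these are exactly the modules landmarkPredict.py imports.
# If any import fails, fix that installation before going further.
import caffe   # pycaffe must be on PYTHONPATH
import cv2     # OpenCV Python bindings
import dlib    # the compiled dlib.so must be on PYTHONPATH

# Instantiating the detector confirms the dlib build actually works.
detector = dlib.get_frontal_face_detector()
print('caffe, cv2 and dlib imported; dlib face detector ready.')
```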

15 | 16 | ## Usage 17 | 18 | - Command: `python landmarkPredict.py predictImage testList.txt`
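testList.txt is a plain-text list with one image path per line, resolved relative to the directory you run the command from. The bundled list starts like this:

```
img/1.jpg
img/2.jpg
img/3.jpg
```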

19 | (Each listed image is run through the dlib face detector, then the 68 landmarks and the head pose are predicted and displayed.) 20 | 21 | 22 | ## Model 23 | 24 | - You can download the pre-trained model from [dropbox](https://www.dropbox.com/s/z89prwhg0jpgp47/68point_dlib_with_pose.caffemodel?dl=0) or [baidu yun](http://pan.baidu.com/s/1c14aFyK) 25 | 26 | ## Result 27 | ![](result/1.png) 28 | ![](result/2.png) 29 | ![](result/3.png) 30 | --------------------------------------------------------------------------------

/img/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/1.jpg -------------------------------------------------------------------------------- /img/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/2.jpg -------------------------------------------------------------------------------- /img/2007_007763.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/2007_007763.jpg -------------------------------------------------------------------------------- /img/2008_001009.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/2008_001009.jpg -------------------------------------------------------------------------------- /img/2008_001322.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/2008_001322.jpg -------------------------------------------------------------------------------- /img/2008_002079.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/2008_002079.jpg -------------------------------------------------------------------------------- /img/2008_002470.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/2008_002470.jpg -------------------------------------------------------------------------------- /img/2008_002506.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/2008_002506.jpg -------------------------------------------------------------------------------- /img/2008_004176.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/2008_004176.jpg -------------------------------------------------------------------------------- /img/2008_007676.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/2008_007676.jpg -------------------------------------------------------------------------------- /img/2009_004587.jpg:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/2009_004587.jpg -------------------------------------------------------------------------------- /img/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/3.jpg -------------------------------------------------------------------------------- /img/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/4.jpg -------------------------------------------------------------------------------- /img/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/5.jpg -------------------------------------------------------------------------------- /img/6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/6.jpg -------------------------------------------------------------------------------- /img/7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/7.jpg -------------------------------------------------------------------------------- /img/image_003_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/image_003_1.jpg -------------------------------------------------------------------------------- /img/image_004_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/image_004_1.jpg -------------------------------------------------------------------------------- /img/image_019_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/image_019_1.jpg -------------------------------------------------------------------------------- /img/image_020_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/image_020_1.jpg -------------------------------------------------------------------------------- /img/image_09.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/img/image_09.jpg -------------------------------------------------------------------------------- /landmarkPredict.py: -------------------------------------------------------------------------------- 1 | #usage :python landmarkPredict.py predictImage testList.txt 2 | 3 | import os 4 | import sys 5 | import numpy as np 6 | 
import cv2 7 | import caffe 8 | import dlib 9 | import matplotlib.pyplot as plt 10 | 11 | system_height = 650 12 | system_width = 1280 13 | channels = 1 14 | test_num = 1 15 | pointNum = 68 16 | 17 | S0_width = 60 18 | S0_height = 60 19 | vgg_height = 224 20 | vgg_width = 224 21 | M_left = -0.15 22 | M_right = +1.15 23 | M_top = -0.10 24 | M_bottom = +1.25 25 | pose_name = ['Pitch', 'Yaw', 'Roll'] # respect to ['head down','out of plane left','in plane right'] 26 | 27 | def recover_coordinate(largetBBox, facepoint, width, height): 28 | point = np.zeros(np.shape(facepoint)) 29 | cut_width = largetBBox[1] - largetBBox[0] 30 | cut_height = largetBBox[3] - largetBBox[2] 31 | scale_x = cut_width*1.0/width; 32 | scale_y = cut_height*1.0/height; 33 | point[0::2]=[float(j * scale_x + largetBBox[0]) for j in facepoint[0::2]] 34 | point[1::2]=[float(j * scale_y + largetBBox[2]) for j in facepoint[1::2]] 35 | return point 36 | 37 | def show_image(img, facepoint, bboxs, headpose): 38 | plt.figure(figsize=(20,10)) 39 | for faceNum in range(0,facepoint.shape[0]): 40 | cv2.rectangle(img, (int(bboxs[faceNum,0]), int(bboxs[faceNum,2])), (int(bboxs[faceNum,1]), int(bboxs[faceNum,3])), (0,0,255), 2) 41 | for p in range(0,3): 42 | plt.text(int(bboxs[faceNum,0]), int(bboxs[faceNum,2])-p*30, 43 | '{:s} {:.2f}'.format(pose_name[p], headpose[faceNum,p]), 44 | bbox=dict(facecolor='blue', alpha=0.5), 45 | fontsize=12, color='white') 46 | for i in range(0,facepoint.shape[1]/2): 47 | cv2.circle(img,(int(round(facepoint[faceNum,i*2])),int(round(facepoint[faceNum,i*2+1]))),1,(0,255,0),2) 48 | height = img.shape[0] 49 | width = img.shape[1] 50 | if height > system_height or width > system_width: 51 | height_radius = system_height*1.0/height 52 | width_radius = system_width*1.0/width 53 | radius = min(height_radius,width_radius) 54 | img = cv2.resize(img, (0,0), fx=radius, fy=radius) 55 | 56 | img = img[:,:,[2,1,0]] 57 | plt.imshow(img) 58 | plt.show() 59 | 60 | 61 | def recoverPart(point,bbox,left,right,top,bottom,img_height,img_width,height,width): 62 | largeBBox = getCutSize(bbox,left,right,top,bottom) 63 | retiBBox = retifyBBoxSize(img_height,img_width,largeBBox) 64 | recover = recover_coordinate(retiBBox,point,height,width) 65 | recover=recover.astype('float32') 66 | return recover 67 | 68 | 69 | def getRGBTestPart(bbox,left,right,top,bottom,img,height,width): 70 | largeBBox = getCutSize(bbox,left,right,top,bottom) 71 | retiBBox = retifyBBox(img,largeBBox) 72 | # cv2.rectangle(img, (int(retiBBox[0]), int(retiBBox[2])), (int(retiBBox[1]), int(retiBBox[3])), (0,0,255), 2) 73 | # cv2.imshow('f',img) 74 | # cv2.waitKey(0) 75 | face = img[int(retiBBox[2]):int(retiBBox[3]), int(retiBBox[0]):int(retiBBox[1]), :] 76 | face = cv2.resize(face,(height,width),interpolation = cv2.INTER_AREA) 77 | face=face.astype('float32') 78 | return face 79 | 80 | def batchRecoverPart(predictPoint,totalBBox,totalSize,left,right,top,bottom,height,width): 81 | recoverPoint = np.zeros(predictPoint.shape) 82 | for i in range(0,predictPoint.shape[0]): 83 | recoverPoint[i] = recoverPart(predictPoint[i],totalBBox[i],left,right,top,bottom,totalSize[i,0],totalSize[i,1],height,width) 84 | return recoverPoint 85 | 86 | 87 | 88 | def retifyBBox(img,bbox): 89 | img_height = np.shape(img)[0] - 1 90 | img_width = np.shape(img)[1] - 1 91 | if bbox[0] <0: 92 | bbox[0] = 0 93 | if bbox[1] <0: 94 | bbox[1] = 0 95 | if bbox[2] <0: 96 | bbox[2] = 0 97 | if bbox[3] <0: 98 | bbox[3] = 0 99 | if bbox[0] > img_width: 100 | bbox[0] = img_width 101 | if bbox[1] > 
img_width: 102 | bbox[1] = img_width 103 | if bbox[2] > img_height: 104 | bbox[2] = img_height 105 | if bbox[3] > img_height: 106 | bbox[3] = img_height 107 | return bbox 108 | 109 | def retifyBBoxSize(img_height,img_width,bbox): 110 | if bbox[0] <0: 111 | bbox[0] = 0 112 | if bbox[1] <0: 113 | bbox[1] = 0 114 | if bbox[2] <0: 115 | bbox[2] = 0 116 | if bbox[3] <0: 117 | bbox[3] = 0 118 | if bbox[0] > img_width: 119 | bbox[0] = img_width 120 | if bbox[1] > img_width: 121 | bbox[1] = img_width 122 | if bbox[2] > img_height: 123 | bbox[2] = img_height 124 | if bbox[3] > img_height: 125 | bbox[3] = img_height 126 | return bbox 127 | 128 | def getCutSize(bbox,left,right,top,bottom): #left, right, top, and bottom 129 | 130 | box_width = bbox[1] - bbox[0] 131 | box_height = bbox[3] - bbox[2] 132 | cut_size=np.zeros((4)) 133 | cut_size[0] = bbox[0] + left * box_width 134 | cut_size[1] = bbox[1] + (right - 1) * box_width 135 | cut_size[2] = bbox[2] + top * box_height 136 | cut_size[3] = bbox[3] + (bottom-1) * box_height 137 | return cut_size 138 | 139 | 140 | def detectFace(img): 141 | detector = dlib.get_frontal_face_detector() 142 | dets = detector(img,1) 143 | bboxs = np.zeros((len(dets),4)) 144 | for i, d in enumerate(dets): 145 | bboxs[i,0] = d.left(); 146 | bboxs[i,1] = d.right(); 147 | bboxs[i,2] = d.top(); 148 | bboxs[i,3] = d.bottom(); 149 | return bboxs; 150 | 151 | 152 | def predictImage(filename): 153 | vgg_point_MODEL_FILE = 'model/deploy.prototxt' 154 | vgg_point_PRETRAINED = 'model/68point_dlib_with_pose.caffemodel' 155 | mean_filename='model/VGG_mean.binaryproto' 156 | vgg_point_net=caffe.Net(vgg_point_MODEL_FILE,vgg_point_PRETRAINED,caffe.TEST) 157 | # caffe.set_mode_cpu() 158 | caffe.set_mode_gpu() 159 | caffe.set_device(0) 160 | f = open(filename) 161 | line = f.readline() 162 | index = 0 163 | proto_data = open(mean_filename, "rb").read() 164 | a = caffe.io.caffe_pb2.BlobProto.FromString(proto_data) 165 | mean = caffe.io.blobproto_to_array(a)[0] 166 | 167 | while line: 168 | print index 169 | line = line.strip() 170 | info = line.split(' ') 171 | imgPath = info[0] 172 | print imgPath 173 | num = 1 174 | colorImage = cv2.imread(imgPath) 175 | bboxs = detectFace(colorImage) 176 | faceNum = bboxs.shape[0] 177 | faces = np.zeros((1,3,vgg_height,vgg_width)) 178 | predictpoints = np.zeros((faceNum,pointNum*2)) 179 | predictpose = np.zeros((faceNum,3)) 180 | imgsize = np.zeros((2)) 181 | imgsize[0] = colorImage.shape[0]-1 182 | imgsize[1] = colorImage.shape[1]-1 183 | TotalSize = np.zeros((faceNum,2)) 184 | for i in range(0,faceNum): 185 | TotalSize[i] = imgsize 186 | for i in range(0,faceNum): 187 | bbox = bboxs[i] 188 | colorface = getRGBTestPart(bbox,M_left,M_right,M_top,M_bottom,colorImage,vgg_height,vgg_width) 189 | normalface = np.zeros(mean.shape) 190 | normalface[0] = colorface[:,:,0] 191 | normalface[1] = colorface[:,:,1] 192 | normalface[2] = colorface[:,:,2] 193 | normalface = normalface - mean 194 | faces[0] = normalface 195 | 196 | blobName = '68point' 197 | data4DL = np.zeros([faces.shape[0],1,1,1]) 198 | vgg_point_net.set_input_arrays(faces.astype(np.float32),data4DL.astype(np.float32)) 199 | vgg_point_net.forward() 200 | predictpoints[i] = vgg_point_net.blobs[blobName].data[0] 201 | 202 | blobName = 'poselayer' 203 | pose_prediction = vgg_point_net.blobs[blobName].data 204 | predictpose[i] = pose_prediction * 50 205 | 206 | predictpoints = predictpoints * vgg_height/2 + vgg_width/2 207 | level1Point = 
batchRecoverPart(predictpoints,bboxs,TotalSize,M_left,M_right,M_top,M_bottom,vgg_height,vgg_width) 208 | 209 | show_image(colorImage, level1Point, bboxs, predictpose) 210 | line = f.readline() 211 | index = index + 1 212 | 213 | 214 | if __name__ == '__main__': 215 | if len(sys.argv) < 2: 216 | print(__doc__) 217 | else: 218 | func = globals()[sys.argv[1]] 219 | func(*sys.argv[2:]) -------------------------------------------------------------------------------- /landmarkPredict_webcam.py: -------------------------------------------------------------------------------- 1 | # usage: python landmarkPredict_webcam.py 2 | 3 | import os 4 | import sys 5 | import numpy as np 6 | import cv2 7 | import caffe 8 | import dlib 9 | import matplotlib.pyplot as plt 10 | 11 | system_height = 650 12 | system_width = 1280 13 | channels = 1 14 | test_num = 1 15 | pointNum = 68 16 | 17 | S0_width = 60 18 | S0_height = 60 19 | vgg_height = 224 20 | vgg_width = 224 21 | M_left = -0.15 22 | M_right = +1.15 23 | M_top = -0.10 24 | M_bottom = +1.25 25 | pose_name = ['Pitch', 'Yaw', 'Roll'] # respect to ['head down','out of plane left','in plane right'] 26 | 27 | def recover_coordinate(largetBBox, facepoint, width, height): 28 | point = np.zeros(np.shape(facepoint)) 29 | cut_width = largetBBox[1] - largetBBox[0] 30 | cut_height = largetBBox[3] - largetBBox[2] 31 | scale_x = cut_width*1.0/width; 32 | scale_y = cut_height*1.0/height; 33 | point[0::2]=[float(j * scale_x + largetBBox[0]) for j in facepoint[0::2]] 34 | point[1::2]=[float(j * scale_y + largetBBox[2]) for j in facepoint[1::2]] 35 | return point 36 | 37 | def show_image(img, facepoint, bboxs, headpose): 38 | plt.figure(figsize=(20,10)) 39 | for faceNum in range(0,facepoint.shape[0]): 40 | cv2.rectangle(img, (int(bboxs[faceNum,0]), int(bboxs[faceNum,2])), (int(bboxs[faceNum,1]), int(bboxs[faceNum,3])), (0,0,255), 2) 41 | for p in range(0,3): 42 | plt.text(int(bboxs[faceNum,0]), int(bboxs[faceNum,2])-p*30, 43 | '{:s} {:.2f}'.format(pose_name[p], headpose[faceNum,p]), 44 | bbox=dict(facecolor='blue', alpha=0.5), 45 | fontsize=12, color='white') 46 | for i in range(0,facepoint.shape[1]/2): 47 | cv2.circle(img,(int(round(facepoint[faceNum,i*2])),int(round(facepoint[faceNum,i*2+1]))),1,(0,255,0),2) 48 | height = img.shape[0] 49 | width = img.shape[1] 50 | if height > system_height or width > system_width: 51 | height_radius = system_height*1.0/height 52 | width_radius = system_width*1.0/width 53 | radius = min(height_radius,width_radius) 54 | img = cv2.resize(img, (0,0), fx=radius, fy=radius) 55 | 56 | img = img[:,:,[2,1,0]] 57 | cv2.imshow('frame',img) 58 | 59 | 60 | 61 | def recoverPart(point,bbox,left,right,top,bottom,img_height,img_width,height,width): 62 | largeBBox = getCutSize(bbox,left,right,top,bottom) 63 | retiBBox = retifyBBoxSize(img_height,img_width,largeBBox) 64 | recover = recover_coordinate(retiBBox,point,height,width) 65 | recover=recover.astype('float32') 66 | return recover 67 | 68 | 69 | def getRGBTestPart(bbox,left,right,top,bottom,img,height,width): 70 | largeBBox = getCutSize(bbox,left,right,top,bottom) 71 | retiBBox = retifyBBox(img,largeBBox) 72 | # cv2.rectangle(img, (int(retiBBox[0]), int(retiBBox[2])), (int(retiBBox[1]), int(retiBBox[3])), (0,0,255), 2) 73 | # cv2.imshow('f',img) 74 | # cv2.waitKey(0) 75 | face = img[int(retiBBox[2]):int(retiBBox[3]), int(retiBBox[0]):int(retiBBox[1]), :] 76 | face = cv2.resize(face,(height,width),interpolation = cv2.INTER_AREA) 77 | face=face.astype('float32') 78 | return face 79 |
80 | def batchRecoverPart(predictPoint,totalBBox,totalSize,left,right,top,bottom,height,width): 81 | recoverPoint = np.zeros(predictPoint.shape) 82 | for i in range(0,predictPoint.shape[0]): 83 | recoverPoint[i] = recoverPart(predictPoint[i],totalBBox[i],left,right,top,bottom,totalSize[i,0],totalSize[i,1],height,width) 84 | return recoverPoint 85 | 86 | 87 | 88 | def retifyBBox(img,bbox): 89 | img_height = np.shape(img)[0] - 1 90 | img_width = np.shape(img)[1] - 1 91 | if bbox[0] <0: 92 | bbox[0] = 0 93 | if bbox[1] <0: 94 | bbox[1] = 0 95 | if bbox[2] <0: 96 | bbox[2] = 0 97 | if bbox[3] <0: 98 | bbox[3] = 0 99 | if bbox[0] > img_width: 100 | bbox[0] = img_width 101 | if bbox[1] > img_width: 102 | bbox[1] = img_width 103 | if bbox[2] > img_height: 104 | bbox[2] = img_height 105 | if bbox[3] > img_height: 106 | bbox[3] = img_height 107 | return bbox 108 | 109 | def retifyBBoxSize(img_height,img_width,bbox): 110 | if bbox[0] <0: 111 | bbox[0] = 0 112 | if bbox[1] <0: 113 | bbox[1] = 0 114 | if bbox[2] <0: 115 | bbox[2] = 0 116 | if bbox[3] <0: 117 | bbox[3] = 0 118 | if bbox[0] > img_width: 119 | bbox[0] = img_width 120 | if bbox[1] > img_width: 121 | bbox[1] = img_width 122 | if bbox[2] > img_height: 123 | bbox[2] = img_height 124 | if bbox[3] > img_height: 125 | bbox[3] = img_height 126 | return bbox 127 | 128 | def getCutSize(bbox,left,right,top,bottom): #left, right, top, and bottom 129 | 130 | box_width = bbox[1] - bbox[0] 131 | box_height = bbox[3] - bbox[2] 132 | cut_size=np.zeros((4)) 133 | cut_size[0] = bbox[0] + left * box_width 134 | cut_size[1] = bbox[1] + (right - 1) * box_width 135 | cut_size[2] = bbox[2] + top * box_height 136 | cut_size[3] = bbox[3] + (bottom-1) * box_height 137 | return cut_size 138 | 139 | 140 | def detectFace(img): 141 | detector = dlib.get_frontal_face_detector() 142 | dets = detector(img,1) 143 | bboxs = np.zeros((len(dets),4)) 144 | for i, d in enumerate(dets): 145 | bboxs[i,0] = d.left(); 146 | bboxs[i,1] = d.right(); 147 | bboxs[i,2] = d.top(); 148 | bboxs[i,3] = d.bottom(); 149 | return bboxs; 150 | 151 | 152 | def predict_image_webcam(): 153 | vgg_point_MODEL_FILE = 'model/deploy.prototxt' 154 | vgg_point_PRETRAINED = 'model/68point_dlib_with_pose.caffemodel' 155 | mean_filename='model/VGG_mean.binaryproto' 156 | vgg_point_net=caffe.Net(vgg_point_MODEL_FILE,vgg_point_PRETRAINED,caffe.TEST) 157 | # caffe.set_mode_cpu() 158 | caffe.set_mode_gpu() 159 | caffe.set_device(0) 160 | proto_data = open(mean_filename, "rb").read() 161 | a = caffe.io.caffe_pb2.BlobProto.FromString(proto_data) 162 | mean = caffe.io.blobproto_to_array(a)[0] 163 | cap = cv2.VideoCapture(0) 164 | 165 | while True: 166 | ret, colorImage = cap.read() 167 | bboxs = detectFace(colorImage) 168 | faceNum = bboxs.shape[0] 169 | faces = np.zeros((1,3,vgg_height,vgg_width)) 170 | predictpoints = np.zeros((faceNum,pointNum*2)) 171 | predictpose = np.zeros((faceNum,3)) 172 | imgsize = np.zeros((2)) 173 | imgsize[0] = colorImage.shape[0]-1 174 | imgsize[1] = colorImage.shape[1]-1 175 | TotalSize = np.zeros((faceNum,2)) 176 | for i in range(0,faceNum): 177 | TotalSize[i] = imgsize 178 | for i in range(0,faceNum): 179 | bbox = bboxs[i] 180 | colorface = getRGBTestPart(bbox,M_left,M_right,M_top,M_bottom,colorImage,vgg_height,vgg_width) 181 | normalface = np.zeros(mean.shape) 182 | normalface[0] = colorface[:,:,0] 183 | normalface[1] = colorface[:,:,1] 184 | normalface[2] = colorface[:,:,2] 185 | normalface = normalface - mean 186 | faces[0] = normalface 187 | 188 | blobName = '68point' 189 | 
data4DL = np.zeros([faces.shape[0],1,1,1]) 190 | vgg_point_net.set_input_arrays(faces.astype(np.float32),data4DL.astype(np.float32)) 191 | vgg_point_net.forward() 192 | predictpoints[i] = vgg_point_net.blobs[blobName].data[0] 193 | 194 | blobName = 'poselayer' 195 | pose_prediction = vgg_point_net.blobs[blobName].data 196 | predictpose[i] = pose_prediction * 50 197 | 198 | predictpoints = predictpoints * vgg_height/2 + vgg_width/2 199 | level1Point = batchRecoverPart(predictpoints,bboxs,TotalSize,M_left,M_right,M_top,M_bottom,vgg_height,vgg_width) 200 | 201 | show_image(colorImage, level1Point, bboxs, predictpose) 202 | if cv2.waitKey(1) & 0xFF == ord('q'): 203 | break 204 | 205 | if __name__ == '__main__': 206 | predict_image_webcam() 207 | -------------------------------------------------------------------------------- /model/VGG_mean.binaryproto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/model/VGG_mean.binaryproto -------------------------------------------------------------------------------- /model/deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "dlib_vgg" 2 | layers { 3 | name: "data" 4 | type: MEMORY_DATA 5 | top: "data" 6 | top: "label" 7 | memory_data_param { 8 | batch_size: 1 # batch size: how many predictions to run at once; 1 is simplest, but a larger batch gives better throughput 9 | channels: 3 10 | height: 224 11 | width: 224 12 | 13 | } 14 | } 15 | 16 | 17 | layers { 18 | bottom: "data" 19 | top: "conv1" 20 | name: "conv1" 21 | type: CONVOLUTION 22 | convolution_param { 23 | num_output: 96 24 | kernel_size: 7 25 | stride: 2 26 | } 27 | } 28 | layers { 29 | bottom: "conv1" 30 | top: "conv1" 31 | name: "relu1" 32 | type: RELU 33 | } 34 | layers { 35 | bottom: "conv1" 36 | top: "norm1" 37 | name: "norm1" 38 | type: LRN 39 | lrn_param { 40 | local_size: 5 41 | alpha: 0.0005 42 | beta: 0.75 43 | k: 2 44 | } 45 | } 46 | layers { 47 | bottom: "norm1" 48 | top: "pool1" 49 | name: "pool1" 50 | type: POOLING 51 | pooling_param { 52 | pool: MAX 53 | kernel_size: 3 54 | stride: 3 55 | } 56 | } 57 | layers { 58 | bottom: "pool1" 59 | top: "conv2" 60 | name: "conv2" 61 | type: CONVOLUTION 62 | convolution_param { 63 | num_output: 256 64 | kernel_size: 5 65 | } 66 | } 67 | layers { 68 | bottom: "conv2" 69 | top: "conv2" 70 | name: "relu2" 71 | type: RELU 72 | } 73 | layers { 74 | bottom: "conv2" 75 | top: "pool2" 76 | name: "pool2" 77 | type: POOLING 78 | pooling_param { 79 | pool: MAX 80 | kernel_size: 2 81 | stride: 2 82 | } 83 | } 84 | layers { 85 | bottom: "pool2" 86 | top: "conv3" 87 | name: "conv3" 88 | type: CONVOLUTION 89 | convolution_param { 90 | num_output: 512 91 | pad: 1 92 | kernel_size: 3 93 | } 94 | } 95 | layers { 96 | bottom: "conv3" 97 | top: "conv3" 98 | name: "relu3" 99 | type: RELU 100 | } 101 | layers { 102 | bottom: "conv3" 103 | top: "conv4" 104 | name: "conv4" 105 | type: CONVOLUTION 106 | convolution_param { 107 | num_output: 512 108 | pad: 1 109 | kernel_size: 3 110 | } 111 | } 112 | layers { 113 | bottom: "conv4" 114 | top: "conv4" 115 | name: "relu4" 116 | type: RELU 117 | } 118 | layers { 119 | bottom: "conv4" 120 | top: "conv5" 121 | name: "conv5" 122 | type: CONVOLUTION 123 | convolution_param { 124 | num_output: 512 125 | pad: 1 126 | kernel_size: 3 127 | } 128 | } 129 | layers { 130 | bottom: "conv5" 131 | top: "conv5" 132 | name: "relu5" 133
| type: RELU 134 | } 135 | layers { 136 | bottom: "conv5" 137 | top: "pool5" 138 | name: "pool5" 139 | type: POOLING 140 | pooling_param { 141 | pool: MAX 142 | kernel_size: 3 143 | stride: 3 144 | } 145 | } 146 | layers { 147 | bottom: "pool5" 148 | top: "fc6" 149 | name: "fc6" 150 | type: INNER_PRODUCT 151 | inner_product_param { 152 | num_output: 4096 153 | } 154 | } 155 | layers { 156 | bottom: "fc6" 157 | top: "fc6" 158 | name: "relu6" 159 | type: RELU 160 | } 161 | layers { 162 | bottom: "fc6" 163 | top: "fc6" 164 | name: "drop6" 165 | type: DROPOUT 166 | dropout_param { 167 | dropout_ratio: 0.5 168 | } 169 | } 170 | layers { 171 | bottom: "fc6" 172 | top: "fc7" 173 | name: "fc7" 174 | type: INNER_PRODUCT 175 | inner_product_param { 176 | num_output: 4096 177 | } 178 | } 179 | layers { 180 | bottom: "fc7" 181 | top: "fc7" 182 | name: "relu7" 183 | type: RELU 184 | } 185 | layers { 186 | bottom: "fc7" 187 | top: "fc7" 188 | name: "drop7" 189 | type: DROPOUT 190 | dropout_param { 191 | dropout_ratio: 0.5 192 | } 193 | } 194 | layers { 195 | bottom: "fc7" 196 | top: "68point" 197 | name: "68point" 198 | type: INNER_PRODUCT 199 | inner_product_param { 200 | num_output: 136 201 | } 202 | } 203 | 204 | 205 | layers { 206 | bottom: "fc7" 207 | top: "poselayer" 208 | name: "poselayer" 209 | type: INNER_PRODUCT 210 | inner_product_param { 211 | num_output: 3 212 | } 213 | } 214 | 215 | 216 | 217 | --------------------------------------------------------------------------------
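Both prediction scripts drive this deploy network through its MEMORY_DATA layer: that is why they call `set_input_arrays` with the face crops plus a dummy label blob (`data4DL`) instead of reading data from disk. A condensed sketch of that calling pattern, assuming the model files sit at the paths used in this repo and that the crop has already been resized to 224x224 and mean-subtracted:

```python
import numpy as np
import caffe

# Hypothetical standalone forward pass mirroring landmarkPredict.py.
net = caffe.Net('model/deploy.prototxt',
                'model/68point_dlib_with_pose.caffemodel', caffe.TEST)

face = np.zeros((1, 3, 224, 224), dtype=np.float32)   # mean-subtracted BGR crop
dummy = np.zeros((1, 1, 1, 1), dtype=np.float32)      # MEMORY_DATA requires a label blob

net.set_input_arrays(face, dummy)
net.forward()

points = net.blobs['68point'].data[0]        # 136 outputs: x0, y0, ..., x67, y67
pose = net.blobs['poselayer'].data[0] * 50   # the scripts scale pose by 50 (pitch, yaw, roll)
# The scripts then map points back to crop pixels as p * 224/2 + 224/2.
```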
/result/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/result/1.png -------------------------------------------------------------------------------- /result/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/result/2.png -------------------------------------------------------------------------------- /result/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiexing/face-landmark-localization/2a57a0c528ac1c336c439dc20af320d68bb2e4dc/result/3.png -------------------------------------------------------------------------------- /testList.txt: -------------------------------------------------------------------------------- 1 | img/1.jpg 2 | img/2.jpg 3 | img/3.jpg 4 | img/4.jpg 5 | img/5.jpg 6 | img/6.jpg 7 | img/7.jpg 8 | img/2008_007676.jpg 9 | img/2009_004587.jpg 10 | img/2007_007763.jpg 11 | img/2008_001322.jpg 12 | img/2008_001009.jpg 13 | img/2008_002079.jpg 14 | img/2008_002470.jpg 15 | img/2008_002506.jpg 16 | img/2008_004176.jpg 17 | img/2008_007676.jpg 18 | img/2009_004587.jpg 19 | img/image_004_1.jpg 20 | img/image_003_1.jpg 21 | img/image_09.jpg 22 | img/image_019_1.jpg 23 | img/image_020_1.jpg 24 | -------------------------------------------------------------------------------- /train/deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "with_pose" 2 | 3 | layer { 4 | name: "data" 5 | type: "MemoryData" 6 | top: "data" 7 | top: "label" 8 | memory_data_param { 9 | batch_size: 1 # batch size: how many predictions to run at once; 1 is simplest, but a larger batch gives better throughput
10 | channels: 3 11 | height: 224 12 | width: 224 13 | 14 | } 15 | } 16 | 17 | 18 | 19 | 20 | layer { 21 | bottom: "data" 22 | top: "conv1" 23 | name: "conv1" 24 | type: "Convolution" 25 | convolution_param { 26 | num_output: 96 27 | kernel_size: 7 28 | stride: 2 29 | } 30 | } 31 | layer { 32 | bottom: "conv1" 33 | top: "conv1" 34 | name: "relu1" 35 | type: "ReLU" 36 | } 37 | layer { 38 | bottom: "conv1" 39 | top: "norm1" 40 | name: "norm1" 41 | type: "LRN" 42 | lrn_param { 43 | local_size: 5 44 | alpha: 0.0005 45 | beta: 0.75 46 | k: 2 47 | } 48 | } 49 | layer { 50 | bottom: "norm1" 51 | top: "pool1" 52 | name: "pool1" 53 | type: "Pooling" 54 | pooling_param { 55 | pool: MAX 56 | kernel_size: 3 57 | stride: 3 58 | } 59 | } 60 | layer { 61 | bottom: "pool1" 62 | top: "conv2" 63 | name: "conv2" 64 | type: "Convolution" 65 | convolution_param { 66 | num_output: 256 67 | kernel_size: 5 68 | } 69 | } 70 | layer { 71 | bottom: "conv2" 72 | top: "conv2" 73 | name: "relu2" 74 | type: "ReLU" 75 | } 76 | layer { 77 | bottom: "conv2" 78 | top: "pool2" 79 | name: "pool2" 80 | type: "Pooling" 81 | pooling_param { 82 | pool: MAX 83 | kernel_size: 2 84 | stride: 2 85 | } 86 | } 87 | layer { 88 | bottom: "pool2" 89 | top: "conv3" 90 | name: "conv3" 91 | type: "Convolution" 92 | convolution_param { 93 | num_output: 512 94 | pad: 1 95 | kernel_size: 3 96 | } 97 | } 98 | layer { 99 | bottom: "conv3" 100 | top: "conv3" 101 | name: "relu3" 102 | type: "ReLU" 103 | } 104 | layer { 105 | bottom: "conv3" 106 | top: "conv4" 107 | name: "conv4" 108 | type: "Convolution" 109 | convolution_param { 110 | num_output: 512 111 | pad: 1 112 | kernel_size: 3 113 | } 114 | } 115 | layer { 116 | bottom: "conv4" 117 | top: "conv4" 118 | name: "relu4" 119 | type: "ReLU" 120 | } 121 | 122 | layer { 123 | bottom: "conv4" 124 | top: "conv5" 125 | name: "conv5" 126 | type: "Convolution" 127 | 128 | convolution_param { 129 | num_output: 512 130 | pad: 1 131 | kernel_size: 3 132 | } 133 | } 134 | layer { 135 | bottom: "conv5" 136 | top: "conv5" 137 | name: "relu5" 138 | type: "ReLU" 139 | } 140 | layer { 141 | bottom: "conv5" 142 | top: "pool5" 143 | name: "pool5" 144 | type: "Pooling" 145 | pooling_param { 146 | pool: MAX 147 | kernel_size: 3 148 | stride: 3 149 | } 150 | } 151 | layer { 152 | bottom: "pool5" 153 | top: "fc6" 154 | name: "fc6" 155 | type: "InnerProduct" 156 | 157 | inner_product_param { 158 | num_output: 4096 159 | } 160 | } 161 | layer { 162 | bottom: "fc6" 163 | top: "fc6" 164 | name: "relu6" 165 | type: "ReLU" 166 | } 167 | layer { 168 | bottom: "fc6" 169 | top: "fc6" 170 | name: "drop6" 171 | type: "Dropout" 172 | dropout_param { 173 | dropout_ratio: 0.5 174 | } 175 | } 176 | layer { 177 | bottom: "fc6" 178 | top: "fc7" 179 | name: "fc7" 180 | type: "InnerProduct" 181 | 182 | inner_product_param { 183 | num_output: 4096 184 | } 185 | } 186 | layer { 187 | bottom: "fc7" 188 | top: "fc7" 189 | name: "relu7" 190 | type: "ReLU" 191 | } 192 | layer { 193 | bottom: "fc7" 194 | top: "fc7" 195 | name: "drop7" 196 | type: "Dropout" 197 | dropout_param { 198 | dropout_ratio: 0.5 199 | } 200 | } 201 | layer { 202 | bottom: "fc7" 203 | top: "68point" 204 | name: "68point" 205 | type: "InnerProduct" 206 | inner_product_param { 207 | num_output: 136 208 | } 209 | } 210 | 211 | 212 | 213 | layer { 214 | bottom: "conv4" 215 | top: "conv5_b" 216 | name: "conv5_b" 217 | 218 | type: "Convolution" 219 | convolution_param { 220 | num_output: 512 221 | pad: 1 222 | kernel_size:
3 223 | } 224 | } 225 | layer { 226 | bottom: "conv5_b" 227 | top: "conv5_b" 228 | name: "relu5_b" 229 | type: "ReLU" 230 | } 231 | layer { 232 | bottom: "conv5_b" 233 | top: "pool5_b" 234 | name: "pool5_b" 235 | type: "Pooling" 236 | pooling_param { 237 | pool: MAX 238 | kernel_size: 3 239 | stride: 3 240 | } 241 | } 242 | layer { 243 | bottom: "pool5_b" 244 | top: "fc6_b" 245 | name: "fc6_b" 246 | type: "InnerProduct" 247 | 248 | inner_product_param { 249 | num_output: 4096 250 | } 251 | } 252 | layer { 253 | bottom: "fc6_b" 254 | top: "fc6_b" 255 | name: "relu6_b" 256 | type: "ReLU" 257 | } 258 | layer { 259 | bottom: "fc6_b" 260 | top: "fc6_b" 261 | name: "drop6_b" 262 | type: "Dropout" 263 | dropout_param { 264 | dropout_ratio: 0.5 265 | } 266 | } 267 | layer { 268 | bottom: "fc6_b" 269 | top: "fc7_b" 270 | name: "fc7_b" 271 | type: "InnerProduct" 272 | 273 | inner_product_param { 274 | num_output: 4096 275 | } 276 | } 277 | layer { 278 | bottom: "fc7_b" 279 | top: "fc7_b" 280 | name: "relu7_b" 281 | type: "ReLU" 282 | } 283 | layer { 284 | bottom: "fc7_b" 285 | top: "fc7_b" 286 | name: "drop7_b" 287 | type: "Dropout" 288 | dropout_param { 289 | dropout_ratio: 0.5 290 | } 291 | } 292 | layer { 293 | bottom: "fc7_b" 294 | top: "poselayer" 295 | name: "poselayer" 296 | type: "InnerProduct" 297 | inner_product_param { 298 | num_output: 3 299 | } 300 | } 301 | -------------------------------------------------------------------------------- /train/solver.prototxt: -------------------------------------------------------------------------------- 1 | # The training protocol buffer definition 2 | net: "train_val.prototxt" 3 | # The testing protocol buffer definition 4 | # test_iter specifies how many forward passes the test should carry out. 5 | # With the test batch size of 20 set in train_val.prototxt, 34 iterations 6 | # evaluate 680 images per test pass. 7 | test_iter: 34 8 | # Carry out testing every 1000 training iterations. 9 | test_interval: 1000 10 | # The base learning rate, momentum and the weight decay of the network.
11 | base_lr: 0.0001 12 | weight_decay: 0.0005 13 | solver_type: NESTEROV 14 | momentum: 0.9 15 | # The learning rate policy 16 | lr_policy: "fixed" 17 | gamma: 0.0001 18 | power: 0.75 19 | stepsize: 50000 20 | # Display every 200 iterations 21 | display: 200 22 | # The maximum number of iterations 23 | max_iter: 1700000 24 | # snapshot intermediate results 25 | snapshot: 10000 26 | snapshot_prefix: "../model/with_pose/" 27 | # solver mode: CPU or GPU 28 | solver_mode: GPU 29 | 30 | -------------------------------------------------------------------------------- /train/train_val.prototxt: -------------------------------------------------------------------------------- 1 | name: "sub2_vgg" 2 | layer { 3 | name: "MyData" 4 | type: "HDF5Data" 5 | top: "data" 6 | top: "label" 7 | top: "pose" 8 | hdf5_data_param { 9 | source: "/home/hkk/DATACENTER/hdf5/box_train_bgr_data_list.txt" 10 | batch_size: 70 11 | shuffle: true 12 | } 13 | include: { phase: TRAIN } 14 | } 15 | layer { 16 | name: "MyData" 17 | type: "HDF5Data" 18 | top: "data" 19 | top: "label" 20 | top: "pose" 21 | hdf5_data_param { 22 | source: "/home/hkk/DATACENTER/hdf5/box_train_bgr_data_list.txt" 23 | batch_size: 20 24 | } 25 | include: { phase: TEST } 26 | } 27 | 28 | layer { 29 | bottom: "data" 30 | top: "conv1" 31 | name: "conv1" 32 | type: "Convolution" 33 | convolution_param { 34 | num_output: 96 35 | kernel_size: 7 36 | stride: 2 37 | } 38 | } 39 | layer { 40 | bottom: "conv1" 41 | top: "conv1" 42 | name: "relu1" 43 | type: "ReLU" 44 | } 45 | layer { 46 | bottom: "conv1" 47 | top: "norm1" 48 | name: "norm1" 49 | type: "LRN" 50 | lrn_param { 51 | local_size: 5 52 | alpha: 0.0005 53 | beta: 0.75 54 | k: 2 55 | } 56 | } 57 | layer { 58 | bottom: "norm1" 59 | top: "pool1" 60 | name: "pool1" 61 | type: "Pooling" 62 | pooling_param { 63 | pool: MAX 64 | kernel_size: 3 65 | stride: 3 66 | } 67 | } 68 | layer { 69 | bottom: "pool1" 70 | top: "conv2" 71 | name: "conv2" 72 | type: "Convolution" 73 | convolution_param { 74 | num_output: 256 75 | kernel_size: 5 76 | } 77 | } 78 | layer { 79 | bottom: "conv2" 80 | top: "conv2" 81 | name: "relu2" 82 | type: "ReLU" 83 | } 84 | layer { 85 | bottom: "conv2" 86 | top: "pool2" 87 | name: "pool2" 88 | type: "Pooling" 89 | pooling_param { 90 | pool: MAX 91 | kernel_size: 2 92 | stride: 2 93 | } 94 | } 95 | layer { 96 | bottom: "pool2" 97 | top: "conv3" 98 | name: "conv3" 99 | type: "Convolution" 100 | convolution_param { 101 | num_output: 512 102 | pad: 1 103 | kernel_size: 3 104 | } 105 | } 106 | layer { 107 | bottom: "conv3" 108 | top: "conv3" 109 | name: "relu3" 110 | type: "ReLU" 111 | } 112 | layer { 113 | bottom: "conv3" 114 | top: "conv4" 115 | name: "conv4" 116 | type: "Convolution" 117 | convolution_param { 118 | num_output: 512 119 | pad: 1 120 | kernel_size: 3 121 | } 122 | } 123 | layer { 124 | bottom: "conv4" 125 | top: "conv4" 126 | name: "relu4" 127 | type: "ReLU" 128 | } 129 | 130 | layer { 131 | bottom: "conv4" 132 | top: "conv5" 133 | name: "conv5" 134 | type: "Convolution" 135 | 136 | convolution_param { 137 | num_output: 512 138 | pad: 1 139 | kernel_size: 3 140 | } 141 | } 142 | layer { 143 | bottom: "conv5" 144 | top: "conv5" 145 | name: "relu5" 146 | type: "ReLU" 147 | } 148 | layer { 149 | bottom: "conv5" 150 | top: "pool5" 151 | name: "pool5" 152 | type: "Pooling" 153 | pooling_param { 154 | pool: MAX 155 | kernel_size: 3 156 | stride: 3 157 | } 158 | } 159 | layer { 160 | bottom: "pool5" 161 | top: "fc6" 162 | name: "fc6" 163 | type: "InnerProduct" 164 | 165 |
inner_product_param { 166 | num_output: 4096 167 | } 168 | } 169 | layer { 170 | bottom: "fc6" 171 | top: "fc6" 172 | name: "relu6" 173 | type: "ReLU" 174 | } 175 | layer { 176 | bottom: "fc6" 177 | top: "fc6" 178 | name: "drop6" 179 | type: "Dropout" 180 | dropout_param { 181 | dropout_ratio: 0.5 182 | } 183 | } 184 | layer { 185 | bottom: "fc6" 186 | top: "fc7" 187 | name: "fc7" 188 | type: "InnerProduct" 189 | 190 | inner_product_param { 191 | num_output: 4096 192 | } 193 | } 194 | layer { 195 | bottom: "fc7" 196 | top: "fc7" 197 | name: "relu7" 198 | type: "ReLU" 199 | } 200 | layer { 201 | bottom: "fc7" 202 | top: "fc7" 203 | name: "drop7" 204 | type: "Dropout" 205 | dropout_param { 206 | dropout_ratio: 0.5 207 | } 208 | } 209 | layer { 210 | bottom: "fc7" 211 | top: "68point" 212 | name: "68point" 213 | type: "InnerProduct" 214 | inner_product_param { 215 | num_output: 136 216 | } 217 | } 218 | layer { 219 | name: "loss" 220 | type: "EuclideanLoss" 221 | bottom: "68point" 222 | bottom: "label" 223 | top: "loss" 224 | loss_weight: 1 225 | } 226 | 227 | 228 | layer { 229 | bottom: "conv4" 230 | top: "conv5_b" 231 | name: "conv5_b" 232 | 233 | type: "Convolution" 234 | convolution_param { 235 | num_output: 512 236 | pad: 1 237 | kernel_size: 3 238 | } 239 | } 240 | layer { 241 | bottom: "conv5_b" 242 | top: "conv5_b" 243 | name: "relu5_b" 244 | type: "ReLU" 245 | } 246 | layer { 247 | bottom: "conv5_b" 248 | top: "pool5_b" 249 | name: "pool5_b" 250 | type: "Pooling" 251 | pooling_param { 252 | pool: MAX 253 | kernel_size: 3 254 | stride: 3 255 | } 256 | } 257 | layer { 258 | bottom: "pool5_b" 259 | top: "fc6_b" 260 | name: "fc6_b" 261 | type: "InnerProduct" 262 | 263 | inner_product_param { 264 | num_output: 4096 265 | } 266 | } 267 | layer { 268 | bottom: "fc6_b" 269 | top: "fc6_b" 270 | name: "relu6_b" 271 | type: "ReLU" 272 | } 273 | layer { 274 | bottom: "fc6_b" 275 | top: "fc6_b" 276 | name: "drop6_b" 277 | type: "Dropout" 278 | dropout_param { 279 | dropout_ratio: 0.5 280 | } 281 | } 282 | layer { 283 | bottom: "fc6_b" 284 | top: "fc7_b" 285 | name: "fc7_b" 286 | type: "InnerProduct" 287 | 288 | inner_product_param { 289 | num_output: 4096 290 | } 291 | } 292 | layer { 293 | bottom: "fc7_b" 294 | top: "fc7_b" 295 | name: "relu7_b" 296 | type: "ReLU" 297 | } 298 | layer { 299 | bottom: "fc7_b" 300 | top: "fc7_b" 301 | name: "drop7_b" 302 | type: "Dropout" 303 | dropout_param { 304 | dropout_ratio: 0.5 305 | } 306 | } 307 | layer { 308 | bottom: "fc7_b" 309 | top: "poselayer" 310 | name: "poselayer" 311 | type: "InnerProduct" 312 | inner_product_param { 313 | num_output: 3 314 | } 315 | } 316 | layer { 317 | name: "poseLoss" 318 | type: "EuclideanLoss" 319 | bottom: "poselayer" 320 | bottom: "pose" 321 | top: "poseLoss" 322 | loss_weight: 3 323 | } 324 | --------------------------------------------------------------------------------
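The HDF5Data layers in train_val.prototxt expect each .h5 file named in the `source` list to carry three datasets, `data`, `label`, and `pose`, matching the layer tops. A hedged sketch of writing one such file with h5py: the shapes follow the network (3x224x224 input, 136 landmark targets, 3 pose targets), while the exact normalization is an assumption inverted from the predict scripts (which recover crop pixels as p * 112 + 112 and scale pose outputs by 50).

```python
import h5py
import numpy as np

N = 100  # hypothetical number of training crops
data = np.zeros((N, 3, 224, 224), dtype=np.float32)  # mean-subtracted BGR crops
label = np.zeros((N, 136), dtype=np.float32)         # assumed (pixel - 112) / 112 per coordinate
pose = np.zeros((N, 3), dtype=np.float32)            # assumed pitch/yaw/roll divided by 50

with h5py.File('box_train_bgr_data.h5', 'w') as f:
    f.create_dataset('data', data=data)
    f.create_dataset('label', data=label)
    f.create_dataset('pose', data=pose)

# hdf5_data_param.source points at a text file listing .h5 paths, one per line:
with open('box_train_bgr_data_list.txt', 'w') as f:
    f.write('box_train_bgr_data.h5\n')
```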