├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── __main__.py ├── dataset ├── __init__.py ├── __main__.py ├── extract_images.sh └── mouth_features.py ├── nets ├── __init__.py ├── __main__.py └── mouth_features.py ├── requirements.txt ├── split ├── __init__.py ├── __main__.py └── split_squence.py └── train ├── __init__.py └── __main__.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | logs/* 3 | models/* 4 | shape_predictor_68_face_landmarks.dat 5 | haarcas/* -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Mitiku Yohannes 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Talking & Yawn Detection 2 | This project aims to train a model that detects talking and yawning from sequences of images. 3 | 4 | ## How to split the dataset from videos 5 | This project uses the [YawDD dataset](http://www.site.uottawa.ca/~shervin/yawning/). 6 | * First, modify the dataset/extract_images.sh file so that it points to the Mirror folders (for both the male and female subjects) that contain the recorded videos. Also modify the output directory (/dataset/yawn/images3/). 7 | * Second, split the extracted image sequences into smaller sequences by running the following command 8 | ``` python -m split --images_path path-to-extracted-images --faces_path path-to-save-bounding-boxes-of-sequence-images --output_path path-to-save-output-sequences --sequence_length sequence-length-to-split``` 9 | 10 | ### How to run the training program 11 | 12 | ``` python -m train --dataset_path path-to-split-dataset --faces_path path-to-bounding-boxes --sequence_length sequence-length ``` 13 | 14 | * **shape_predictor_68_face_landmarks.dat must be inside the root directory of this project. 
Shape predictor can be downloaded to project using the following script.** 15 | ``` 16 | cd /path-to-project 17 | wget "http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2" 18 | bzip2 -d shape_predictor_68_face_landmarks.dat.bz2 19 | ``` 20 | 21 | [sp]: http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ymitiku/TalkingYawnDetection/b8ab7a5ecacce31555ff3fa2769df1fd455084fa/__init__.py -------------------------------------------------------------------------------- /__main__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import dlib 4 | 5 | detector = dlib.get_frontal_face_detector() 6 | 7 | face_cascade = cv2.CascadeClassifier("haarcas/haarcascade_profileface.xml") 8 | 9 | current_dir = "/dataset/yawn/splited-100/5-FemaleGlasses-Talking-0" 10 | for img_file in os.listdir(current_dir): 11 | img = cv2.imread(os.path.join(current_dir,img_file)) 12 | img_gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY) 13 | 14 | faces = face_cascade.detectMultiScale(img_gray,1.5,5,minSize=(30,30),flags=cv2.CASCADE_SCALE_IMAGE) 15 | if len(faces)==0: 16 | faces =detector(img_gray) 17 | if len(faces)==0: 18 | continue 19 | face = faces[0] 20 | cv2.rectangle(img,(face.left(),face.top()),(face.right(),face.bottom()),(0,0,255),2) 21 | else: 22 | for (x,y,w,h) in faces: 23 | cv2.rectangle(img,(x,y),(x+w,y+h),(255,0,0),2) 24 | cv2.imshow("Image",img) 25 | cv2.waitKey(0) 26 | cv2.destroyAllWindows() 27 | -------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import dlib 3 | import os 4 | import numpy as np 5 | import pandas as pd 6 | import json 7 | from threading import Thread 8 | from sklearn.model_selection import train_test_split 9 | 10 | 11 | class DriverActionDataset(object): 12 | def __init__(self,dataset_dir,bounding_box_dir,image_shape,max_sequence_length): 13 | self.dataset_dir = dataset_dir 14 | self.bounding_box_dir = bounding_box_dir 15 | self.image_shape = image_shape 16 | self.dataset_loaded = False 17 | self.max_sequence_length = max_sequence_length 18 | self.detector = dlib.get_frontal_face_detector() 19 | self.predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat") 20 | def get_attribute(self,folder_name): 21 | subj,gender_glasses,action_str,_ = folder_name.split("-") 22 | gender = -1 23 | glasses_str = None 24 | if gender_glasses[:4].lower() == "male": 25 | gender = 1 26 | glasses_str = gender_glasses[4:] 27 | elif gender_glasses[:6].lower() == "female": 28 | gender = 0 29 | glasses_str = gender_glasses[6:] 30 | else: 31 | raise Exception("Unable to parse gender from "+str(folder_name)) 32 | glasses = -1 33 | 34 | if glasses_str[:9].lower() =="noglasses": 35 | glasses = 0 36 | elif glasses_str[:7].lower() == "glasses" or glasses_str[:10].lower() == "sunglasses": 37 | glasses = 1 38 | else: 39 | raise Exception("Unable to parse glasses information from "+str(folder_name)) 40 | 41 | actions_str = action_str.split("&") 42 | for i in range(len(actions_str)): 43 | if actions_str[i].lower()=="normal": 44 | action = 0 45 | elif actions_str[i].lower() == "yawning": 46 | action = 0 47 | elif actions_str[i].lower() == "talking": 48 | action = 1 49 | 
break 50 | else: 51 | raise Exception("Unable to parse action information from " + str(folder_name)) 52 | 53 | 54 | output = {"Subject":subj,"Gender":gender,"Glasses":glasses,"Action":action} 55 | 56 | return output 57 | def get_dlib_points(self,image,face,predictor): 58 | shape = predictor(image,face) 59 | dlib_points = np.zeros((68,2)) 60 | for i,part in enumerate(shape.parts()): 61 | dlib_points[i] = [part.x,part.y] 62 | return dlib_points 63 | def get_right_eye_attributes(self,image,dlib_points): 64 | 65 | right_eye_dlib_points = np.concatenate((dlib_points[17:22],dlib_points[36:42])) 66 | assert len(right_eye_dlib_points)==11, "right _eye dlib points should be 11" 67 | right_eye_top_left = right_eye_dlib_points.min(axis=0) 68 | right_eye_bottom_right = right_eye_dlib_points.max(axis=0) 69 | 70 | right_eye_top = int(max(right_eye_top_left[1]-5,0)) 71 | right_eye_left = int(max(right_eye_top_left[0]-5,0)) 72 | right_eye_right = int(min(right_eye_bottom_right[0]+5,image.shape[1])) 73 | right_eye_bottom = int(min(right_eye_bottom_right[1]+5,image.shape[0])) 74 | 75 | right_eye = image[right_eye_top:right_eye_bottom,right_eye_left:right_eye_right] 76 | 77 | # r_left_corner_top = int(max(dlib_points[19][1],0)) 78 | # r_left_corner_left = int(max(dlib_points[19][0],0)) 79 | # r_left_corner_right = int(min(dlib_points[27][0],image.shape[1])) 80 | # r_left_corner_bottom = int(min(dlib_points[41][1],image.shape[0])) 81 | 82 | # right_eye_left_corner = image[r_left_corner_top:r_left_corner_bottom,r_left_corner_left:r_left_corner_right] 83 | 84 | # r_right_corner_top = int(max(dlib_points[19][1],0)) 85 | # r_right_corner_left = int(max(dlib_points[17][0]-5,0)) 86 | # r_right_corner_right = int(min(dlib_points[19][0],image.shape[1])) 87 | # r_right_corner_bottom = int(min(dlib_points[41][1]+5,image.shape[0])) 88 | 89 | # right_eye_right_corner = image[r_right_corner_top:r_right_corner_bottom, r_right_corner_left:r_right_corner_right] 90 | 91 | right_eye = self.resize_to_output_shape(right_eye) 92 | # right_eye_left_corner = self.resize_to_output_shape(right_eye_left_corner) 93 | # right_eye_right_corner = self.resize_to_output_shape(right_eye_right_corner) 94 | 95 | # return right_eye,right_eye_left_corner,right_eye_right_corner 96 | return right_eye 97 | 98 | def get_left_eye_attributes(self,image,dlib_points): 99 | 100 | left_eye_dlib_points = np.concatenate((dlib_points[22:27],dlib_points[42:48])) 101 | assert len(left_eye_dlib_points)==11, "left _eye dlib points should be 11" 102 | left_eye_top_left = left_eye_dlib_points.min(axis=0) 103 | left_eye_bottom_right = left_eye_dlib_points.max(axis=0) 104 | 105 | left_eye_top = int(max(left_eye_top_left[1]-5,0)) 106 | left_eye_left = int(max(left_eye_top_left[0]-5,0)) 107 | left_eye_right = int(min(left_eye_bottom_right[0]+5,image.shape[1])) 108 | left_eye_bottom = int(min(left_eye_bottom_right[1]+5,image.shape[0])) 109 | 110 | 111 | left_eye = image[left_eye_top:left_eye_bottom,left_eye_left:left_eye_right] 112 | 113 | # l_left_corner_top = int(max(dlib_points[24][1],0)) 114 | # l_left_corner_left = int(max(dlib_points[24][0],0)) 115 | # l_left_corner_right = int(min(dlib_points[26][0],image.shape[1])) 116 | # l_left_corner_bottom = int(min(dlib_points[46][1],image.shape[0])) 117 | 118 | # left_eye_left_corner = image[l_left_corner_top:l_left_corner_bottom,l_left_corner_left:l_left_corner_right] 119 | 120 | # l_right_corner_top = int(max(dlib_points[24][1],0)) 121 | # l_right_corner_left = int(max(dlib_points[27][0],0)) 122 | # l_right_corner_right 
= int(min(dlib_points[24][0],image.shape[1])) 123 | # l_right_corner_bottom = int(min(dlib_points[46][1],image.shape[0])) 124 | 125 | # left_eye_right_corner = image[l_right_corner_top:l_right_corner_bottom, l_right_corner_left:l_right_corner_right] 126 | 127 | left_eye = self.resize_to_output_shape(left_eye) 128 | # left_eye_left_corner = self.resize_to_output_shape(left_eye_left_corner) 129 | # left_eye_right_corner = self.resize_to_output_shape(left_eye_right_corner) 130 | 131 | # return left_eye,left_eye_left_corner,left_eye_right_corner 132 | return left_eye 133 | def resize_to_output_shape(self,image): 134 | if image is None: 135 | return np.zeros((self.image_shape[0],self.image_shape[1],self.image_shape[2])) 136 | try: 137 | img = cv2.resize(image,(self.image_shape[0],self.image_shape[1])) 138 | except: 139 | print "img.shape",image.shape 140 | return np.zeros((self.image_shape[0],self.image_shape[1],self.image_shape[2])) 141 | return img 142 | def get_nose_attributes(self,image,dlib_points): 143 | nose_dlib_points = dlib_points[27:36] 144 | assert len(nose_dlib_points)==9, "nose dlib points should be 9" 145 | nose_top_left = nose_dlib_points.min(axis=0) 146 | nose_bottom_right = nose_dlib_points.max(axis=0) 147 | 148 | nose_top = int(max(nose_top_left[1]-5,0)) 149 | nose_left = int(max(nose_top_left[0]-5,0)) 150 | nose_right = int(min(nose_bottom_right[0]+5,image.shape[1])) 151 | nose_bottom = int(min(nose_bottom_right[1]+5,image.shape[0])) 152 | 153 | 154 | nose = image[nose_top:nose_bottom,nose_left:nose_right] 155 | 156 | # nose_left_corner_top = int(max(dlib_points[27][1],0)) 157 | # nose_left_corner_left = int(max(dlib_points[27][0],0)) 158 | # nose_left_corner_right = int(min(dlib_points[42][0],image.shape[1])) 159 | # nose_left_corner_bottom = int(min(dlib_points[33][1],image.shape[0])) 160 | 161 | # nose_left_corner = image[nose_left_corner_top:nose_left_corner_bottom,nose_left_corner_left:nose_left_corner_right] 162 | 163 | # nose_right_corner_top = int(max(dlib_points[27][1],0)) 164 | # nose_right_corner_left = int(max(dlib_points[39][0],0)) 165 | # nose_right_corner_right = int(min(dlib_points[27][0],image.shape[1])) 166 | # nose_right_corner_bottom = int(min(dlib_points[33][1],image.shape[0])) 167 | 168 | # nose_right_corner = image[nose_right_corner_top:nose_right_corner_bottom, nose_right_corner_left:nose_right_corner_right] 169 | 170 | 171 | 172 | nose = self.resize_to_output_shape(nose) 173 | # nose_left_corner = self.resize_to_output_shape(nose_left_corner) 174 | 175 | # nose_right_corner = self.resize_to_output_shape(nose_right_corner) 176 | 177 | 178 | # return nose,nose_left_corner,nose_right_corner 179 | return nose 180 | def get_bounding_boxes(self,sequence_path): 181 | _,sequence_name = os.path.split(sequence_path) 182 | org_squence_name = "-".join(sequence_name.split("-")[:3]) 183 | bbox_file_path = os.path.join(self.bounding_box_dir,org_squence_name+".json") 184 | with open(bbox_file_path,"r") as bbox_file: 185 | bboxes = json.load(bbox_file) 186 | if bboxes is None or len(bboxes)==0: 187 | raise Exception("No bounding box for sequence:"+sequence_path) 188 | else: 189 | return bboxes 190 | def draw_dlib_points(self,image,kps,color=(255,255,0)): 191 | for i in range(len(kps)): 192 | cv2.circle(image,(int(kps[i][0]),int(kps[i][1])),1,color) 193 | def get_mouth_attributes(self,image,dlib_points): 194 | mouth_dlib_points = dlib_points[48:68] 195 | assert len(mouth_dlib_points)==20, "Mouth dlib points should be 20" 196 | mouth_top_left = 
mouth_dlib_points.min(axis=0) 197 | mouth_bottom_right = mouth_dlib_points.max(axis=0) 198 | 199 | mouth_top = int(max(mouth_top_left[1]-5,0)) 200 | mouth_left = int(max(mouth_top_left[0]-5,0)) 201 | mouth_right = int(min(mouth_bottom_right[0]+5,image.shape[1])) 202 | mouth_bottom = int(min(mouth_bottom_right[1]+5,image.shape[0])) 203 | 204 | mouth = image[mouth_top:mouth_bottom,mouth_left:mouth_right] 205 | # if mouth.shape[0]==0: 206 | # print dlib_points 207 | # self.draw_dlib_points(image,dlib_points) 208 | # self.draw_dlib_points(image,mouth_dlib_points,color=(255,0,0)) 209 | 210 | # cv2.imshow("Image",image) 211 | # cv2.waitKey(0) 212 | # cv2.destroyAllWindows() 213 | # mouth_left_corner_top = int(max(dlib_points[52][1],0)) 214 | # mouth_left_corner_left = int(max(dlib_points[51][0],0)) 215 | # mouth_left_corner_right = int(min(dlib_points[54][0]+5,image.shape[1])) 216 | # mouth_left_corner_bottom = int(min(dlib_points[57][1],image.shape[0])) 217 | 218 | # mouth_left_corner = image[mouth_left_corner_top:mouth_left_corner_bottom,mouth_left_corner_left:mouth_left_corner_right] 219 | 220 | # mouth_right_corner_top = int(max(dlib_points[52][1],0)) 221 | # mouth_right_corner_left = int(max(dlib_points[48][0],0)) 222 | # mouth_right_corner_right = int(min(dlib_points[57][0],image.shape[1])) 223 | # mouth_right_corner_bottom = int(min(dlib_points[57][1],image.shape[0])) 224 | 225 | # mouth_right_corner = image[mouth_right_corner_top:mouth_right_corner_bottom, mouth_right_corner_left:mouth_right_corner_right] 226 | 227 | # mouth_top_corner_top = int(max(dlib_points[50][1],0)) 228 | # mouth_top_corner_left = int(max(dlib_points[48][0],0)) 229 | # mouth_top_corner_right = int(min(dlib_points[54][0],image.shape[1])) 230 | # mouth_top_corner_bottom = int(min(dlib_points[48][1],image.shape[0])) 231 | 232 | # mouth_top_corner = image[mouth_top_corner_top:mouth_top_corner_bottom, mouth_top_corner_left:mouth_top_corner_right] 233 | 234 | # mouth_bottom_corner_top = int(max(dlib_points[48][1],0)) 235 | # mouth_bottom_corner_left = int(max(dlib_points[48][0],0)) 236 | # mouth_bottom_corner_right = int(min(dlib_points[54][0],image.shape[1])) 237 | # mouth_bottom_corner_bottom = int(min(dlib_points[57][1],image.shape[0])) 238 | 239 | # mouth_bottom_corner = image[mouth_bottom_corner_top:mouth_bottom_corner_bottom, mouth_bottom_corner_left:mouth_bottom_corner_right] 240 | 241 | 242 | mouth = self.resize_to_output_shape(mouth) 243 | # mouth_left_corner = self.resize_to_output_shape(mouth_left_corner) 244 | # mouth_right_corner = self.resize_to_output_shape(mouth_right_corner) 245 | # mouth_top_corner = self.resize_to_output_shape(mouth_top_corner) 246 | # mouth_bottom_corner = self.resize_to_output_shape(mouth_bottom_corner) 247 | 248 | 249 | 250 | # return mouth,mouth_left_corner,mouth_right_corner,mouth_top_corner,mouth_bottom_corner 251 | return mouth 252 | 253 | def get_face_attributes(self,image,face,predictor): 254 | face_image =image[ int(max(0,face.top())):int(min(image.shape[0],face.bottom())), 255 | int(max(0,face.left())):int(min(image.shape[1],face.right())) 256 | ] 257 | face_image = cv2.resize(face_image,(self.image_shape[0],self.image_shape[1])) 258 | 259 | dlib_points = self.get_dlib_points(image,face,self.predictor) 260 | right_eye = self.get_right_eye_attributes(image,dlib_points) 261 | left_eye = self.get_left_eye_attributes(image,dlib_points) 262 | nose = self.get_nose_attributes(image,dlib_points) 263 | mouth = self.get_mouth_attributes(image,dlib_points) 264 | output = 
{"face_image":face_image,"right_eye":right_eye,"left_eye":left_eye, 265 | "mouth":mouth,"nose":nose 266 | } 267 | # right_eye,right_eye_left_corner,right_eye_right_corner = self.get_right_eye_attributes(image,dlib_points) 268 | # left_eye,left_eye_left_corner,left_eye_right_corner = self.get_left_eye_attributes(image,dlib_points) 269 | # nose,nose_right_corner,nose_left_corner = self.get_nose_attributes(image,dlib_points) 270 | # mouth,mouth_left_corner,mouth_right_corner,mouth_top_corner,mouth_bottom_corner = self.get_mouth_attributes(image,dlib_points) 271 | # output = {"face_image":face_image,"right_eye":right_eye,"left_eye":left_eye, 272 | # "mouth":mouth,"nose":nose,"left_eye_right_corner":left_eye_right_corner, 273 | # "left_eye_left_corner":left_eye_left_corner,"right_eye_right_corner":right_eye_right_corner, 274 | # "right_eye_left_corner":right_eye_left_corner,"nose_right_corner":nose_right_corner, 275 | # "nose_left_corner":nose_left_corner,"mouth_left_corner":mouth_left_corner, 276 | # "mouth_right_corner":mouth_right_corner,"mouth_top_corner":mouth_top_corner, 277 | # "mouth_bottom_corner":mouth_bottom_corner 278 | # } 279 | return output 280 | 281 | 282 | def load_image_sequence(self,path,detector,predictor,verbose=False): 283 | if verbose: 284 | print "loading",path 285 | imgs_files = os.listdir(path) 286 | imgs_files.sort() 287 | output_faces = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 288 | output_right_eyes = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 289 | output_left_eyes = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 290 | output_mouths = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 291 | output_noses = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 292 | # output_left_eye_right_corners = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 293 | # output_left_eye_left_corners = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 294 | # output_right_eye_right_corners = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 295 | # output_right_eye_left_corners = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 296 | # output_nose_right_corners = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 297 | # output_nose_left_corners = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 298 | # output_mouth_left_corners = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 299 | # output_mouth_right_corners = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 300 | # output_mouth_top_corners = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 301 | # output_mouth_bottom_corners = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 302 | 303 | 304 | bounding_boxes = self.get_bounding_boxes(path) 305 | 306 | for i in range(len(imgs_files)): 307 | img = cv2.imread(os.path.join(path,imgs_files[i])) 308 | if not (img is None): 309 | bbox = bounding_boxes[imgs_files[i]] 310 | 
face = dlib.rectangle(int(bbox[0]),int(bbox[1]),int(bbox[2]),int(bbox[3])) 311 | # face_image =img[ max(0,face.top()):min(img.shape[0],face.bottom()), 312 | # max(0,face.left()):min(img.shape[1],face.right()) 313 | # ] 314 | # [right_eye,left_eye,mouth,nose,left_eye_corners,right_eye_corners,nose_corners,mouth_corners] 315 | 316 | attrs = self.get_face_attributes(img, face,self.predictor) 317 | output_faces[i] = attrs["face_image"] 318 | output_right_eyes[i] = attrs["right_eye"] 319 | output_left_eyes[i] = attrs["left_eye"] 320 | output_noses[i] = attrs["nose"] 321 | output_mouths[i] = attrs["mouth"] 322 | # output_left_eye_right_corners[i] = attrs["left_eye_right_corner"] 323 | # output_left_eye_left_corners[i] = attrs["left_eye_left_corner"] 324 | # output_right_eye_right_corners[i] = attrs["right_eye_right_corner"] 325 | # output_right_eye_left_corners[i] = attrs["right_eye_left_corner"] 326 | # output_nose_right_corners[i] = attrs["nose_right_corner"] 327 | # output_nose_left_corners[i] = attrs["nose_left_corner"] 328 | # output_mouth_right_corners[i] = attrs["mouth_right_corner"] 329 | # output_mouth_left_corners[i] = attrs["mouth_left_corner"] 330 | # output_mouth_top_corners[i] = attrs["mouth_top_corner"] 331 | # output_mouth_bottom_corners[i] = attrs["mouth_bottom_corner"] 332 | 333 | 334 | else: 335 | if verbose: 336 | print ("Unable to read image from ",os.path.join(path,imgs_files[i])) 337 | if verbose: 338 | print "loaded",path 339 | return output_faces,output_left_eyes,output_right_eyes,output_noses,output_mouths 340 | # return output_faces,output_left_eyes,output_right_eyes,output_noses,output_mouths,\ 341 | # output_left_eye_right_corners,output_left_eye_right_corners,output_right_eye_left_corners,\ 342 | # output_right_eye_right_corners,output_nose_left_corners,output_nose_right_corners,\ 343 | # output_mouth_left_corners,output_mouth_right_corners,output_mouth_top_corners,output_mouth_bottom_corners 344 | def get_is_talking(self,folder_name): 345 | if folder_name.lower().count("talking")>0: 346 | return 1 347 | else: 348 | return 0 349 | def load_dataset(self): 350 | sequences = os.listdir(self.dataset_dir) 351 | 352 | self.train_sequences,test_sequences = train_test_split(sequences,test_size=0.05) 353 | self.train_sequences = np.array(self.train_sequences) 354 | # num_train_sequences = len(train_sequences) 355 | num_test_sequences = len(test_sequences) 356 | 357 | # self.face_image_train_sequences = np.zeros((num_train_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 358 | # self.left_eye_image_train_sequences = np.zeros((num_train_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 359 | # self.right_eye_image_train_sequences = np.zeros((num_train_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 360 | # self.nose_image_train_sequences = np.zeros((num_train_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 361 | # self.mouth_image_train_sequences = np.zeros((num_train_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 362 | # self.talking_train = np.zeros((num_train_sequences,)) 363 | 364 | 365 | # for i in range(len(train_sequences)): 366 | # self.face_image_train_sequences[i],self.left_eye_image_train_sequences[i],\ 367 | # self.right_eye_image_train_sequences[i],self.nose_image_train_sequences[i],\ 368 | # 
self.mouth_image_train_sequences[i] = self.load_image_sequence(os.path.join(\ 369 | # self.dataset_dir,train_sequences[i]),detector,predictor) 370 | # self.talking_train[i] = self.get_is_talking(train_sequences[i]) 371 | 372 | print("loading test",len(test_sequences),"dataset") 373 | 374 | self.face_image_test_sequences = np.zeros((num_test_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 375 | self.left_eye_image_test_sequences = np.zeros((num_test_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 376 | self.right_eye_image_test_sequences = np.zeros((num_test_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 377 | self.nose_image_test_sequences = np.zeros((num_test_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 378 | self.mouth_image_test_sequences = np.zeros((num_test_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 379 | self.talking_test = np.zeros((num_test_sequences,)) 380 | 381 | 382 | for i in range(len(test_sequences)): 383 | self.face_image_test_sequences[i],self.left_eye_image_test_sequences[i],self.right_eye_image_test_sequences[i],\ 384 | self.nose_image_test_sequences[i],self.mouth_image_test_sequences[i] = self.load_image_sequence(\ 385 | os.path.join(self.dataset_dir,test_sequences[i]),self.detector,self.predictor) 386 | self.talking_test[i] = self.get_is_talking(test_sequences[i]) 387 | print("loaded test",len(test_sequences),"dataset") 388 | self.dataset_loaded = True 389 | 390 | def generator(self,batch_size): 391 | while True: 392 | indexes = np.arange(len(self.train_sequences)) 393 | np.random.shuffle(indexes) 394 | for i in range(0,len(indexes),batch_size): 395 | current_indexes = indexes[i:i+batch_size] 396 | 397 | current_sequences = self.train_sequences[current_indexes] 398 | 399 | y = np.zeros((len(current_sequences),)) 400 | for j in range(len(current_sequences)): 401 | faces,left_eyes,right_eyes,noses,mouths = self.load_image_sequence(os.path.join(\ 402 | self.dataset_dir,current_sequences[j]),self.detector,self.predictor) 403 | y[j] = self.get_is_talking(current_sequences[j]) 404 | y = y.astype(np.uint8) 405 | y = np.eye(2)[y] 406 | 407 | faces = faces.astype(np.float32)/255 408 | left_eyes = left_eyes.astype(np.float32)/255 409 | right_eyes = right_eyes.astype(np.float32)/255 410 | noses = noses.astype(np.float32)/255 411 | mouths = mouths.astype(np.float32)/255 412 | 413 | 414 | faces = faces.reshape(batch_size,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2]) 415 | left_eyes = left_eyes.reshape(batch_size,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2]) 416 | right_eyes = right_eyes.reshape(batch_size,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2]) 417 | noses = noses.reshape(batch_size,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2]) 418 | mouths = mouths.reshape(batch_size,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2]) 419 | yield [faces,left_eyes,right_eyes,noses,mouths],y -------------------------------------------------------------------------------- /dataset/__main__.py: -------------------------------------------------------------------------------- 1 | from dataset import DriverActionDataset 2 | import dlib 3 | 4 | def main(): 5 
| dataset = DriverActionDataset("/home/mtk/datasets/Yaw/YawDD dataset/Mirror","/dataset/yawn/faces",(227,227,3),100) # bounding-box directory and sequence length are placeholders added so the call matches DriverActionDataset.__init__; adjust to your setup 6 | detector = dlib.get_frontal_face_detector() 7 | predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat") 8 | dataset.load_image_sequence("/dataset/yawn/images3/9-FemaleNoGlasses-Normal/",detector,predictor) 9 | 10 | if __name__ == "__main__": 11 | main() -------------------------------------------------------------------------------- /dataset/extract_images.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | dataset_path="$HOME/datasets/Yaw/YawDD dataset/Mirror/Male_mirror/"; 3 | output_path="/dataset/yawn/images3/" 4 | 5 | for file in "$dataset_path"*.avi; 6 | do 7 | filename="$(basename "${file}" .avi)" 8 | mkdir -p "$output_path/${filename}" 9 | ffmpeg -i "$file" "$output_path/${filename}/%05d.jpg"; 10 | done; 11 | 12 | for file in ~/datasets/Yaw/YawDD\ dataset/Mirror/Female_mirror/*.avi; 13 | do 14 | filename="$(basename "${file}" .avi)" 15 | mkdir -p "$output_path/${filename}" 16 | ffmpeg -i "$file" "$output_path/${filename}/%05d.jpg"; 17 | done; -------------------------------------------------------------------------------- /dataset/mouth_features.py: -------------------------------------------------------------------------------- 1 | import os 2 | import dlib 3 | import cv2 4 | import json 5 | import numpy as np 6 | from sklearn.model_selection import train_test_split 7 | 8 | 9 | class MouthFeatureOnlyDataset(object): 10 | 11 | def __init__(self,dataset_dir, bounding_box_dir, image_shape,max_sequence_length): 12 | self.dataset_dir = dataset_dir 13 | self.bounding_box_dir = bounding_box_dir 14 | self.image_shape = image_shape 15 | self.dataset_loaded = False 16 | self.max_sequence_length = max_sequence_length 17 | self.detector = dlib.get_frontal_face_detector() 18 | self.predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat") 19 | def get_dlib_points(self,image,face): 20 | shape = self.predictor(image,face) 21 | dlib_points = np.zeros((68,2)) 22 | for i,part in enumerate(shape.parts()): 23 | dlib_points[i] = [part.x,part.y] 24 | return dlib_points 25 | def get_bounding_boxes(self,sequence_path): 26 | _,sequence_name = os.path.split(sequence_path) 27 | org_squence_name = "-".join(sequence_name.split("-")[:3]) 28 | bbox_file_path = os.path.join(self.bounding_box_dir,org_squence_name+".json") 29 | with open(bbox_file_path,"r") as bbox_file: 30 | bboxes = json.load(bbox_file) 31 | if bboxes is None or len(bboxes)==0: 32 | raise Exception("No bounding box for sequence:"+sequence_path) 33 | else: 34 | return bboxes 35 | def distance_between(self,v1,v2): 36 | diff = v2 - v1 37 | diff_squared = np.square(diff) 38 | dist_squared = diff_squared.sum(axis=1) 39 | dists = np.sqrt(dist_squared) 40 | return dists 41 | 42 | def angles_between(self,v1,v2): 43 | dot_prod = (v1 * v2).sum(axis=1) 44 | v1_norm = np.linalg.norm(v1,axis=1) 45 | v2_norm = np.linalg.norm(v2,axis=1) 46 | 47 | 48 | cosine_of_angle = (dot_prod/(v1_norm * v2_norm)).reshape(-1,1) 49 | 50 | angles = np.arccos(np.clip(cosine_of_angle,-1,1)) 51 | return angles 52 | def draw_key_points(self,image,key_points): 53 | for i in range(key_points.shape[0]): 54 | image = cv2.circle(image, (int(key_points[i][0]), int(key_points[i][1])), 1,(255,0,0)) 55 | return image 56 | def get_mouth_attributes_from_local_frame(self,image,key_points_20): 57 | 58 | current_image_shape = image.shape 59 | top_left = key_points_20.min(axis=0) 60 | bottom_right = 
key_points_20.max(axis=0) 61 | 62 | # bound the coordinate system inside eye image 63 | bottom_right[0] = min(current_image_shape[1],bottom_right[0]+5) 64 | bottom_right[1] = min(current_image_shape[0],bottom_right[1]+5) 65 | top_left[0] = max(0,top_left[0]-5) 66 | top_left[1] = max(0,top_left[1]-5) 67 | 68 | # crop the eye 69 | top_left = top_left.astype(int) 70 | bottom_right = bottom_right.astype(int) 71 | mouth_image = image[top_left[1]:bottom_right[1],top_left[0]:bottom_right[0]] 72 | if mouth_image.shape[0]==0: 73 | # self.draw_key_points(image,key_points_20) 74 | # cv2.imshow("Image",image) 75 | # cv2.waitKey(0) 76 | # cv2.destoryAllWindows(0) 77 | image = np.zeros((self.image_shape[0],self.image_shape[1],self.image_shape[2])) 78 | key_points = np.zeros((20,2)) 79 | dists = np.zeros((20)) 80 | angles = np.zeros((20)) 81 | return image, key_points,dists,angles 82 | 83 | # translate the eye key points from face image frame to eye image frame 84 | key_points = key_points_20 - top_left 85 | key_points +=np.finfo(float).eps 86 | # horizontal scale to resize image 87 | scale_h = self.image_shape[1]/float(mouth_image.shape[1]) 88 | # vertical scale to resize image 89 | scale_v = self.image_shape[0]/float(mouth_image.shape[0]) 90 | 91 | # resize left eye image to network input size 92 | mouth_image = cv2.resize(mouth_image,(self.image_shape[0],self.image_shape[1])) 93 | 94 | # scale left key points proportional with respect to left eye image resize scale 95 | scale = np.array([[scale_h,scale_v]]) 96 | key_points = key_points * scale 97 | 98 | # calculate centroid of left eye key points 99 | centroid = np.array([key_points.mean(axis=0)]) 100 | 101 | # calculate distances from centroid to each left eye key points 102 | dists = self.distance_between(key_points,centroid) 103 | 104 | # calculate angles between centroid point vector and left eye key points vectors 105 | angles = self.angles_between(key_points,centroid) 106 | return mouth_image, key_points,dists,angles 107 | 108 | def get_mouth_features_from_image(self,image,bounding_box): 109 | face = dlib.rectangle(int(bounding_box[0]),int(bounding_box[1]),int(bounding_box[2]),int(bounding_box[3])) 110 | 111 | # cv2.rectangle(image,(face.left(),face.top()),(face.right(),face.bottom()),(255,255,0)) 112 | b_box_array = np.array(bounding_box) 113 | key_points = self.get_dlib_points(image,face) 114 | mouth_key_points = key_points[48:68] 115 | image = self.draw_key_points(image,mouth_key_points) 116 | assert len(mouth_key_points) == 20, "Mouth key points should be twenty points" 117 | mouth_image, key_points,dists,angles = self.get_mouth_attributes_from_local_frame(image,mouth_key_points) 118 | return mouth_image, key_points,dists,angles 119 | def get_mouth_features(self,sequence_path): 120 | bboxes = self.get_bounding_boxes(sequence_path) 121 | output_images = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 122 | output_faces = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 123 | output_key_points = np.zeros((self.max_sequence_length,20,2)) 124 | output_distances = np.zeros((self.max_sequence_length,20)) 125 | output_angles = np.zeros((self.max_sequence_length,20)) 126 | img_files = os.listdir(sequence_path) 127 | img_files.sort() 128 | 129 | for i in range(len(img_files)): 130 | img = cv2.imread(os.path.join(sequence_path,img_files[i])) 131 | if self.image_shape[2]==1: 132 | img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY) 133 | bounding_box = 
bboxes[img_files[i]] 134 | if not(img is None): 135 | 136 | face_image = img[ 137 | max(0,int(bounding_box[1]-5)):min(img.shape[0],int(bounding_box[3])+5), 138 | max(0,int(bounding_box[0]-5)):min(img.shape[1],int(bounding_box[2])+5) 139 | ] 140 | try: 141 | face_image = cv2.resize(face_image,(self.image_shape[0],self.image_shape[1])) 142 | face_image = face_image.reshape(self.image_shape) 143 | except: 144 | print bounding_box 145 | face_image = np.zeros((self.image_shape[0],self.image_shape[1],self.image_shape[2])) 146 | # top_left = (max(0,int(bounding_box[0])),max(0,int(bounding_box[1]))) 147 | # bottom_right = (min(img.shape[1],int(bounding_box[2])),min(img.shape[0],int(bounding_box[3]))) 148 | # cv2.rectangle(img,top_left,bottom_right,(255,0,0)) 149 | # cv2.imshow("Image",img) 150 | # # cv2.imshow("Face image",face_image) 151 | # cv2.waitKey(0) 152 | # cv2.destroyAllWindows() 153 | mouth_image,kps,dists,angles = self.get_mouth_features_from_image(img,bounding_box) 154 | mouth_image = mouth_image.reshape(self.image_shape) 155 | # self.draw_key_points(mouth_image,kps) 156 | # cv2.imshow("Image",img) 157 | # cv2.imshow("Mouth Image",mouth_image) 158 | # cv2.waitKey(0) 159 | # cv2.destroyAllWindows() 160 | output_faces[i] = face_image 161 | output_images[i] = mouth_image 162 | output_key_points[i] = kps 163 | output_distances[i] = dists 164 | output_angles[i] = angles.reshape((20,)) 165 | else: 166 | raise Exception("Unable to read image form "+os.path.join(sequence_path,img_files[i])) 167 | return output_faces, output_images,output_key_points,output_distances,output_angles 168 | def get_is_talking(self,folder_name): 169 | if folder_name.lower().count("talking")>0: 170 | return 1 171 | else: 172 | return 0 173 | def load_dataset(self): 174 | sequences = os.listdir(self.dataset_dir) 175 | # sequences = sequences[:3000] 176 | train_sequences,test_sequences = train_test_split(sequences,test_size=0.1) 177 | num_train_sequences = len(train_sequences) 178 | num_test_sequences = len(test_sequences) 179 | 180 | self.face_image_train_sequence = np.zeros((num_train_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 181 | self.mouth_image_train_sequence = np.zeros((num_train_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 182 | self.key_points_train_sequence = np.zeros((num_train_sequences,self.max_sequence_length,20,2)) 183 | self.distances_train_sequence = np.zeros((num_train_sequences,self.max_sequence_length,20)) 184 | self.angles_train_sequence = np.zeros((num_train_sequences,self.max_sequence_length,20)) 185 | self.Y_train = np.zeros((num_train_sequences,),dtype=np.uint8) 186 | 187 | 188 | 189 | 190 | 191 | self.face_image_test_sequence = np.zeros((num_test_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 192 | self.mouth_image_test_sequence = np.zeros((num_test_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 193 | self.key_points_test_sequence = np.zeros((num_test_sequences,self.max_sequence_length,20,2)) 194 | self.distances_test_sequence = np.zeros((num_test_sequences,self.max_sequence_length,20)) 195 | self.angles_test_sequence = np.zeros((num_test_sequences,self.max_sequence_length,20)) 196 | self.Y_test = np.zeros((num_test_sequences,),dtype=np.uint8) 197 | 198 | print "Loading",num_train_sequences,"train sequences" 199 | for i in range(num_train_sequences): 200 | faces,mouths,points,distances, 
angles = self.get_mouth_features(os.path.join(self.dataset_dir, train_sequences[i])) 201 | self.face_image_train_sequence[i] = faces 202 | self.mouth_image_train_sequence[i] = mouths 203 | self.key_points_train_sequence[i] = points 204 | self.distances_train_sequence[i] = distances 205 | self.angles_train_sequence[i] = angles 206 | self.Y_train[i] = self.get_is_talking(train_sequences[i]) 207 | if (i+1)%100==0: 208 | print "loaded",i+1,"sequences" 209 | print "Loaded",num_train_sequences,"train sequences" 210 | 211 | print "Loading test sequences" 212 | for i in range(num_test_sequences): 213 | faces,images,points,distances, angles = self.get_mouth_features(os.path.join(self.dataset_dir,test_sequences[i])) 214 | self.face_image_test_sequence[i] = faces 215 | self.mouth_image_test_sequence[i] = images 216 | self.key_points_test_sequence[i] = points 217 | self.distances_test_sequence[i] = distances 218 | self.angles_test_sequence[i] = angles 219 | self.Y_test[i] = self.get_is_talking(test_sequences[i]) 220 | if (i+1)%100==0: 221 | print "loaded",i+1,"sequences" 222 | 223 | print "Loaded test sequences" 224 | 225 | 226 | print "Preprocessing dataset" 227 | # Normalize images 228 | 229 | self.face_image_train_sequence = self.face_image_train_sequence.astype(np.float32)/255.0 230 | self.face_image_test_sequence = self.face_image_test_sequence.astype(np.float32)/255.0 231 | # Normalize images 232 | 233 | self.mouth_image_train_sequence = self.mouth_image_train_sequence.astype(np.float32)/255.0 234 | self.mouth_image_test_sequence = self.mouth_image_test_sequence.astype(np.float32)/255.0 235 | 236 | # Normalize key points 237 | image_width = self.image_shape[0] 238 | self.key_points_train_sequence = self.key_points_train_sequence.astype(np.float32)/float(image_width) 239 | self.key_points_test_sequence = self.key_points_test_sequence.astype(np.float32)/float(image_width) 240 | 241 | # Expand dims for network input 242 | self.key_points_train_sequence = np.expand_dims(self.key_points_train_sequence,2) 243 | self.key_points_test_sequence = np.expand_dims(self.key_points_test_sequence,2) 244 | 245 | # Normalize distances 246 | self.distances_train_sequence = self.distances_train_sequence.astype(np.float32)/float(image_width) 247 | self.distances_test_sequence = self.distances_test_sequence.astype(np.float32)/float(image_width) 248 | 249 | # Expand dims for network input 250 | self.distances_train_sequence = np.expand_dims(self.distances_train_sequence,2) 251 | self.distances_train_sequence = np.expand_dims(self.distances_train_sequence,4) 252 | 253 | self.distances_test_sequence = np.expand_dims(self.distances_test_sequence,2) 254 | self.distances_test_sequence = np.expand_dims(self.distances_test_sequence,4) 255 | 256 | # Normalize angles 257 | self.angles_train_sequence = self.angles_train_sequence.astype(np.float32)/np.pi 258 | self.angles_test_sequence = self.angles_test_sequence.astype(np.float32)/np.pi 259 | # Expand dims for network input 260 | self.angles_train_sequence = np.expand_dims(self.angles_train_sequence,2) 261 | self.angles_train_sequence = np.expand_dims(self.angles_train_sequence,4) 262 | 263 | self.angles_test_sequence = np.expand_dims(self.angles_test_sequence,2) 264 | self.angles_test_sequence = np.expand_dims(self.angles_test_sequence,4) 265 | print "All datasets are loaded and preprocessed" 266 | self.dataset_loaded = True 267 | def generator(self,batch_size): 268 | while True: 269 | indexes = range(len(self.mouth_image_train_sequence)) 270 | np.random.shuffle(indexes) 271 | 
for i in range(0,len(indexes),batch_size): 272 | current_indexes = indexes[i:i+batch_size] 273 | f_images = self.face_image_train_sequence[current_indexes] 274 | m_images = self.mouth_image_train_sequence[current_indexes] 275 | kpoints = self.key_points_train_sequence[current_indexes] 276 | dpoints = self.distances_train_sequence[current_indexes] 277 | angles = self.angles_train_sequence[current_indexes] 278 | 279 | y = self.Y_train[current_indexes] 280 | y = np.eye(2)[y] 281 | # yield [m_images,kpoints,dpoints,angles],y 282 | # yield [m_images, f_images],y 283 | yield [m_images,f_images,kpoints,dpoints,angles],y -------------------------------------------------------------------------------- /nets/__init__.py: -------------------------------------------------------------------------------- 1 | from keras.layers import Dropout 2 | from keras.layers import Conv2D,MaxPool2D,Dense,Flatten,Input,concatenate,Concatenate 3 | from keras.layers import LSTM,TimeDistributed,Add 4 | from keras.models import Model,Sequential 5 | import keras 6 | import numpy as np 7 | 8 | class Network(object): 9 | def __init__(self,dataset,input_shape,max_sequence_length): 10 | self.dataset = dataset 11 | self.input_shape = input_shape 12 | self.max_sequence_length = max_sequence_length 13 | self.model = self.build() 14 | self.model.summary() 15 | def build(self): 16 | 17 | face_model = Sequential() 18 | face_model.add(TimeDistributed(Conv2D(32,(3,3),padding='same',activation="relu",strides=(1, 1)),\ 19 | name="face_layer1",input_shape=(self.max_sequence_length, self.input_shape[0], self.input_shape[1], self.input_shape[2]))) 20 | 21 | face_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 22 | face_model.add(TimeDistributed(Conv2D(64,kernel_size=(3,3),strides=(1, 1),padding='same',\ 23 | activation="relu",name="face_layer2"))) 24 | face_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 25 | face_model.add(TimeDistributed(Conv2D(128,kernel_size=(3,3),strides=(1, 1),padding='same',\ 26 | activation="relu",name="face_layer3"))) 27 | face_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 28 | face_model.add(TimeDistributed(Flatten())) 29 | 30 | nose_model = Sequential() 31 | nose_model.add(TimeDistributed(Conv2D(32,(3,3),padding='same',activation="relu",strides=(1, 1)),\ 32 | name="nose_layer1",input_shape=(self.max_sequence_length, self.input_shape[0], self.input_shape[1], self.input_shape[2]))) 33 | 34 | nose_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 35 | nose_model.add(TimeDistributed(Conv2D(64,kernel_size=(3,3),strides=(1, 1),padding='same',\ 36 | activation="relu",name="nose_layer2"))) 37 | nose_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 38 | nose_model.add(TimeDistributed(Conv2D(128,kernel_size=(3,3),strides=(1, 1),padding='same',\ 39 | activation="relu",name="nose_layer3"))) 40 | nose_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 41 | nose_model.add(TimeDistributed(Flatten())) 42 | 43 | left_eye_model = Sequential() 44 | left_eye_model.add(TimeDistributed(Conv2D(32,(3,3),padding='same',activation="relu",strides=(1, 1)),\ 45 | name="left_eye_layer1",input_shape=(self.max_sequence_length, self.input_shape[0], self.input_shape[1], self.input_shape[2]))) 46 | 47 | left_eye_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 48 | left_eye_model.add(TimeDistributed(Conv2D(64,kernel_size=(3,3),strides=(1, 1),padding='same',\ 49 | activation="relu",name="left_eye_layer2"))) 50 | left_eye_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 51 | 
left_eye_model.add(TimeDistributed(Conv2D(128,kernel_size=(3,3),strides=(1, 1),padding='same',\ 52 | activation="relu",name="left_eye_layer3"))) 53 | left_eye_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 54 | left_eye_model.add(TimeDistributed(Flatten())) 55 | 56 | right_eye_model = Sequential() 57 | right_eye_model.add(TimeDistributed(Conv2D(32,(3,3),padding='same',activation="relu",strides=(1, 1)),\ 58 | name="right_eye_layer1",input_shape=(self.max_sequence_length, self.input_shape[0], self.input_shape[1], self.input_shape[2]))) 59 | 60 | right_eye_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 61 | right_eye_model.add(TimeDistributed(Conv2D(64,kernel_size=(3,3),strides=(1, 1),padding='same',\ 62 | activation="relu",name="right_eye_layer2"))) 63 | right_eye_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 64 | right_eye_model.add(TimeDistributed(Conv2D(128,kernel_size=(3,3),strides=(1, 1),padding='same',\ 65 | activation="relu",name="right_eye_layer3"))) 66 | right_eye_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 67 | right_eye_model.add(TimeDistributed(Flatten())) 68 | 69 | mouth_model = Sequential() 70 | mouth_model.add(TimeDistributed(Conv2D(32,(3,3),padding='same',activation="relu",strides=(1, 1)),\ 71 | name="mouth_layer1",input_shape=(self.max_sequence_length, self.input_shape[0], self.input_shape[1], self.input_shape[2]))) 72 | 73 | mouth_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 74 | mouth_model.add(TimeDistributed(Conv2D(64,kernel_size=(3,3),strides=(1, 1),padding='same',\ 75 | activation="relu",name="mouth_layer2"))) 76 | mouth_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 77 | mouth_model.add(TimeDistributed(Conv2D(128,kernel_size=(3,3),strides=(1, 1),padding='same',\ 78 | activation="relu",name="mouth_layer3"))) 79 | mouth_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 80 | mouth_model.add(TimeDistributed(Flatten())) 81 | merged_layer = Add()([face_model.output,left_eye_model.output,right_eye_model.output,nose_model.output,mouth_model.output]) 82 | 83 | dense1 = TimeDistributed(Dense(128,activation="relu"))(merged_layer) 84 | dropout1 = TimeDistributed(Dropout(0.2))(dense1) 85 | dense2 = TimeDistributed(Dense(256,activation="relu"))(dropout1) 86 | dropout2 = TimeDistributed(Dropout(0.2))(dense2) 87 | lstm1 = LSTM(32,activation='relu',return_sequences=True,stateful=False)(dropout2) 88 | lstm2 = LSTM(64,activation='relu',return_sequences=False,stateful=False)(lstm1) 89 | 90 | dense3 = Dense(256,activation="relu")(lstm2) 91 | output = Dense(2,activation="softmax")(dense3) 92 | 93 | # model = Model(inputs=[face_layer,left_eye_layer_input,right_eye_layer_input,nose_layer_input,mouth_layer_input],outputs=output) 94 | model = Model(inputs=[face_model.input,left_eye_model.input,right_eye_model.input,nose_model.input,mouth_model.input],\ 95 | outputs = output 96 | ) 97 | return model 98 | 99 | 100 | 101 | def train(self): 102 | 103 | faces = self.dataset.face_image_test_sequences.astype(np.float32)/255 104 | left_eyes = self.dataset.left_eye_image_test_sequences.astype(np.float32)/255 105 | right_eyes = self.dataset.right_eye_image_test_sequences.astype(np.float32)/255 106 | noses = self.dataset.nose_image_test_sequences.astype(np.float32)/255 107 | mouths = self.dataset.mouth_image_test_sequences.astype(np.float32)/255 108 | 109 | 110 | X_test= [faces,left_eyes,right_eyes,noses,mouths] 111 | 112 | y_test = self.dataset.talking_test.astype(np.uint8) 113 | print y_test[0] 114 | y_test = np.eye(2)[y_test] 115 | 116 
| self.model.compile(loss=keras.losses.categorical_crossentropy,optimizer=keras.optimizers.Adam(1e-4),metrics=["accuracy"]) 117 | self.model.fit_generator(self.dataset.generator(1),steps_per_epoch=5000,epochs=10,verbose=1,validation_data=(X_test,y_test)) 118 | self.model.save_weights("models/model.h5") 119 | model_json = self.model.to_json() 120 | with open("models/model.json","w+") as json_file: 121 | json_file.write(model_json) 122 | score = self.model.evaluate(X_test,y_test) 123 | with open("logs/log.txt","w+") as log_file: 124 | log_file.write("Score: "+str(score)) 125 | log_file.write("\n") -------------------------------------------------------------------------------- /nets/__main__.py: -------------------------------------------------------------------------------- 1 | from nets import Network 2 | 3 | def main(): 4 | net = Network((24,24,1),100) 5 | if __name__ == "__main__": 6 | main() -------------------------------------------------------------------------------- /nets/mouth_features.py: -------------------------------------------------------------------------------- 1 | from keras.layers import Dropout 2 | from keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Input, Concatenate 3 | from keras.layers import LSTM, TimeDistributed, Add, Bidirectional 4 | from keras.models import Model, Sequential 5 | import keras 6 | import numpy as np 7 | 8 | class MouthFeatureOnlyNet(object): 9 | def __init__(self, dataset, input_shape, max_sequence_length): 10 | self.dataset = dataset 11 | self.input_shape = input_shape 12 | self.max_sequence_length = max_sequence_length 13 | self.model = self.build() 14 | self.model.summary() 15 | def build(self): 16 | mouth_image_model = Sequential() 17 | mouth_image_model.add(TimeDistributed(Conv2D(32,(3,3),padding='same',activation="relu",strides=(1, 1)),\ 18 | name="mouth_image_layer1",input_shape=(self.max_sequence_length, self.input_shape[0], self.input_shape[1], self.input_shape[2]))) 19 | 20 | mouth_image_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 21 | mouth_image_model.add(TimeDistributed(Conv2D(64,kernel_size=(3,3),strides=(1, 1),padding='same',\ 22 | activation="relu",name="mouth_image_layer2"))) 23 | mouth_image_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 24 | mouth_image_model.add(TimeDistributed(Conv2D(128,kernel_size=(3,3),strides=(1, 1),padding='same',\ 25 | activation="relu",name="mouth_image_layer3"))) 26 | mouth_image_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 27 | mouth_image_model.add(TimeDistributed(Flatten())) 28 | 29 | mouth_image_model.add(Bidirectional(LSTM(32,return_sequences=True))) 30 | mouth_image_model.add(Bidirectional(LSTM(128,return_sequences=False))) 31 | mouth_image_model.add(Dense(128,activation="relu")) 32 | 33 | face_image_model = Sequential() 34 | face_image_model.add(TimeDistributed(Conv2D(32,(3,3),padding='same',activation="relu",strides=(1, 1)),\ 35 | name="face_image_layer1",input_shape=(self.max_sequence_length, self.input_shape[0], self.input_shape[1], self.input_shape[2]))) 36 | 37 | face_image_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 38 | face_image_model.add(TimeDistributed(Conv2D(64,kernel_size=(3,3),strides=(1, 1),padding='same',\ 39 | activation="relu",name="face_image_layer2"))) 40 | face_image_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 41 | face_image_model.add(TimeDistributed(Conv2D(128,kernel_size=(3,3),strides=(1, 1),padding='same',\ 42 | activation="relu",name="face_image_layer3"))) 43 | 
face_image_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 44 | face_image_model.add(TimeDistributed(Flatten())) 45 | 46 | face_image_model.add(Bidirectional(LSTM(32,return_sequences=True))) 47 | face_image_model.add(Bidirectional(LSTM(128,return_sequences=False))) 48 | face_image_model.add(Dense(128,activation="relu")) 49 | 50 | dpts_model = Sequential() 51 | dpts_model.add(TimeDistributed(Conv2D(32,(1,3),padding='same',activation="relu",strides=(1, 1)),\ 52 | name="dpts_layer1",input_shape=(self.max_sequence_length, 1, 20, 2))) 53 | dpts_model.add(TimeDistributed(Conv2D(64,kernel_size=(3,3),strides=(1, 1),padding='same',\ 54 | activation="relu",name="dpts_layer2"))) 55 | dpts_model.add(TimeDistributed(Flatten())) 56 | 57 | dpts_model.add(Bidirectional(LSTM(32,return_sequences=True))) 58 | dpts_model.add(Bidirectional(LSTM(128,return_sequences=False))) 59 | dpts_model.add(Dense(128,activation="relu")) 60 | 61 | 62 | dpts_dists_model = Sequential() 63 | dpts_dists_model.add(TimeDistributed(Conv2D(32,(1,3),padding='same',activation="relu",strides=(1, 1)),\ 64 | name="dpts_dists_layer1",input_shape=(self.max_sequence_length, 1, 20, 1))) 65 | dpts_dists_model.add(TimeDistributed(Conv2D(64,kernel_size=(3,3),strides=(1, 1),padding='same',\ 66 | activation="relu",name="dpts_dists_layer2"))) 67 | dpts_dists_model.add(TimeDistributed(Flatten())) 68 | dpts_dists_model.add(Bidirectional(LSTM(32,return_sequences=True))) 69 | dpts_dists_model.add(Bidirectional(LSTM(128,return_sequences=False))) 70 | dpts_dists_model.add(Dense(128,activation="relu")) 71 | 72 | dpts_angles_model = Sequential() 73 | dpts_angles_model.add(TimeDistributed(Conv2D(32,(1,3),padding='same',activation="relu",strides=(1, 1)),\ 74 | name="dpts_angles_layer1",input_shape=(self.max_sequence_length, 1, 20, 1))) 75 | dpts_angles_model.add(TimeDistributed(Conv2D(64,kernel_size=(3,3),strides=(1, 1),padding='same',\ 76 | activation="relu",name="dpts_angles_layer2"))) 77 | dpts_angles_model.add(TimeDistributed(Flatten())) 78 | dpts_angles_model.add(Bidirectional(LSTM(32,return_sequences=True))) 79 | dpts_angles_model.add(Bidirectional(LSTM(128,return_sequences=False))) 80 | dpts_angles_model.add(Dense(128,activation="relu")) 81 | 82 | 83 | 84 | merged = keras.layers.concatenate([mouth_image_model.output, face_image_model.output,dpts_model.output,dpts_dists_model.output,dpts_angles_model.output]) 85 | 86 | 87 | merged = Dense(128,activation="relu")(merged) 88 | merged = Dense(256,activation="relu")(merged) 89 | 90 | merged = Dense(2,activation="softmax")(merged) 91 | 92 | model = Model(inputs=[mouth_image_model.input,face_image_model.input,dpts_model.input,dpts_dists_model.input,dpts_angles_model.input],outputs=merged) 93 | 94 | 95 | return model 96 | 97 | 98 | 99 | 100 | def train(self): 101 | X_test= [self.dataset.mouth_image_test_sequence,self.dataset.face_image_test_sequence, self.dataset.key_points_test_sequence, \ 102 | self.dataset.distances_test_sequence, self.dataset.angles_test_sequence] 103 | # X_test= [self.dataset.mouth_image_test_sequence,\ 104 | # self.dataset.face_image_test_sequence] 105 | 106 | y_test = self.dataset.Y_test 107 | y_test = np.eye(2)[y_test] 108 | 109 | self.model.compile(loss=keras.losses.binary_crossentropy,optimizer=keras.optimizers.Adam(1e-4),metrics=["accuracy"]) 110 | self.model.fit_generator(self.dataset.generator(1),steps_per_epoch=5000,epochs=25,verbose=1,validation_data=(X_test,y_test)) 111 | 112 | model_name = "model-mouth-100" 113 | self.model.save_weights("models/"+model_name+".h5") 114 
| model_json = self.model.to_json() 115 | with open("models/"+model_name+".json","w+") as json_file: 116 | json_file.write(model_json) 117 | score = self.model.evaluate(X_test,y_test) 118 | with open("logs/log-mouth.txt","a+") as log_file: 119 | log_file.write("Score of "+model_name+": "+str(score)) 120 | log_file.write("\n") 121 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # This file may be used to create an environment using: 2 | # $ conda create --name --file 3 | # platform: linux-64 4 | backports=1.0=py27h63c9359_1 5 | backports.shutil_get_terminal_size=1.0.0=py27h5bc021e_2 6 | backports.weakref=1.0rc1=py27_0 7 | backports_abc=0.5=py27h7b3c97b_0 8 | bleach=1.5.0=py27_0 9 | bokeh=0.12.10=py27he46cc6b_0 10 | ca-certificates=2017.08.26=h1d4fec5_0 11 | cairo=1.12.18=6 12 | certifi=2017.7.27.1=py27h9ceb091_0 13 | cloudpickle=0.4.0=py27ha64365b_0 14 | cycler=0.10.0=py27hc7354d3_0 15 | dask=0.15.2=py27_0 16 | decorator=4.1.2=py27h1544723_0 17 | enum34=1.1.6=py27h99a27e9_1 18 | fontconfig=2.11.1=6 19 | freetype=2.5.5=1 20 | funcsigs=1.0.2=py27h83f16ab_0 21 | futures=3.1.1=py27hdbc8cbb_0 22 | h5py=2.7.0=np111py27_0 23 | hdf5=1.8.17=2 24 | html5lib=0.9999999=py27_0 25 | icu=58.2=h211956c_0 26 | imageio=2.2.0=py27hf108a7f_0 27 | intel-openmp=2018.0.0=h15fc484_7 28 | ipython=5.4.1=py27h36c99b6_1 29 | ipython_genutils=0.2.0=py27h89fb69b_0 30 | jbig=2.1=hdba287a_0 31 | jinja2=2.9.6=py27h82327ae_1 32 | jpeg=8d=2 33 | keras=2.0.8=py27hd1b5a5b_0 34 | libedit=3.1=heed3624_0 35 | libffi=3.2.1=h4deb6c0_3 36 | libgcc-ng=7.2.0=h7cc24e2_2 37 | libgfortran=3.0.0=1 38 | libgfortran-ng=7.2.0=h9f7466a_2 39 | libpng=1.6.32=hda9c8bc_2 40 | libprotobuf=3.4.0=0 41 | libstdcxx-ng=7.2.0=h7a57d05_2 42 | libtiff=4.0.6=2 43 | libxml2=2.9.4=h6b072ca_5 44 | locket=0.2.0=py27h73929a2_1 45 | markdown=2.6.9=py27_0 46 | markupsafe=1.0=py27h97b2822_1 47 | matplotlib=1.5.1=np111py27_0 48 | mkl=2017.0.4=h4c4d0af_0 49 | mock=2.0.0=py27h0c0c831_0 50 | ncurses=6.0=h06874d7_1 51 | networkx=2.0=py27hfc23926_0 52 | numpy=1.11.3=py27_0 53 | opencv=3.1.0=np111py27_1 54 | openssl=1.0.2l=h077ae2c_5 55 | pandas=0.20.3=py27h820b67f_2 56 | partd=0.3.8=py27h4e55004_0 57 | pathlib2=2.3.0=py27h6e9d198_0 58 | pbr=3.1.1=py27hf64632f_0 59 | pexpect=4.2.1=py27hcf82287_0 60 | pickleshare=0.7.4=py27h09770e1_0 61 | pillow=3.4.2=py27_0 62 | pip=9.0.1=py27hbf658b2_3 63 | pixman=0.32.6=0 64 | prompt_toolkit=1.0.15=py27h1b593e1_0 65 | protobuf=3.4.0=py27_0 66 | ptyprocess=0.5.2=py27h4ccb14c_0 67 | py2cairo=1.10.0=py27_2 68 | pycairo=1.10.0=py27_0 69 | pygments=2.2.0=py27h4a8b6f5_0 70 | pyparsing=2.0.3=py27_0 71 | pyqt=4.11.4=py27_4 72 | python=2.7.14=hc2b0042_21 73 | python-dateutil=2.6.1=py27h4ca5741_1 74 | pytz=2017.2=py27hcac29fa_1 75 | pywavelets=0.5.2=py27hecda097_0 76 | pyyaml=3.12=py27h2d70dd7_1 77 | qt=4.8.7=3 78 | readline=7.0=hac23ff0_3 79 | scandir=1.6=py27hf7388dc_0 80 | scikit-image=0.13.0=py27h06cb35d_1 81 | scikit-learn=0.18.1=np111py27_1 82 | scipy=0.19.0=np111py27_0 83 | setuptools=36.5.0=py27h68b189e_0 84 | simplegeneric=0.8.1=py27h19e43cd_0 85 | singledispatch=3.4.0.3=py27h9bcb476_0 86 | sip=4.18=py27_0 87 | six=1.11.0=py27h5f960f1_1 88 | sqlite=3.20.1=h6d8b0f3_1 89 | ssl_match_hostname=3.5.0.1=py27h4ec10b9_2 90 | tensorflow=1.3.0=0 91 | tensorflow-base=1.3.0=py27h0dbb4d0_1 92 | tensorflow-tensorboard=0.1.5=py27_0 93 | tk=8.6.7=h5979e9b_1 94 | toolz=0.8.2=py27hd3b1e7e_0 95 | 
tornado=4.5.2=py27h97b179f_0 96 | traitlets=4.3.2=py27hd6ce930_0 97 | wcwidth=0.1.7=py27h9e3e1ab_0 98 | werkzeug=0.12.2=py27hbf75dff_0 99 | wheel=0.29.0=py27h411dd7b_1 100 | xz=5.2.3=h2bcbf08_1 101 | yaml=0.1.7=h96e3832_1 102 | zlib=1.2.11=hfbfcf68_1 103 | -------------------------------------------------------------------------------- /split/__init__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | def get_cmd_args(): 4 | parser = argparse.ArgumentParser() 5 | parser.add_argument("-p","--images_path",type=str) 6 | parser.add_argument("-f","--faces_path",type=str) 7 | parser.add_argument("-o","--output_path",type=str) 8 | parser.add_argument("-l","--sequence_length",type=int,default=30) 9 | args = parser.parse_args() 10 | return args 11 | -------------------------------------------------------------------------------- /split/__main__.py: -------------------------------------------------------------------------------- 1 | from split import get_cmd_args 2 | from split.split_squence import track_faces_inside_sequences,split_sequence 3 | def main(): 4 | args = get_cmd_args() 5 | print "tracking all faces" 6 | track_faces_inside_sequences(args.images_path,args.faces_path) 7 | print "done with tracking faces" 8 | print "splitting dataset" 9 | split_sequence(args.images_path,args.output_path,args.sequence_length) 10 | if __name__ == '__main__': 11 | main() -------------------------------------------------------------------------------- /split/split_squence.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import cv2 4 | import dlib 5 | import numpy as np 6 | import json 7 | from sys import exit 8 | 9 | 10 | def split_array(array,max_size): 11 | output = [] 12 | for i in range(0,len(array),max_size): 13 | output+=[array[i:i+max_size]] 14 | return output 15 | def copy_images(imgs_files,source_folder,dest_folder): 16 | for imfile in imgs_files: 17 | shutil.copy(os.path.join(source_folder,imfile),os.path.join(dest_folder,imfile)) 18 | 19 | def split_sequence(dataset_dir,output_dir,max_size): 20 | if not os.path.exists(output_dir): 21 | os.mkdir(output_dir) 22 | sequences = os.listdir(dataset_dir) 23 | for s in sequences: 24 | current_path = os.path.join(dataset_dir,s) 25 | s_images = os.listdir(current_path) 26 | s_images.sort() 27 | splited_seq = split_array(s_images,max_size) 28 | for i in range(len(splited_seq)): 29 | dest_folder = os.path.join(output_dir,s+"-"+str(i)) 30 | if not os.path.exists(dest_folder): 31 | os.mkdir(dest_folder) 32 | copy_images(splited_seq[i],current_path,dest_folder) 33 | print "Processed",s 34 | def rect_to_array(rect): 35 | output = [] 36 | output[0:4] = rect.left(),rect.top(),rect.right(),rect.bottom() 37 | return output 38 | def track_all_faces(sequence_path,img_files,face_index,detector,predictor): 39 | 40 | img = cv2.imread(os.path.join(sequence_path,img_files[face_index])) 41 | face = detector(img)[0] 42 | tracker = dlib.correlation_tracker() 43 | win = dlib.image_window() 44 | tracker.start_track(img,face) 45 | bounding_boxes = {} 46 | for i in range(face_index,-1,-1): 47 | img = cv2.imread(os.path.join(sequence_path,img_files[i])) 48 | faces = detector(img) 49 | if len(faces)>0: 50 | tracked_face = faces[0] 51 | tracker.start_track(img,tracked_face) 52 | else: 53 | tracker.update(img) 54 | tracked_face = tracker.get_position() 55 | bounding_boxes[img_files[i]] = rect_to_array(tracked_face) 56 | win.clear_overlay() 57 | _,name = 
os.path.split(sequence_path) 58 | win.set_title(name.split("-")[2]) 59 | win.set_image(img) 60 | win.add_overlay(tracked_face) 61 | 62 | img = cv2.imread(os.path.join(sequence_path,img_files[face_index])) 63 | face = detector(img)[0] 64 | tracker.start_track(img,face) 65 | for i in range(face_index+1,len(img_files)): 66 | 67 | img = cv2.imread(os.path.join(sequence_path,img_files[i])) 68 | faces = detector(img) 69 | if len(faces)>0: 70 | tracked_face = faces[0] 71 | tracker.start_track(img,tracked_face) 72 | else: 73 | tracker.update(img) 74 | tracked_face = tracker.get_position() 75 | bounding_boxes[img_files[i]] = rect_to_array(tracked_face) 76 | win.clear_overlay() 77 | win.set_image(img) 78 | win.add_overlay(tracked_face) 79 | return bounding_boxes 80 | 81 | def track_face_inside_sequence(sequence_path,output_dir): 82 | img_files = os.listdir(sequence_path) 83 | img_files.sort() 84 | detector = dlib.get_frontal_face_detector() 85 | predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat") 86 | bounding_box = {} 87 | face_found = False 88 | sequence_basename = os.path.basename(sequence_path) 89 | if not os.path.exists(output_dir): 90 | os.mkdir(output_dir) 91 | for i in range(len(img_files)): 92 | img = cv2.imread(os.path.join(sequence_path,img_files[i])) 93 | faces = detector(img) 94 | if len(faces)>0: 95 | bounding_box = track_all_faces(sequence_path, img_files,i,detector,predictor) 96 | with open(os.path.join(output_dir,sequence_basename)+".json","w+") as bbox_file: 97 | json.dump(bounding_box,bbox_file) 98 | face_found = True 99 | break 100 | if not face_found: 101 | print "No faces found inside ",sequence_path, " sequence" 102 | def track_faces_inside_sequences(dataset_dir,output_dir): 103 | for seq in os.listdir(dataset_dir): 104 | track_face_inside_sequence(os.path.join(dataset_dir,seq),output_dir) 105 | 106 | -------------------------------------------------------------------------------- /train/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ymitiku/TalkingYawnDetection/b8ab7a5ecacce31555ff3fa2769df1fd455084fa/train/__init__.py -------------------------------------------------------------------------------- /train/__main__.py: -------------------------------------------------------------------------------- 1 | from dataset.mouth_features import MouthFeatureOnlyDataset 2 | from nets.mouth_features import MouthFeatureOnlyNet 3 | import argparse 4 | def get_cmd_args(): 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument("-d","--dataset_path",type=str) 7 | parser.add_argument("-f","--faces_path",type=str) 8 | parser.add_argument("-l","--sequence_length",type=int,default=30) 9 | args = parser.parse_args() 10 | return args 11 | def main(): 12 | args = get_cmd_args() 13 | dataset = MouthFeatureOnlyDataset(args.dataset_path,args.faces_path,(48,48,1),args.sequence_length) 14 | 15 | dataset.load_dataset() 16 | net = MouthFeatureOnlyNet(dataset,(48,48,1),args.sequence_length) 17 | net.train() 18 | if __name__ == "__main__": 19 | main() --------------------------------------------------------------------------------
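
The `train()` method above serializes the trained network to `models/model-mouth-100.json` (architecture) and `models/model-mouth-100.h5` (weights). As a minimal sketch, assuming only those two files and the Keras version pinned in requirements.txt, the snippet below shows one way the saved model could be reloaded for inference; it is not a file in this repository. The zero-valued dummy batch is built from the model's own declared input shapes just to exercise the graph; a real prediction would instead pass the five preprocessed sequence streams used during training (mouth images, face images, key points, distances, angles).

```python
# Hypothetical inference sketch -- not part of the repository sources above.
# It assumes only the artifacts written by the train() method:
#   models/model-mouth-100.json  (architecture)
#   models/model-mouth-100.h5    (weights)
import numpy as np
from keras.models import model_from_json

with open("models/model-mouth-100.json") as json_file:
    model = model_from_json(json_file.read())
model.load_weights("models/model-mouth-100.h5")

# Build one zero-valued sample per input from the model's declared shapes,
# dropping the batch dimension; replace these with real preprocessed sequences.
dummy_batch = [np.zeros((1,) + shape[1:]) for shape in model.input_shape]

scores = model.predict(dummy_batch)  # shape (1, 2): softmax over the two classes
print(scores)
```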