├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── __main__.py ├── dataset ├── __init__.py ├── __main__.py ├── extract_images.sh └── mouth_features.py ├── nets ├── __init__.py ├── __main__.py └── mouth_features.py ├── requirements.txt ├── split ├── __init__.py ├── __main__.py └── split_squence.py └── train ├── __init__.py └── __main__.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | logs/* 3 | models/* 4 | shape_predictor_68_face_landmarks.dat 5 | haarcas/* -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Mitiku Yohannes 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Talking & Yawn Detection 2 | This project aims to train a model that detects talking and yawning from sequences of images. 3 | 4 | ## How to split the dataset from videos 5 | This project uses the [YawDD dataset](http://www.site.uottawa.ca/~shervin/yawning/). 6 | * First, modify the dataset/extract_images.sh file so that it points to the Mirror folders (for both the male and female subjects) that contain the recorded videos. Also modify the output directory (/dataset/yawn/images3/). 7 | * Second, split the extracted image sequences into smaller sequences by running the following command 8 | ``` python -m split --images_path path-to-extracted-images --faces_path path-to-save-bounding-boxes-of-sequence-images --output_path path-to-save-output-sequences --sequence_length sequence-length-to-split``` 9 | 10 | ### How to run the training program 11 | 12 | ``` python -m train --dataset_path path-to-split-dataset --faces_path path-to-bounding-boxes --sequence_length sequence-length ``` 13 | 14 | * **shape_predictor_68_face_landmarks.dat must be inside the root directory of this project. 
Shape predictor can be downloaded to project using the following script.** 15 | ``` 16 | cd /path-to-project 17 | wget "http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2" 18 | bzip2 -d shape_predictor_68_face_landmarks.dat.bz2 19 | ``` 20 | 21 | [sp]: http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ymitiku/TalkingYawnDetection/b8ab7a5ecacce31555ff3fa2769df1fd455084fa/__init__.py -------------------------------------------------------------------------------- /__main__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import dlib 4 | 5 | detector = dlib.get_frontal_face_detector() 6 | 7 | face_cascade = cv2.CascadeClassifier("haarcas/haarcascade_profileface.xml") 8 | 9 | current_dir = "/dataset/yawn/splited-100/5-FemaleGlasses-Talking-0" 10 | for img_file in os.listdir(current_dir): 11 | img = cv2.imread(os.path.join(current_dir,img_file)) 12 | img_gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY) 13 | 14 | faces = face_cascade.detectMultiScale(img_gray,1.5,5,minSize=(30,30),flags=cv2.CASCADE_SCALE_IMAGE) 15 | if len(faces)==0: 16 | faces =detector(img_gray) 17 | if len(faces)==0: 18 | continue 19 | face = faces[0] 20 | cv2.rectangle(img,(face.left(),face.top()),(face.right(),face.bottom()),(0,0,255),2) 21 | else: 22 | for (x,y,w,h) in faces: 23 | cv2.rectangle(img,(x,y),(x+w,y+h),(255,0,0),2) 24 | cv2.imshow("Image",img) 25 | cv2.waitKey(0) 26 | cv2.destroyAllWindows() 27 | -------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import dlib 3 | import os 4 | import numpy as np 5 | import pandas as pd 6 | import json 7 | from threading import Thread 8 | from sklearn.model_selection import train_test_split 9 | 10 | 11 | class DriverActionDataset(object): 12 | def __init__(self,dataset_dir,bounding_box_dir,image_shape,max_sequence_length): 13 | self.dataset_dir = dataset_dir 14 | self.bounding_box_dir = bounding_box_dir 15 | self.image_shape = image_shape 16 | self.dataset_loaded = False 17 | self.max_sequence_length = max_sequence_length 18 | self.detector = dlib.get_frontal_face_detector() 19 | self.predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat") 20 | def get_attribute(self,folder_name): 21 | subj,gender_glasses,action_str,_ = folder_name.split("-") 22 | gender = -1 23 | glasses_str = None 24 | if gender_glasses[:4].lower() == "male": 25 | gender = 1 26 | glasses_str = gender_glasses[4:] 27 | elif gender_glasses[:6].lower() == "female": 28 | gender = 0 29 | glasses_str = gender_glasses[6:] 30 | else: 31 | raise Exception("Unable to parse gender from "+str(folder_name)) 32 | glasses = -1 33 | 34 | if glasses_str[:9].lower() =="noglasses": 35 | glasses = 0 36 | elif glasses_str[:7].lower() == "glasses" or glasses_str[:10].lower() == "sunglasses": 37 | glasses = 1 38 | else: 39 | raise Exception("Unable to parse glasses information from "+str(folder_name)) 40 | 41 | actions_str = action_str.split("&") 42 | for i in range(len(actions_str)): 43 | if actions_str[i].lower()=="normal": 44 | action = 0 45 | elif actions_str[i].lower() == "yawning": 46 | action = 0 47 | elif actions_str[i].lower() == "talking": 48 | action = 1 49 | 
break 50 | else: 51 | raise Exception("Unable to parse action information from " + str(folder_name)) 52 | 53 | 54 | output = {"Subject":subj,"Gender":gender,"Glasses":glasses,"Action":action} 55 | 56 | return output 57 | def get_dlib_points(self,image,face,predictor): 58 | shape = predictor(image,face) 59 | dlib_points = np.zeros((68,2)) 60 | for i,part in enumerate(shape.parts()): 61 | dlib_points[i] = [part.x,part.y] 62 | return dlib_points 63 | def get_right_eye_attributes(self,image,dlib_points): 64 | 65 | right_eye_dlib_points = np.concatenate((dlib_points[17:22],dlib_points[36:42])) 66 | assert len(right_eye_dlib_points)==11, "right _eye dlib points should be 11" 67 | right_eye_top_left = right_eye_dlib_points.min(axis=0) 68 | right_eye_bottom_right = right_eye_dlib_points.max(axis=0) 69 | 70 | right_eye_top = int(max(right_eye_top_left[1]-5,0)) 71 | right_eye_left = int(max(right_eye_top_left[0]-5,0)) 72 | right_eye_right = int(min(right_eye_bottom_right[0]+5,image.shape[1])) 73 | right_eye_bottom = int(min(right_eye_bottom_right[1]+5,image.shape[0])) 74 | 75 | right_eye = image[right_eye_top:right_eye_bottom,right_eye_left:right_eye_right] 76 | 77 | # r_left_corner_top = int(max(dlib_points[19][1],0)) 78 | # r_left_corner_left = int(max(dlib_points[19][0],0)) 79 | # r_left_corner_right = int(min(dlib_points[27][0],image.shape[1])) 80 | # r_left_corner_bottom = int(min(dlib_points[41][1],image.shape[0])) 81 | 82 | # right_eye_left_corner = image[r_left_corner_top:r_left_corner_bottom,r_left_corner_left:r_left_corner_right] 83 | 84 | # r_right_corner_top = int(max(dlib_points[19][1],0)) 85 | # r_right_corner_left = int(max(dlib_points[17][0]-5,0)) 86 | # r_right_corner_right = int(min(dlib_points[19][0],image.shape[1])) 87 | # r_right_corner_bottom = int(min(dlib_points[41][1]+5,image.shape[0])) 88 | 89 | # right_eye_right_corner = image[r_right_corner_top:r_right_corner_bottom, r_right_corner_left:r_right_corner_right] 90 | 91 | right_eye = self.resize_to_output_shape(right_eye) 92 | # right_eye_left_corner = self.resize_to_output_shape(right_eye_left_corner) 93 | # right_eye_right_corner = self.resize_to_output_shape(right_eye_right_corner) 94 | 95 | # return right_eye,right_eye_left_corner,right_eye_right_corner 96 | return right_eye 97 | 98 | def get_left_eye_attributes(self,image,dlib_points): 99 | 100 | left_eye_dlib_points = np.concatenate((dlib_points[22:27],dlib_points[42:48])) 101 | assert len(left_eye_dlib_points)==11, "left _eye dlib points should be 11" 102 | left_eye_top_left = left_eye_dlib_points.min(axis=0) 103 | left_eye_bottom_right = left_eye_dlib_points.max(axis=0) 104 | 105 | left_eye_top = int(max(left_eye_top_left[1]-5,0)) 106 | left_eye_left = int(max(left_eye_top_left[0]-5,0)) 107 | left_eye_right = int(min(left_eye_bottom_right[0]+5,image.shape[1])) 108 | left_eye_bottom = int(min(left_eye_bottom_right[1]+5,image.shape[0])) 109 | 110 | 111 | left_eye = image[left_eye_top:left_eye_bottom,left_eye_left:left_eye_right] 112 | 113 | # l_left_corner_top = int(max(dlib_points[24][1],0)) 114 | # l_left_corner_left = int(max(dlib_points[24][0],0)) 115 | # l_left_corner_right = int(min(dlib_points[26][0],image.shape[1])) 116 | # l_left_corner_bottom = int(min(dlib_points[46][1],image.shape[0])) 117 | 118 | # left_eye_left_corner = image[l_left_corner_top:l_left_corner_bottom,l_left_corner_left:l_left_corner_right] 119 | 120 | # l_right_corner_top = int(max(dlib_points[24][1],0)) 121 | # l_right_corner_left = int(max(dlib_points[27][0],0)) 122 | # l_right_corner_right 
= int(min(dlib_points[24][0],image.shape[1])) 123 | # l_right_corner_bottom = int(min(dlib_points[46][1],image.shape[0])) 124 | 125 | # left_eye_right_corner = image[l_right_corner_top:l_right_corner_bottom, l_right_corner_left:l_right_corner_right] 126 | 127 | left_eye = self.resize_to_output_shape(left_eye) 128 | # left_eye_left_corner = self.resize_to_output_shape(left_eye_left_corner) 129 | # left_eye_right_corner = self.resize_to_output_shape(left_eye_right_corner) 130 | 131 | # return left_eye,left_eye_left_corner,left_eye_right_corner 132 | return left_eye 133 | def resize_to_output_shape(self,image): 134 | if image is None: 135 | return np.zeros((self.image_shape[0],self.image_shape[1],self.image_shape[2])) 136 | try: 137 | img = cv2.resize(image,(self.image_shape[0],self.image_shape[1])) 138 | except: 139 | print "img.shape",image.shape 140 | return np.zeros((self.image_shape[0],self.image_shape[1],self.image_shape[2])) 141 | return img 142 | def get_nose_attributes(self,image,dlib_points): 143 | nose_dlib_points = dlib_points[27:36] 144 | assert len(nose_dlib_points)==9, "nose dlib points should be 9" 145 | nose_top_left = nose_dlib_points.min(axis=0) 146 | nose_bottom_right = nose_dlib_points.max(axis=0) 147 | 148 | nose_top = int(max(nose_top_left[1]-5,0)) 149 | nose_left = int(max(nose_top_left[0]-5,0)) 150 | nose_right = int(min(nose_bottom_right[0]+5,image.shape[1])) 151 | nose_bottom = int(min(nose_bottom_right[1]+5,image.shape[0])) 152 | 153 | 154 | nose = image[nose_top:nose_bottom,nose_left:nose_right] 155 | 156 | # nose_left_corner_top = int(max(dlib_points[27][1],0)) 157 | # nose_left_corner_left = int(max(dlib_points[27][0],0)) 158 | # nose_left_corner_right = int(min(dlib_points[42][0],image.shape[1])) 159 | # nose_left_corner_bottom = int(min(dlib_points[33][1],image.shape[0])) 160 | 161 | # nose_left_corner = image[nose_left_corner_top:nose_left_corner_bottom,nose_left_corner_left:nose_left_corner_right] 162 | 163 | # nose_right_corner_top = int(max(dlib_points[27][1],0)) 164 | # nose_right_corner_left = int(max(dlib_points[39][0],0)) 165 | # nose_right_corner_right = int(min(dlib_points[27][0],image.shape[1])) 166 | # nose_right_corner_bottom = int(min(dlib_points[33][1],image.shape[0])) 167 | 168 | # nose_right_corner = image[nose_right_corner_top:nose_right_corner_bottom, nose_right_corner_left:nose_right_corner_right] 169 | 170 | 171 | 172 | nose = self.resize_to_output_shape(nose) 173 | # nose_left_corner = self.resize_to_output_shape(nose_left_corner) 174 | 175 | # nose_right_corner = self.resize_to_output_shape(nose_right_corner) 176 | 177 | 178 | # return nose,nose_left_corner,nose_right_corner 179 | return nose 180 | def get_bounding_boxes(self,sequence_path): 181 | _,sequence_name = os.path.split(sequence_path) 182 | org_squence_name = "-".join(sequence_name.split("-")[:3]) 183 | bbox_file_path = os.path.join(self.bounding_box_dir,org_squence_name+".json") 184 | with open(bbox_file_path,"r") as bbox_file: 185 | bboxes = json.load(bbox_file) 186 | if bboxes is None or len(bboxes)==0: 187 | raise Exception("No bounding box for sequence:"+sequence_path) 188 | else: 189 | return bboxes 190 | def draw_dlib_points(self,image,kps,color=(255,255,0)): 191 | for i in range(len(kps)): 192 | cv2.circle(image,(int(kps[i][0]),int(kps[i][1])),1,color) 193 | def get_mouth_attributes(self,image,dlib_points): 194 | mouth_dlib_points = dlib_points[48:68] 195 | assert len(mouth_dlib_points)==20, "Mouth dlib points should be 20" 196 | mouth_top_left = 
mouth_dlib_points.min(axis=0) 197 | mouth_bottom_right = mouth_dlib_points.max(axis=0) 198 | 199 | mouth_top = int(max(mouth_top_left[1]-5,0)) 200 | mouth_left = int(max(mouth_top_left[0]-5,0)) 201 | mouth_right = int(min(mouth_bottom_right[0]+5,image.shape[1])) 202 | mouth_bottom = int(min(mouth_bottom_right[1]+5,image.shape[0])) 203 | 204 | mouth = image[mouth_top:mouth_bottom,mouth_left:mouth_right] 205 | # if mouth.shape[0]==0: 206 | # print dlib_points 207 | # self.draw_dlib_points(image,dlib_points) 208 | # self.draw_dlib_points(image,mouth_dlib_points,color=(255,0,0)) 209 | 210 | # cv2.imshow("Image",image) 211 | # cv2.waitKey(0) 212 | # cv2.destroyAllWindows() 213 | # mouth_left_corner_top = int(max(dlib_points[52][1],0)) 214 | # mouth_left_corner_left = int(max(dlib_points[51][0],0)) 215 | # mouth_left_corner_right = int(min(dlib_points[54][0]+5,image.shape[1])) 216 | # mouth_left_corner_bottom = int(min(dlib_points[57][1],image.shape[0])) 217 | 218 | # mouth_left_corner = image[mouth_left_corner_top:mouth_left_corner_bottom,mouth_left_corner_left:mouth_left_corner_right] 219 | 220 | # mouth_right_corner_top = int(max(dlib_points[52][1],0)) 221 | # mouth_right_corner_left = int(max(dlib_points[48][0],0)) 222 | # mouth_right_corner_right = int(min(dlib_points[57][0],image.shape[1])) 223 | # mouth_right_corner_bottom = int(min(dlib_points[57][1],image.shape[0])) 224 | 225 | # mouth_right_corner = image[mouth_right_corner_top:mouth_right_corner_bottom, mouth_right_corner_left:mouth_right_corner_right] 226 | 227 | # mouth_top_corner_top = int(max(dlib_points[50][1],0)) 228 | # mouth_top_corner_left = int(max(dlib_points[48][0],0)) 229 | # mouth_top_corner_right = int(min(dlib_points[54][0],image.shape[1])) 230 | # mouth_top_corner_bottom = int(min(dlib_points[48][1],image.shape[0])) 231 | 232 | # mouth_top_corner = image[mouth_top_corner_top:mouth_top_corner_bottom, mouth_top_corner_left:mouth_top_corner_right] 233 | 234 | # mouth_bottom_corner_top = int(max(dlib_points[48][1],0)) 235 | # mouth_bottom_corner_left = int(max(dlib_points[48][0],0)) 236 | # mouth_bottom_corner_right = int(min(dlib_points[54][0],image.shape[1])) 237 | # mouth_bottom_corner_bottom = int(min(dlib_points[57][1],image.shape[0])) 238 | 239 | # mouth_bottom_corner = image[mouth_bottom_corner_top:mouth_bottom_corner_bottom, mouth_bottom_corner_left:mouth_bottom_corner_right] 240 | 241 | 242 | mouth = self.resize_to_output_shape(mouth) 243 | # mouth_left_corner = self.resize_to_output_shape(mouth_left_corner) 244 | # mouth_right_corner = self.resize_to_output_shape(mouth_right_corner) 245 | # mouth_top_corner = self.resize_to_output_shape(mouth_top_corner) 246 | # mouth_bottom_corner = self.resize_to_output_shape(mouth_bottom_corner) 247 | 248 | 249 | 250 | # return mouth,mouth_left_corner,mouth_right_corner,mouth_top_corner,mouth_bottom_corner 251 | return mouth 252 | 253 | def get_face_attributes(self,image,face,predictor): 254 | face_image =image[ int(max(0,face.top())):int(min(image.shape[0],face.bottom())), 255 | int(max(0,face.left())):int(min(image.shape[1],face.right())) 256 | ] 257 | face_image = cv2.resize(face_image,(self.image_shape[0],self.image_shape[1])) 258 | 259 | dlib_points = self.get_dlib_points(image,face,self.predictor) 260 | right_eye = self.get_right_eye_attributes(image,dlib_points) 261 | left_eye = self.get_left_eye_attributes(image,dlib_points) 262 | nose = self.get_nose_attributes(image,dlib_points) 263 | mouth = self.get_mouth_attributes(image,dlib_points) 264 | output = 
{"face_image":face_image,"right_eye":right_eye,"left_eye":left_eye, 265 | "mouth":mouth,"nose":nose 266 | } 267 | # right_eye,right_eye_left_corner,right_eye_right_corner = self.get_right_eye_attributes(image,dlib_points) 268 | # left_eye,left_eye_left_corner,left_eye_right_corner = self.get_left_eye_attributes(image,dlib_points) 269 | # nose,nose_right_corner,nose_left_corner = self.get_nose_attributes(image,dlib_points) 270 | # mouth,mouth_left_corner,mouth_right_corner,mouth_top_corner,mouth_bottom_corner = self.get_mouth_attributes(image,dlib_points) 271 | # output = {"face_image":face_image,"right_eye":right_eye,"left_eye":left_eye, 272 | # "mouth":mouth,"nose":nose,"left_eye_right_corner":left_eye_right_corner, 273 | # "left_eye_left_corner":left_eye_left_corner,"right_eye_right_corner":right_eye_right_corner, 274 | # "right_eye_left_corner":right_eye_left_corner,"nose_right_corner":nose_right_corner, 275 | # "nose_left_corner":nose_left_corner,"mouth_left_corner":mouth_left_corner, 276 | # "mouth_right_corner":mouth_right_corner,"mouth_top_corner":mouth_top_corner, 277 | # "mouth_bottom_corner":mouth_bottom_corner 278 | # } 279 | return output 280 | 281 | 282 | def load_image_sequence(self,path,detector,predictor,verbose=False): 283 | if verbose: 284 | print "loading",path 285 | imgs_files = os.listdir(path) 286 | imgs_files.sort() 287 | output_faces = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 288 | output_right_eyes = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 289 | output_left_eyes = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 290 | output_mouths = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 291 | output_noses = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 292 | # output_left_eye_right_corners = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 293 | # output_left_eye_left_corners = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 294 | # output_right_eye_right_corners = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 295 | # output_right_eye_left_corners = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 296 | # output_nose_right_corners = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 297 | # output_nose_left_corners = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 298 | # output_mouth_left_corners = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 299 | # output_mouth_right_corners = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 300 | # output_mouth_top_corners = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 301 | # output_mouth_bottom_corners = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 302 | 303 | 304 | bounding_boxes = self.get_bounding_boxes(path) 305 | 306 | for i in range(len(imgs_files)): 307 | img = cv2.imread(os.path.join(path,imgs_files[i])) 308 | if not (img is None): 309 | bbox = bounding_boxes[imgs_files[i]] 310 | 
face = dlib.rectangle(int(bbox[0]),int(bbox[1]),int(bbox[2]),int(bbox[3])) 311 | # face_image =img[ max(0,face.top()):min(img.shape[0],face.bottom()), 312 | # max(0,face.left()):min(img.shape[1],face.right()) 313 | # ] 314 | # [right_eye,left_eye,mouth,nose,left_eye_corners,right_eye_corners,nose_corners,mouth_corners] 315 | 316 | attrs = self.get_face_attributes(img, face,self.predictor) 317 | output_faces[i] = attrs["face_image"] 318 | output_right_eyes[i] = attrs["right_eye"] 319 | output_left_eyes[i] = attrs["left_eye"] 320 | output_noses[i] = attrs["nose"] 321 | output_mouths[i] = attrs["mouth"] 322 | # output_left_eye_right_corners[i] = attrs["left_eye_right_corner"] 323 | # output_left_eye_left_corners[i] = attrs["left_eye_left_corner"] 324 | # output_right_eye_right_corners[i] = attrs["right_eye_right_corner"] 325 | # output_right_eye_left_corners[i] = attrs["right_eye_left_corner"] 326 | # output_nose_right_corners[i] = attrs["nose_right_corner"] 327 | # output_nose_left_corners[i] = attrs["nose_left_corner"] 328 | # output_mouth_right_corners[i] = attrs["mouth_right_corner"] 329 | # output_mouth_left_corners[i] = attrs["mouth_left_corner"] 330 | # output_mouth_top_corners[i] = attrs["mouth_top_corner"] 331 | # output_mouth_bottom_corners[i] = attrs["mouth_bottom_corner"] 332 | 333 | 334 | else: 335 | if verbose: 336 | print ("Unable to read image from ",os.path.join(path,imgs_files[i])) 337 | if verbose: 338 | print "loaded",path 339 | return output_faces,output_left_eyes,output_right_eyes,output_noses,output_mouths 340 | # return output_faces,output_left_eyes,output_right_eyes,output_noses,output_mouths,\ 341 | # output_left_eye_right_corners,output_left_eye_right_corners,output_right_eye_left_corners,\ 342 | # output_right_eye_right_corners,output_nose_left_corners,output_nose_right_corners,\ 343 | # output_mouth_left_corners,output_mouth_right_corners,output_mouth_top_corners,output_mouth_bottom_corners 344 | def get_is_talking(self,folder_name): 345 | if folder_name.lower().count("talking")>0: 346 | return 1 347 | else: 348 | return 0 349 | def load_dataset(self): 350 | sequences = os.listdir(self.dataset_dir) 351 | 352 | self.train_sequences,test_sequences = train_test_split(sequences,test_size=0.05) 353 | self.train_sequences = np.array(self.train_sequences) 354 | # num_train_sequences = len(train_sequences) 355 | num_test_sequences = len(test_sequences) 356 | 357 | # self.face_image_train_sequences = np.zeros((num_train_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 358 | # self.left_eye_image_train_sequences = np.zeros((num_train_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 359 | # self.right_eye_image_train_sequences = np.zeros((num_train_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 360 | # self.nose_image_train_sequences = np.zeros((num_train_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 361 | # self.mouth_image_train_sequences = np.zeros((num_train_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 362 | # self.talking_train = np.zeros((num_train_sequences,)) 363 | 364 | 365 | # for i in range(len(train_sequences)): 366 | # self.face_image_train_sequences[i],self.left_eye_image_train_sequences[i],\ 367 | # self.right_eye_image_train_sequences[i],self.nose_image_train_sequences[i],\ 368 | # 
self.mouth_image_train_sequences[i] = self.load_image_sequence(os.path.join(\ 369 | # self.dataset_dir,train_sequences[i]),detector,predictor) 370 | # self.talking_train[i] = self.get_is_talking(train_sequences[i]) 371 | 372 | print("loading test",len(test_sequences),"dataset") 373 | 374 | self.face_image_test_sequences = np.zeros((num_test_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 375 | self.left_eye_image_test_sequences = np.zeros((num_test_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 376 | self.right_eye_image_test_sequences = np.zeros((num_test_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 377 | self.nose_image_test_sequences = np.zeros((num_test_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 378 | self.mouth_image_test_sequences = np.zeros((num_test_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 379 | self.talking_test = np.zeros((num_test_sequences,)) 380 | 381 | 382 | for i in range(len(test_sequences)): 383 | self.face_image_test_sequences[i],self.left_eye_image_test_sequences[i],self.right_eye_image_test_sequences[i],\ 384 | self.nose_image_test_sequences[i],self.mouth_image_test_sequences[i] = self.load_image_sequence(\ 385 | os.path.join(self.dataset_dir,test_sequences[i]),self.detector,self.predictor) 386 | self.talking_test[i] = self.get_is_talking(test_sequences[i]) 387 | print("loaded test",len(test_sequences),"dataset") 388 | self.dataset_loaded = True 389 | 390 | def generator(self,batch_size): 391 | while True: 392 | indexes = np.arange(len(self.train_sequences)) 393 | np.random.shuffle(indexes) 394 | for i in range(0,len(indexes),batch_size): 395 | current_indexes = indexes[i:i+batch_size] 396 | 397 | current_sequences = self.train_sequences[current_indexes] 398 | 399 | y = np.zeros((len(current_sequences),)) 400 | for j in range(len(current_sequences)): 401 | faces,left_eyes,right_eyes,noses,mouths = self.load_image_sequence(os.path.join(\ 402 | self.dataset_dir,current_sequences[j]),self.detector,self.predictor) 403 | y[j] = self.get_is_talking(current_sequences[j]) 404 | y = y.astype(np.uint8) 405 | y = np.eye(2)[y] 406 | 407 | faces = faces.astype(np.float32)/255 408 | left_eyes = left_eyes.astype(np.float32)/255 409 | right_eyes = right_eyes.astype(np.float32)/255 410 | noses = noses.astype(np.float32)/255 411 | mouths = mouths.astype(np.float32)/255 412 | 413 | 414 | faces = faces.reshape(batch_size,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2]) 415 | left_eyes = left_eyes.reshape(batch_size,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2]) 416 | right_eyes = right_eyes.reshape(batch_size,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2]) 417 | noses = noses.reshape(batch_size,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2]) 418 | mouths = mouths.reshape(batch_size,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2]) 419 | yield [faces,left_eyes,right_eyes,noses,mouths],y -------------------------------------------------------------------------------- /dataset/__main__.py: -------------------------------------------------------------------------------- 1 | from dataset import DriverActionDataset 2 | import dlib 3 | 4 | def main(): 5 
| dataset = DriverActionDataset("/home/mtk/datasets/Yaw/YawDD dataset/Mirror","/dataset/yawn/faces",(227,227,3),100) # bounding-box directory and sequence length are placeholders added so the call matches DriverActionDataset.__init__; adjust to your setup 6 | detector = dlib.get_frontal_face_detector() 7 | predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat") 8 | dataset.load_image_sequence("/dataset/yawn/images3/9-FemaleNoGlasses-Normal/",detector,predictor) 9 | 10 | if __name__ == "__main__": 11 | main() -------------------------------------------------------------------------------- /dataset/extract_images.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | dataset_path="$HOME/datasets/Yaw/YawDD dataset/Mirror/Male_mirror/"; 3 | output_path="/dataset/yawn/images3/" 4 | 5 | for file in "$dataset_path"*.avi; 6 | do 7 | filename="$(basename "${file}" .avi)" 8 | mkdir -p "$output_path/${filename}" 9 | ffmpeg -i "$file" "$output_path/${filename}/%05d.jpg"; 10 | done; 11 | 12 | for file in ~/datasets/Yaw/YawDD\ dataset/Mirror/Female_mirror/*.avi; 13 | do 14 | filename="$(basename "${file}" .avi)" 15 | mkdir -p "$output_path/${filename}" 16 | ffmpeg -i "$file" "$output_path/${filename}/%05d.jpg"; 17 | done; -------------------------------------------------------------------------------- /dataset/mouth_features.py: -------------------------------------------------------------------------------- 1 | import os 2 | import dlib 3 | import cv2 4 | import json 5 | import numpy as np 6 | from sklearn.model_selection import train_test_split 7 | 8 | 9 | class MouthFeatureOnlyDataset(object): 10 | 11 | def __init__(self,dataset_dir, bounding_box_dir, image_shape,max_sequence_length): 12 | self.dataset_dir = dataset_dir 13 | self.bounding_box_dir = bounding_box_dir 14 | self.image_shape = image_shape 15 | self.dataset_loaded = False 16 | self.max_sequence_length = max_sequence_length 17 | self.detector = dlib.get_frontal_face_detector() 18 | self.predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat") 19 | def get_dlib_points(self,image,face): 20 | shape = self.predictor(image,face) 21 | dlib_points = np.zeros((68,2)) 22 | for i,part in enumerate(shape.parts()): 23 | dlib_points[i] = [part.x,part.y] 24 | return dlib_points 25 | def get_bounding_boxes(self,sequence_path): 26 | _,sequence_name = os.path.split(sequence_path) 27 | org_squence_name = "-".join(sequence_name.split("-")[:3]) 28 | bbox_file_path = os.path.join(self.bounding_box_dir,org_squence_name+".json") 29 | with open(bbox_file_path,"r") as bbox_file: 30 | bboxes = json.load(bbox_file) 31 | if bboxes is None or len(bboxes)==0: 32 | raise Exception("No bounding box for sequence:"+sequence_path) 33 | else: 34 | return bboxes 35 | def distance_between(self,v1,v2): 36 | diff = v2 - v1 37 | diff_squared = np.square(diff) 38 | dist_squared = diff_squared.sum(axis=1) 39 | dists = np.sqrt(dist_squared) 40 | return dists 41 | 42 | def angles_between(self,v1,v2): 43 | dot_prod = (v1 * v2).sum(axis=1) 44 | v1_norm = np.linalg.norm(v1,axis=1) 45 | v2_norm = np.linalg.norm(v2,axis=1) 46 | 47 | 48 | cosine_of_angle = (dot_prod/(v1_norm * v2_norm)).reshape(-1,1) 49 | 50 | angles = np.arccos(np.clip(cosine_of_angle,-1,1)) 51 | return angles 52 | def draw_key_points(self,image,key_points): 53 | for i in range(key_points.shape[0]): 54 | image = cv2.circle(image, (int(key_points[i][0]), int(key_points[i][1])), 1,(255,0,0)) 55 | return image 56 | def get_mouth_attributes_from_local_frame(self,image,key_points_20): 57 | 58 | current_image_shape = image.shape 59 | top_left = key_points_20.min(axis=0) 60 | bottom_right = 
key_points_20.max(axis=0) 61 | 62 | # bound the coordinate system inside eye image 63 | bottom_right[0] = min(current_image_shape[1],bottom_right[0]+5) 64 | bottom_right[1] = min(current_image_shape[0],bottom_right[1]+5) 65 | top_left[0] = max(0,top_left[0]-5) 66 | top_left[1] = max(0,top_left[1]-5) 67 | 68 | # crop the eye 69 | top_left = top_left.astype(int) 70 | bottom_right = bottom_right.astype(int) 71 | mouth_image = image[top_left[1]:bottom_right[1],top_left[0]:bottom_right[0]] 72 | if mouth_image.shape[0]==0: 73 | # self.draw_key_points(image,key_points_20) 74 | # cv2.imshow("Image",image) 75 | # cv2.waitKey(0) 76 | # cv2.destoryAllWindows(0) 77 | image = np.zeros((self.image_shape[0],self.image_shape[1],self.image_shape[2])) 78 | key_points = np.zeros((20,2)) 79 | dists = np.zeros((20)) 80 | angles = np.zeros((20)) 81 | return image, key_points,dists,angles 82 | 83 | # translate the eye key points from face image frame to eye image frame 84 | key_points = key_points_20 - top_left 85 | key_points +=np.finfo(float).eps 86 | # horizontal scale to resize image 87 | scale_h = self.image_shape[1]/float(mouth_image.shape[1]) 88 | # vertical scale to resize image 89 | scale_v = self.image_shape[0]/float(mouth_image.shape[0]) 90 | 91 | # resize left eye image to network input size 92 | mouth_image = cv2.resize(mouth_image,(self.image_shape[0],self.image_shape[1])) 93 | 94 | # scale left key points proportional with respect to left eye image resize scale 95 | scale = np.array([[scale_h,scale_v]]) 96 | key_points = key_points * scale 97 | 98 | # calculate centroid of left eye key points 99 | centroid = np.array([key_points.mean(axis=0)]) 100 | 101 | # calculate distances from centroid to each left eye key points 102 | dists = self.distance_between(key_points,centroid) 103 | 104 | # calculate angles between centroid point vector and left eye key points vectors 105 | angles = self.angles_between(key_points,centroid) 106 | return mouth_image, key_points,dists,angles 107 | 108 | def get_mouth_features_from_image(self,image,bounding_box): 109 | face = dlib.rectangle(int(bounding_box[0]),int(bounding_box[1]),int(bounding_box[2]),int(bounding_box[3])) 110 | 111 | # cv2.rectangle(image,(face.left(),face.top()),(face.right(),face.bottom()),(255,255,0)) 112 | b_box_array = np.array(bounding_box) 113 | key_points = self.get_dlib_points(image,face) 114 | mouth_key_points = key_points[48:68] 115 | image = self.draw_key_points(image,mouth_key_points) 116 | assert len(mouth_key_points) == 20, "Mouth key points should be twenty points" 117 | mouth_image, key_points,dists,angles = self.get_mouth_attributes_from_local_frame(image,mouth_key_points) 118 | return mouth_image, key_points,dists,angles 119 | def get_mouth_features(self,sequence_path): 120 | bboxes = self.get_bounding_boxes(sequence_path) 121 | output_images = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 122 | output_faces = np.zeros((self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 123 | output_key_points = np.zeros((self.max_sequence_length,20,2)) 124 | output_distances = np.zeros((self.max_sequence_length,20)) 125 | output_angles = np.zeros((self.max_sequence_length,20)) 126 | img_files = os.listdir(sequence_path) 127 | img_files.sort() 128 | 129 | for i in range(len(img_files)): 130 | img = cv2.imread(os.path.join(sequence_path,img_files[i])) 131 | if self.image_shape[2]==1: 132 | img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY) 133 | bounding_box = 
bboxes[img_files[i]] 134 | if not(img is None): 135 | 136 | face_image = img[ 137 | max(0,int(bounding_box[1]-5)):min(img.shape[0],int(bounding_box[3])+5), 138 | max(0,int(bounding_box[0]-5)):min(img.shape[1],int(bounding_box[2])+5) 139 | ] 140 | try: 141 | face_image = cv2.resize(face_image,(self.image_shape[0],self.image_shape[1])) 142 | face_image = face_image.reshape(self.image_shape) 143 | except: 144 | print bounding_box 145 | face_image = np.zeros((self.image_shape[0],self.image_shape[1],self.image_shape[2])) 146 | # top_left = (max(0,int(bounding_box[0])),max(0,int(bounding_box[1]))) 147 | # bottom_right = (min(img.shape[1],int(bounding_box[2])),min(img.shape[0],int(bounding_box[3]))) 148 | # cv2.rectangle(img,top_left,bottom_right,(255,0,0)) 149 | # cv2.imshow("Image",img) 150 | # # cv2.imshow("Face image",face_image) 151 | # cv2.waitKey(0) 152 | # cv2.destroyAllWindows() 153 | mouth_image,kps,dists,angles = self.get_mouth_features_from_image(img,bounding_box) 154 | mouth_image = mouth_image.reshape(self.image_shape) 155 | # self.draw_key_points(mouth_image,kps) 156 | # cv2.imshow("Image",img) 157 | # cv2.imshow("Mouth Image",mouth_image) 158 | # cv2.waitKey(0) 159 | # cv2.destroyAllWindows() 160 | output_faces[i] = face_image 161 | output_images[i] = mouth_image 162 | output_key_points[i] = kps 163 | output_distances[i] = dists 164 | output_angles[i] = angles.reshape((20,)) 165 | else: 166 | raise Exception("Unable to read image form "+os.path.join(sequence_path,img_files[i])) 167 | return output_faces, output_images,output_key_points,output_distances,output_angles 168 | def get_is_talking(self,folder_name): 169 | if folder_name.lower().count("talking")>0: 170 | return 1 171 | else: 172 | return 0 173 | def load_dataset(self): 174 | sequences = os.listdir(self.dataset_dir) 175 | # sequences = sequences[:3000] 176 | train_sequences,test_sequences = train_test_split(sequences,test_size=0.1) 177 | num_train_sequences = len(train_sequences) 178 | num_test_sequences = len(test_sequences) 179 | 180 | self.face_image_train_sequence = np.zeros((num_train_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 181 | self.mouth_image_train_sequence = np.zeros((num_train_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 182 | self.key_points_train_sequence = np.zeros((num_train_sequences,self.max_sequence_length,20,2)) 183 | self.distances_train_sequence = np.zeros((num_train_sequences,self.max_sequence_length,20)) 184 | self.angles_train_sequence = np.zeros((num_train_sequences,self.max_sequence_length,20)) 185 | self.Y_train = np.zeros((num_train_sequences,),dtype=np.uint8) 186 | 187 | 188 | 189 | 190 | 191 | self.face_image_test_sequence = np.zeros((num_test_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 192 | self.mouth_image_test_sequence = np.zeros((num_test_sequences,self.max_sequence_length,self.image_shape[0],self.image_shape[1],self.image_shape[2])) 193 | self.key_points_test_sequence = np.zeros((num_test_sequences,self.max_sequence_length,20,2)) 194 | self.distances_test_sequence = np.zeros((num_test_sequences,self.max_sequence_length,20)) 195 | self.angles_test_sequence = np.zeros((num_test_sequences,self.max_sequence_length,20)) 196 | self.Y_test = np.zeros((num_test_sequences,),dtype=np.uint8) 197 | 198 | print "Loading",num_train_sequences,"train sequences" 199 | for i in range(num_train_sequences): 200 | faces,mouths,points,distances, 
angles = self.get_mouth_features(os.path.join(self.dataset_dir, train_sequences[i])) 201 | self.face_image_train_sequence[i] = faces 202 | self.mouth_image_train_sequence[i] = mouths 203 | self.key_points_train_sequence[i] = points 204 | self.distances_train_sequence[i] = distances 205 | self.angles_train_sequence[i] = angles 206 | self.Y_train[i] = self.get_is_talking(train_sequences[i]) 207 | if (i+1)%100==0: 208 | print "loaded",i+1,"sequences" 209 | print "Loaded",num_train_sequences,"train sequences" 210 | 211 | print "Loading test sequences" 212 | for i in range(num_test_sequences): 213 | faces,images,points,distances, angles = self.get_mouth_features(os.path.join(self.dataset_dir,test_sequences[i])) 214 | self.face_image_test_sequence[i] = faces 215 | self.mouth_image_test_sequence[i] = images 216 | self.key_points_test_sequence[i] = points 217 | self.distances_test_sequence[i] = distances 218 | self.angles_test_sequence[i] = angles 219 | self.Y_test[i] = self.get_is_talking(test_sequences[i]) 220 | if (i+1)%100==0: 221 | print "loaded",i+1,"sequences" 222 | 223 | print "Loaded test sequences" 224 | 225 | 226 | print "Preprocessing dataset" 227 | # Normalize images 228 | 229 | self.face_image_train_sequence = self.face_image_train_sequence.astype(np.float32)/255.0 230 | self.face_image_test_sequence = self.face_image_test_sequence.astype(np.float32)/255.0 231 | # Normalize images 232 | 233 | self.mouth_image_train_sequence = self.mouth_image_train_sequence.astype(np.float32)/255.0 234 | self.mouth_image_test_sequence = self.mouth_image_test_sequence.astype(np.float32)/255.0 235 | 236 | # Normalize key points 237 | image_width = self.image_shape[0] 238 | self.key_points_train_sequence = self.key_points_train_sequence.astype(np.float32)/float(image_width) 239 | self.key_points_test_sequence = self.key_points_test_sequence.astype(np.float32)/float(image_width) 240 | 241 | # Expand dims for network input 242 | self.key_points_train_sequence = np.expand_dims(self.key_points_train_sequence,2) 243 | self.key_points_test_sequence = np.expand_dims(self.key_points_test_sequence,2) 244 | 245 | # Normalize distances 246 | self.distances_train_sequence = self.distances_train_sequence.astype(np.float32)/float(image_width) 247 | self.distances_test_sequence = self.distances_test_sequence.astype(np.float32)/float(image_width) 248 | 249 | # Expand dims for network input 250 | self.distances_train_sequence = np.expand_dims(self.distances_train_sequence,2) 251 | self.distances_train_sequence = np.expand_dims(self.distances_train_sequence,4) 252 | 253 | self.distances_test_sequence = np.expand_dims(self.distances_test_sequence,2) 254 | self.distances_test_sequence = np.expand_dims(self.distances_test_sequence,4) 255 | 256 | # Normalize angles 257 | self.angles_train_sequence = self.angles_train_sequence.astype(np.float32)/np.pi 258 | self.angles_test_sequence = self.angles_test_sequence.astype(np.float32)/np.pi 259 | # Expand dims for network input 260 | self.angles_train_sequence = np.expand_dims(self.angles_train_sequence,2) 261 | self.angles_train_sequence = np.expand_dims(self.angles_train_sequence,4) 262 | 263 | self.angles_test_sequence = np.expand_dims(self.angles_test_sequence,2) 264 | self.angles_test_sequence = np.expand_dims(self.angles_test_sequence,4) 265 | print "All datasets are loaded and preprocessed" 266 | self.dataset_loaded = True 267 | def generator(self,batch_size): 268 | while True: 269 | indexes = range(len(self.mouth_image_train_sequence)) 270 | np.random.shuffle(indexes) 271 | 
for i in range(0,len(indexes),batch_size): 272 | current_indexes = indexes[i:i+batch_size] 273 | f_images = self.face_image_train_sequence[current_indexes] 274 | m_images = self.mouth_image_train_sequence[current_indexes] 275 | kpoints = self.key_points_train_sequence[current_indexes] 276 | dpoints = self.distances_train_sequence[current_indexes] 277 | angles = self.angles_train_sequence[current_indexes] 278 | 279 | y = self.Y_train[current_indexes] 280 | y = np.eye(2)[y] 281 | # yield [m_images,kpoints,dpoints,angles],y 282 | # yield [m_images, f_images],y 283 | yield [m_images,f_images,kpoints,dpoints,angles],y -------------------------------------------------------------------------------- /nets/__init__.py: -------------------------------------------------------------------------------- 1 | from keras.layers import Dropout 2 | from keras.layers import Conv2D,MaxPool2D,Dense,Flatten,Input,concatenate,Concatenate 3 | from keras.layers import LSTM,TimeDistributed,Add 4 | from keras.models import Model,Sequential 5 | import keras 6 | import numpy as np 7 | 8 | class Network(object): 9 | def __init__(self,dataset,input_shape,max_sequence_length): 10 | self.dataset = dataset 11 | self.input_shape = input_shape 12 | self.max_sequence_length = max_sequence_length 13 | self.model = self.build() 14 | self.model.summary() 15 | def build(self): 16 | 17 | face_model = Sequential() 18 | face_model.add(TimeDistributed(Conv2D(32,(3,3),padding='same',activation="relu",strides=(1, 1)),\ 19 | name="face_layer1",input_shape=(self.max_sequence_length, self.input_shape[0], self.input_shape[1], self.input_shape[2]))) 20 | 21 | face_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 22 | face_model.add(TimeDistributed(Conv2D(64,kernel_size=(3,3),strides=(1, 1),padding='same',\ 23 | activation="relu",name="face_layer2"))) 24 | face_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 25 | face_model.add(TimeDistributed(Conv2D(128,kernel_size=(3,3),strides=(1, 1),padding='same',\ 26 | activation="relu",name="face_layer3"))) 27 | face_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 28 | face_model.add(TimeDistributed(Flatten())) 29 | 30 | nose_model = Sequential() 31 | nose_model.add(TimeDistributed(Conv2D(32,(3,3),padding='same',activation="relu",strides=(1, 1)),\ 32 | name="nose_layer1",input_shape=(self.max_sequence_length, self.input_shape[0], self.input_shape[1], self.input_shape[2]))) 33 | 34 | nose_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 35 | nose_model.add(TimeDistributed(Conv2D(64,kernel_size=(3,3),strides=(1, 1),padding='same',\ 36 | activation="relu",name="nose_layer2"))) 37 | nose_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 38 | nose_model.add(TimeDistributed(Conv2D(128,kernel_size=(3,3),strides=(1, 1),padding='same',\ 39 | activation="relu",name="nose_layer3"))) 40 | nose_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 41 | nose_model.add(TimeDistributed(Flatten())) 42 | 43 | left_eye_model = Sequential() 44 | left_eye_model.add(TimeDistributed(Conv2D(32,(3,3),padding='same',activation="relu",strides=(1, 1)),\ 45 | name="left_eye_layer1",input_shape=(self.max_sequence_length, self.input_shape[0], self.input_shape[1], self.input_shape[2]))) 46 | 47 | left_eye_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 48 | left_eye_model.add(TimeDistributed(Conv2D(64,kernel_size=(3,3),strides=(1, 1),padding='same',\ 49 | activation="relu",name="left_eye_layer2"))) 50 | left_eye_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 51 | 
left_eye_model.add(TimeDistributed(Conv2D(128,kernel_size=(3,3),strides=(1, 1),padding='same',\ 52 | activation="relu",name="left_eye_layer3"))) 53 | left_eye_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 54 | left_eye_model.add(TimeDistributed(Flatten())) 55 | 56 | right_eye_model = Sequential() 57 | right_eye_model.add(TimeDistributed(Conv2D(32,(3,3),padding='same',activation="relu",strides=(1, 1)),\ 58 | name="right_eye_layer1",input_shape=(self.max_sequence_length, self.input_shape[0], self.input_shape[1], self.input_shape[2]))) 59 | 60 | right_eye_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 61 | right_eye_model.add(TimeDistributed(Conv2D(64,kernel_size=(3,3),strides=(1, 1),padding='same',\ 62 | activation="relu",name="right_eye_layer2"))) 63 | right_eye_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 64 | right_eye_model.add(TimeDistributed(Conv2D(128,kernel_size=(3,3),strides=(1, 1),padding='same',\ 65 | activation="relu",name="right_eye_layer3"))) 66 | right_eye_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 67 | right_eye_model.add(TimeDistributed(Flatten())) 68 | 69 | mouth_model = Sequential() 70 | mouth_model.add(TimeDistributed(Conv2D(32,(3,3),padding='same',activation="relu",strides=(1, 1)),\ 71 | name="mouth_layer1",input_shape=(self.max_sequence_length, self.input_shape[0], self.input_shape[1], self.input_shape[2]))) 72 | 73 | mouth_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 74 | mouth_model.add(TimeDistributed(Conv2D(64,kernel_size=(3,3),strides=(1, 1),padding='same',\ 75 | activation="relu",name="mouth_layer2"))) 76 | mouth_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 77 | mouth_model.add(TimeDistributed(Conv2D(128,kernel_size=(3,3),strides=(1, 1),padding='same',\ 78 | activation="relu",name="mouth_layer3"))) 79 | mouth_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 80 | mouth_model.add(TimeDistributed(Flatten())) 81 | merged_layer = Add()([face_model.output,left_eye_model.output,right_eye_model.output,nose_model.output,mouth_model.output]) 82 | 83 | dense1 = TimeDistributed(Dense(128,activation="relu"))(merged_layer) 84 | dropout1 = TimeDistributed(Dropout(0.2))(dense1) 85 | dense2 = TimeDistributed(Dense(256,activation="relu"))(dropout1) 86 | dropout2 = TimeDistributed(Dropout(0.2))(dense2) 87 | lstm1 = LSTM(32,activation='relu',return_sequences=True,stateful=False)(dropout2) 88 | lstm2 = LSTM(64,activation='relu',return_sequences=False,stateful=False)(lstm1) 89 | 90 | dense3 = Dense(256,activation="relu")(lstm2) 91 | output = Dense(2,activation="softmax")(dense3) 92 | 93 | # model = Model(inputs=[face_layer,left_eye_layer_input,right_eye_layer_input,nose_layer_input,mouth_layer_input],outputs=output) 94 | model = Model(inputs=[face_model.input,left_eye_model.input,right_eye_model.input,nose_model.input,mouth_model.input],\ 95 | outputs = output 96 | ) 97 | return model 98 | 99 | 100 | 101 | def train(self): 102 | 103 | faces = self.dataset.face_image_test_sequences.astype(np.float32)/255 104 | left_eyes = self.dataset.left_eye_image_test_sequences.astype(np.float32)/255 105 | right_eyes = self.dataset.right_eye_image_test_sequences.astype(np.float32)/255 106 | noses = self.dataset.nose_image_test_sequences.astype(np.float32)/255 107 | mouths = self.dataset.mouth_image_test_sequences.astype(np.float32)/255 108 | 109 | 110 | X_test= [faces,left_eyes,right_eyes,noses,mouths] 111 | 112 | y_test = self.dataset.talking_test.astype(np.uint8) 113 | print y_test[0] 114 | y_test = np.eye(2)[y_test] 115 | 116 
| self.model.compile(loss=keras.losses.categorical_crossentropy,optimizer=keras.optimizers.Adam(1e-4),metrics=["accuracy"]) 117 | self.model.fit_generator(self.dataset.generator(1),steps_per_epoch=5000,epochs=10,verbose=1,validation_data=(X_test,y_test)) 118 | self.model.save_weights("models/model.h5") 119 | model_json = self.model.to_json() 120 | with open("models/model.json","w+") as json_file: 121 | json_file.write(model_json) 122 | score = self.model.evaluate(X_test,y_test) 123 | with open("logs/log.txt","w+") as log_file: 124 | log_file.write("Score: "+str(score)) 125 | log_file.write("\n") -------------------------------------------------------------------------------- /nets/__main__.py: -------------------------------------------------------------------------------- 1 | from nets import Network 2 | 3 | def main(): 4 | net = Network((24,24,1),100) 5 | if __name__ == "__main__": 6 | main() -------------------------------------------------------------------------------- /nets/mouth_features.py: -------------------------------------------------------------------------------- 1 | from keras.layers import Dropout 2 | from keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Input, Concatenate 3 | from keras.layers import LSTM, TimeDistributed, Add, Bidirectional 4 | from keras.models import Model, Sequential 5 | import keras 6 | import numpy as np 7 | 8 | class MouthFeatureOnlyNet(object): 9 | def __init__(self, dataset, input_shape, max_sequence_length): 10 | self.dataset = dataset 11 | self.input_shape = input_shape 12 | self.max_sequence_length = max_sequence_length 13 | self.model = self.build() 14 | self.model.summary() 15 | def build(self): 16 | mouth_image_model = Sequential() 17 | mouth_image_model.add(TimeDistributed(Conv2D(32,(3,3),padding='same',activation="relu",strides=(1, 1)),\ 18 | name="mouth_image_layer1",input_shape=(self.max_sequence_length, self.input_shape[0], self.input_shape[1], self.input_shape[2]))) 19 | 20 | mouth_image_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 21 | mouth_image_model.add(TimeDistributed(Conv2D(64,kernel_size=(3,3),strides=(1, 1),padding='same',\ 22 | activation="relu",name="mouth_image_layer2"))) 23 | mouth_image_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 24 | mouth_image_model.add(TimeDistributed(Conv2D(128,kernel_size=(3,3),strides=(1, 1),padding='same',\ 25 | activation="relu",name="mouth_image_layer3"))) 26 | mouth_image_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 27 | mouth_image_model.add(TimeDistributed(Flatten())) 28 | 29 | mouth_image_model.add(Bidirectional(LSTM(32,return_sequences=True))) 30 | mouth_image_model.add(Bidirectional(LSTM(128,return_sequences=False))) 31 | mouth_image_model.add(Dense(128,activation="relu")) 32 | 33 | face_image_model = Sequential() 34 | face_image_model.add(TimeDistributed(Conv2D(32,(3,3),padding='same',activation="relu",strides=(1, 1)),\ 35 | name="face_image_layer1",input_shape=(self.max_sequence_length, self.input_shape[0], self.input_shape[1], self.input_shape[2]))) 36 | 37 | face_image_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 38 | face_image_model.add(TimeDistributed(Conv2D(64,kernel_size=(3,3),strides=(1, 1),padding='same',\ 39 | activation="relu",name="face_image_layer2"))) 40 | face_image_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 41 | face_image_model.add(TimeDistributed(Conv2D(128,kernel_size=(3,3),strides=(1, 1),padding='same',\ 42 | activation="relu",name="face_image_layer3"))) 43 | 
face_image_model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) 44 | face_image_model.add(TimeDistributed(Flatten())) 45 | 46 | face_image_model.add(Bidirectional(LSTM(32,return_sequences=True))) 47 | face_image_model.add(Bidirectional(LSTM(128,return_sequences=False))) 48 | face_image_model.add(Dense(128,activation="relu")) 49 | 50 | dpts_model = Sequential() 51 | dpts_model.add(TimeDistributed(Conv2D(32,(1,3),padding='same',activation="relu",strides=(1, 1)),\ 52 | name="dpts_layer1",input_shape=(self.max_sequence_length, 1, 20, 2))) 53 | dpts_model.add(TimeDistributed(Conv2D(64,kernel_size=(3,3),strides=(1, 1),padding='same',\ 54 | activation="relu",name="dpts_layer2"))) 55 | dpts_model.add(TimeDistributed(Flatten())) 56 | 57 | dpts_model.add(Bidirectional(LSTM(32,return_sequences=True))) 58 | dpts_model.add(Bidirectional(LSTM(128,return_sequences=False))) 59 | dpts_model.add(Dense(128,activation="relu")) 60 | 61 | 62 | dpts_dists_model = Sequential() 63 | dpts_dists_model.add(TimeDistributed(Conv2D(32,(1,3),padding='same',activation="relu",strides=(1, 1)),\ 64 | name="dpts_dists_layer1",input_shape=(self.max_sequence_length, 1, 20, 1))) 65 | dpts_dists_model.add(TimeDistributed(Conv2D(64,kernel_size=(3,3),strides=(1, 1),padding='same',\ 66 | activation="relu",name="dpts_dists_layer2"))) 67 | dpts_dists_model.add(TimeDistributed(Flatten())) 68 | dpts_dists_model.add(Bidirectional(LSTM(32,return_sequences=True))) 69 | dpts_dists_model.add(Bidirectional(LSTM(128,return_sequences=False))) 70 | dpts_dists_model.add(Dense(128,activation="relu")) 71 | 72 | dpts_angles_model = Sequential() 73 | dpts_angles_model.add(TimeDistributed(Conv2D(32,(1,3),padding='same',activation="relu",strides=(1, 1)),\ 74 | name="dpts_angles_layer1",input_shape=(self.max_sequence_length, 1, 20, 1))) 75 | dpts_angles_model.add(TimeDistributed(Conv2D(64,kernel_size=(3,3),strides=(1, 1),padding='same',\ 76 | activation="relu",name="dpts_angles_layer2"))) 77 | dpts_angles_model.add(TimeDistributed(Flatten())) 78 | dpts_angles_model.add(Bidirectional(LSTM(32,return_sequences=True))) 79 | dpts_angles_model.add(Bidirectional(LSTM(128,return_sequences=False))) 80 | dpts_angles_model.add(Dense(128,activation="relu")) 81 | 82 | 83 | 84 | merged = keras.layers.concatenate([mouth_image_model.output, face_image_model.output,dpts_model.output,dpts_dists_model.output,dpts_angles_model.output]) 85 | 86 | 87 | merged = Dense(128,activation="relu")(merged) 88 | merged = Dense(256,activation="relu")(merged) 89 | 90 | merged = Dense(2,activation="softmax")(merged) 91 | 92 | model = Model(inputs=[mouth_image_model.input,face_image_model.input,dpts_model.input,dpts_dists_model.input,dpts_angles_model.input],outputs=merged) 93 | 94 | 95 | return model 96 | 97 | 98 | 99 | 100 | def train(self): 101 | X_test= [self.dataset.mouth_image_test_sequence,self.dataset.face_image_test_sequence, self.dataset.key_points_test_sequence, \ 102 | self.dataset.distances_test_sequence, self.dataset.angles_test_sequence] 103 | # X_test= [self.dataset.mouth_image_test_sequence,\ 104 | # self.dataset.face_image_test_sequence] 105 | 106 | y_test = self.dataset.Y_test 107 | y_test = np.eye(2)[y_test] 108 | 109 | self.model.compile(loss=keras.losses.binary_crossentropy,optimizer=keras.optimizers.Adam(1e-4),metrics=["accuracy"]) 110 | self.model.fit_generator(self.dataset.generator(1),steps_per_epoch=5000,epochs=25,verbose=1,validation_data=(X_test,y_test)) 111 | 112 | model_name = "model-mouth-100" 113 | self.model.save_weights("models/"+model_name+".h5") 114 
| model_json = self.model.to_json() 115 | with open("models/"+model_name+".json","w+") as json_file: 116 | json_file.write(model_json) 117 | score = self.model.evaluate(X_test,y_test) 118 | with open("logs/log-mouth.txt","a+") as log_file: 119 | log_file.write("Score of "+model_name+": "+str(score)) 120 | log_file.write("\n") 121 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # This file may be used to create an environment using: 2 | # $ conda create --name --file 3 | # platform: linux-64 4 | backports=1.0=py27h63c9359_1 5 | backports.shutil_get_terminal_size=1.0.0=py27h5bc021e_2 6 | backports.weakref=1.0rc1=py27_0 7 | backports_abc=0.5=py27h7b3c97b_0 8 | bleach=1.5.0=py27_0 9 | bokeh=0.12.10=py27he46cc6b_0 10 | ca-certificates=2017.08.26=h1d4fec5_0 11 | cairo=1.12.18=6 12 | certifi=2017.7.27.1=py27h9ceb091_0 13 | cloudpickle=0.4.0=py27ha64365b_0 14 | cycler=0.10.0=py27hc7354d3_0 15 | dask=0.15.2=py27_0 16 | decorator=4.1.2=py27h1544723_0 17 | enum34=1.1.6=py27h99a27e9_1 18 | fontconfig=2.11.1=6 19 | freetype=2.5.5=1 20 | funcsigs=1.0.2=py27h83f16ab_0 21 | futures=3.1.1=py27hdbc8cbb_0 22 | h5py=2.7.0=np111py27_0 23 | hdf5=1.8.17=2 24 | html5lib=0.9999999=py27_0 25 | icu=58.2=h211956c_0 26 | imageio=2.2.0=py27hf108a7f_0 27 | intel-openmp=2018.0.0=h15fc484_7 28 | ipython=5.4.1=py27h36c99b6_1 29 | ipython_genutils=0.2.0=py27h89fb69b_0 30 | jbig=2.1=hdba287a_0 31 | jinja2=2.9.6=py27h82327ae_1 32 | jpeg=8d=2 33 | keras=2.0.8=py27hd1b5a5b_0 34 | libedit=3.1=heed3624_0 35 | libffi=3.2.1=h4deb6c0_3 36 | libgcc-ng=7.2.0=h7cc24e2_2 37 | libgfortran=3.0.0=1 38 | libgfortran-ng=7.2.0=h9f7466a_2 39 | libpng=1.6.32=hda9c8bc_2 40 | libprotobuf=3.4.0=0 41 | libstdcxx-ng=7.2.0=h7a57d05_2 42 | libtiff=4.0.6=2 43 | libxml2=2.9.4=h6b072ca_5 44 | locket=0.2.0=py27h73929a2_1 45 | markdown=2.6.9=py27_0 46 | markupsafe=1.0=py27h97b2822_1 47 | matplotlib=1.5.1=np111py27_0 48 | mkl=2017.0.4=h4c4d0af_0 49 | mock=2.0.0=py27h0c0c831_0 50 | ncurses=6.0=h06874d7_1 51 | networkx=2.0=py27hfc23926_0 52 | numpy=1.11.3=py27_0 53 | opencv=3.1.0=np111py27_1 54 | openssl=1.0.2l=h077ae2c_5 55 | pandas=0.20.3=py27h820b67f_2 56 | partd=0.3.8=py27h4e55004_0 57 | pathlib2=2.3.0=py27h6e9d198_0 58 | pbr=3.1.1=py27hf64632f_0 59 | pexpect=4.2.1=py27hcf82287_0 60 | pickleshare=0.7.4=py27h09770e1_0 61 | pillow=3.4.2=py27_0 62 | pip=9.0.1=py27hbf658b2_3 63 | pixman=0.32.6=0 64 | prompt_toolkit=1.0.15=py27h1b593e1_0 65 | protobuf=3.4.0=py27_0 66 | ptyprocess=0.5.2=py27h4ccb14c_0 67 | py2cairo=1.10.0=py27_2 68 | pycairo=1.10.0=py27_0 69 | pygments=2.2.0=py27h4a8b6f5_0 70 | pyparsing=2.0.3=py27_0 71 | pyqt=4.11.4=py27_4 72 | python=2.7.14=hc2b0042_21 73 | python-dateutil=2.6.1=py27h4ca5741_1 74 | pytz=2017.2=py27hcac29fa_1 75 | pywavelets=0.5.2=py27hecda097_0 76 | pyyaml=3.12=py27h2d70dd7_1 77 | qt=4.8.7=3 78 | readline=7.0=hac23ff0_3 79 | scandir=1.6=py27hf7388dc_0 80 | scikit-image=0.13.0=py27h06cb35d_1 81 | scikit-learn=0.18.1=np111py27_1 82 | scipy=0.19.0=np111py27_0 83 | setuptools=36.5.0=py27h68b189e_0 84 | simplegeneric=0.8.1=py27h19e43cd_0 85 | singledispatch=3.4.0.3=py27h9bcb476_0 86 | sip=4.18=py27_0 87 | six=1.11.0=py27h5f960f1_1 88 | sqlite=3.20.1=h6d8b0f3_1 89 | ssl_match_hostname=3.5.0.1=py27h4ec10b9_2 90 | tensorflow=1.3.0=0 91 | tensorflow-base=1.3.0=py27h0dbb4d0_1 92 | tensorflow-tensorboard=0.1.5=py27_0 93 | tk=8.6.7=h5979e9b_1 94 | toolz=0.8.2=py27hd3b1e7e_0 95 | 
tornado=4.5.2=py27h97b179f_0 96 | traitlets=4.3.2=py27hd6ce930_0 97 | wcwidth=0.1.7=py27h9e3e1ab_0 98 | werkzeug=0.12.2=py27hbf75dff_0 99 | wheel=0.29.0=py27h411dd7b_1 100 | xz=5.2.3=h2bcbf08_1 101 | yaml=0.1.7=h96e3832_1 102 | zlib=1.2.11=hfbfcf68_1 103 | -------------------------------------------------------------------------------- /split/__init__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | def get_cmd_args(): 4 | parser = argparse.ArgumentParser() 5 | parser.add_argument("-p","--images_path",type=str) 6 | parser.add_argument("-f","--faces_path",type=str) 7 | parser.add_argument("-o","--output_path",type=str) 8 | parser.add_argument("-l","--sequence_length",type=int,default=30) 9 | args = parser.parse_args() 10 | return args 11 | -------------------------------------------------------------------------------- /split/__main__.py: -------------------------------------------------------------------------------- 1 | from split import get_cmd_args 2 | from split.split_squence import track_faces_inside_sequences,split_sequence 3 | def main(): 4 | args = get_cmd_args() 5 | print "tracking all faces" 6 | track_faces_inside_sequences(args.images_path,args.faces_path) 7 | print "done with tracking faces" 8 | print "splitting dataset" 9 | split_sequence(args.images_path,args.output_path,args.sequence_length) 10 | if __name__ == '__main__': 11 | main() -------------------------------------------------------------------------------- /split/split_squence.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import cv2 4 | import dlib 5 | import numpy as np 6 | import json 7 | from sys import exit 8 | 9 | 10 | def split_array(array,max_size): 11 | output = [] 12 | for i in range(0,len(array),max_size): 13 | output+=[array[i:i+max_size]] 14 | return output 15 | def copy_images(imgs_files,source_folder,dest_folder): 16 | for imfile in imgs_files: 17 | shutil.copy(os.path.join(source_folder,imfile),os.path.join(dest_folder,imfile)) 18 | 19 | def split_sequence(dataset_dir,output_dir,max_size): 20 | if not os.path.exists(output_dir): 21 | os.mkdir(output_dir) 22 | sequences = os.listdir(dataset_dir) 23 | for s in sequences: 24 | current_path = os.path.join(dataset_dir,s) 25 | s_images = os.listdir(current_path) 26 | s_images.sort() 27 | splited_seq = split_array(s_images,max_size) 28 | for i in range(len(splited_seq)): 29 | dest_folder = os.path.join(output_dir,s+"-"+str(i)) 30 | if not os.path.exists(dest_folder): 31 | os.mkdir(dest_folder) 32 | copy_images(splited_seq[i],current_path,dest_folder) 33 | print "Processed",s 34 | def rect_to_array(rect): 35 | output = [] 36 | output[0:4] = rect.left(),rect.top(),rect.right(),rect.bottom() 37 | return output 38 | def track_all_faces(sequence_path,img_files,face_index,detector,predictor): 39 | 40 | img = cv2.imread(os.path.join(sequence_path,img_files[face_index])) 41 | face = detector(img)[0] 42 | tracker = dlib.correlation_tracker() 43 | win = dlib.image_window() 44 | tracker.start_track(img,face) 45 | bounding_boxes = {} 46 | for i in range(face_index,-1,-1): 47 | img = cv2.imread(os.path.join(sequence_path,img_files[i])) 48 | faces = detector(img) 49 | if len(faces)>0: 50 | tracked_face = faces[0] 51 | tracker.start_track(img,tracked_face) 52 | else: 53 | tracker.update(img) 54 | tracked_face = tracker.get_position() 55 | bounding_boxes[img_files[i]] = rect_to_array(tracked_face) 56 | win.clear_overlay() 57 | _,name = 
os.path.split(sequence_path) 58 | win.set_title(name.split("-")[2]) 59 | win.set_image(img) 60 | win.add_overlay(tracked_face) 61 | 62 | img = cv2.imread(os.path.join(sequence_path,img_files[face_index])) 63 | face = detector(img)[0] 64 | tracker.start_track(img,face) 65 | for i in range(face_index+1,len(img_files)): 66 | 67 | img = cv2.imread(os.path.join(sequence_path,img_files[i])) 68 | faces = detector(img) 69 | if len(faces)>0: 70 | tracked_face = faces[0] 71 | tracker.start_track(img,tracked_face) 72 | else: 73 | tracker.update(img) 74 | tracked_face = tracker.get_position() 75 | bounding_boxes[img_files[i]] = rect_to_array(tracked_face) 76 | win.clear_overlay() 77 | win.set_image(img) 78 | win.add_overlay(tracked_face) 79 | return bounding_boxes 80 | 81 | def track_face_inside_sequence(sequence_path,output_dir): 82 | img_files = os.listdir(sequence_path) 83 | img_files.sort() 84 | detector = dlib.get_frontal_face_detector() 85 | predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat") 86 | bounding_box = {} 87 | face_found = False 88 | sequence_basename = os.path.basename(sequence_path) 89 | if not os.path.exists(output_dir): 90 | os.mkdir(output_dir) 91 | for i in range(len(img_files)): 92 | img = cv2.imread(os.path.join(sequence_path,img_files[i])) 93 | faces = detector(img) 94 | if len(faces)>0: 95 | bounding_box = track_all_faces(sequence_path, img_files,i,detector,predictor) 96 | with open(os.path.join(output_dir,sequence_basename)+".json","w+") as bbox_file: 97 | json.dump(bounding_box,bbox_file) 98 | face_found = True 99 | break 100 | if not face_found: 101 | print "No faces found inside ",sequence_path, " sequence" 102 | def track_faces_inside_sequences(dataset_dir,output_dir): 103 | for seq in os.listdir(dataset_dir): 104 | track_face_inside_sequence(os.path.join(dataset_dir,seq),output_dir) 105 | 106 | -------------------------------------------------------------------------------- /train/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ymitiku/TalkingYawnDetection/b8ab7a5ecacce31555ff3fa2769df1fd455084fa/train/__init__.py -------------------------------------------------------------------------------- /train/__main__.py: -------------------------------------------------------------------------------- 1 | from dataset.mouth_features import MouthFeatureOnlyDataset 2 | from nets.mouth_features import MouthFeatureOnlyNet 3 | import argparse 4 | def get_cmd_args(): 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument("-d","--dataset_path",type=str) 7 | parser.add_argument("-f","--faces_path",type=str) 8 | parser.add_argument("-l","--sequence_length",type=int,default=30) 9 | args = parser.parse_args() 10 | return args 11 | def main(): 12 | args = get_cmd_args() 13 | dataset = MouthFeatureOnlyDataset(args.dataset_path,args.faces_path,(48,48,1),args.sequence_length) 14 | 15 | dataset.load_dataset() 16 | net = MouthFeatureOnlyNet(dataset,(48,48,1),args.sequence_length) 17 | net.train() 18 | if __name__ == "__main__": 19 | main() --------------------------------------------------------------------------------
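
The `train()` method above serializes the trained network to `models/model-mouth-100.json` (architecture) and `models/model-mouth-100.h5` (weights). As a minimal sketch, assuming only those two files and the Keras version pinned in requirements.txt, the snippet below shows one way the saved model could be reloaded for inference; it is not a file in this repository. The zero-valued dummy batch is built from the model's own declared input shapes just to exercise the graph; a real prediction would instead pass the five preprocessed sequence streams used during training (mouth images, face images, key points, distances, angles).

```python
# Hypothetical inference sketch -- not part of the repository sources above.
# It assumes only the artifacts written by the train() method:
#   models/model-mouth-100.json  (architecture)
#   models/model-mouth-100.h5    (weights)
import numpy as np
from keras.models import model_from_json

with open("models/model-mouth-100.json") as json_file:
    model = model_from_json(json_file.read())
model.load_weights("models/model-mouth-100.h5")

# Build one zero-valued sample per input from the model's declared shapes,
# dropping the batch dimension; replace these with real preprocessed sequences.
dummy_batch = [np.zeros((1,) + shape[1:]) for shape in model.input_shape]

scores = model.predict(dummy_batch)  # shape (1, 2): softmax over the two classes
print(scores)
```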