├── .gitignore ├── ControllerThread.py ├── DetectionThread.py ├── EstimateAge.py ├── GrabUnit.py ├── GrabberThread.py ├── LICENSE ├── README.md ├── RecognitionThread.py ├── UnitServer.py ├── Verdana.ttf ├── compute_features.py ├── config.ini ├── doc ├── architecture.png └── demo.jpg └── tools └── generate_celeb_visualizations.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/* 2 | __pycache__/* 3 | detection/* 4 | recognizers/* 5 | -------------------------------------------------------------------------------- /ControllerThread.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Nov 2 15:44:40 2016 4 | 5 | @author: agedemo 6 | """ 7 | 8 | from UnitServer import UnitServer 9 | from GrabberThread import GrabberThread 10 | from DetectionThread import DetectionThread 11 | from RecognitionThread import RecognitionThread 12 | 13 | import threading 14 | import time 15 | import sys 16 | import cv2 17 | import copy 18 | import os 19 | import numpy as np 20 | import subprocess 21 | 22 | class ControllerThread(threading.Thread): 23 | """ Responsible for starting and shutting down all threads and 24 | services. """ 25 | 26 | def __init__(self, params): 27 | threading.Thread.__init__(self) 28 | 29 | self.terminated = False 30 | self.caption = params.get("window", "caption") 31 | 32 | self.initializeFonts(params) 33 | 34 | self.minDetections = int(params.get("recognition", "mindetections")) 35 | 36 | self.displaysize = params.get("window", "displaysize") 37 | self.displaysize = self.displaysize.upper().split("X") 38 | self.displaysize = tuple([int(s) for s in self.displaysize]) 39 | 40 | self.debug = params.get("general", "debug") not in ("false", "False", "0") 41 | 42 | # Get current resolution (only implemented for Linux) 43 | if sys.platform == 'linux': 44 | self.resolution = subprocess.Popen('xrandr | grep "\*" | cut -d" " -f4', shell=True, 45 | stdout=subprocess.PIPE).communicate()[0].decode("utf-8").rstrip().split('x') 46 | self.resolution = [int(s) for s in self.resolution] 47 | else: 48 | self.resolution = [1024, 768] 49 | 50 | # Start frame storage 51 | queueLength = params.getint("server", "num_frames") 52 | self.unitServer = UnitServer(queueLength) 53 | 54 | # Start Grabber thread 55 | self.grabberThread = GrabberThread(self, params) 56 | self.grabberThread.start() 57 | 58 | # Start Detection thread 59 | self.faces = [] 60 | self.detectionThread = DetectionThread(self, params) 61 | self.detectionThread.start() 62 | 63 | # Start Recognition Thread 64 | self.recognitionThread = RecognitionThread(self, params) 65 | self.recognitionThread.start() 66 | 67 | unused_width = self.resolution[0] - self.displaysize[0] 68 | 69 | cv2.moveWindow(self.caption, unused_width//2, 0) # Will move window to center when everything is running. 
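        # Note: commandInterface() below runs a blocking stdin loop, so __init__ does not
        # return until the user quits with 'q'. The window keeps updating meanwhile, because
        # frames are displayed via putUnit()/showVideo(), which the grabber thread drives.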
70 | 71 | self.commandInterface() 72 | 73 | def commandInterface(self): 74 | while True: 75 | text = input("Enter command (Q)uit, (L)ist models, (S)witch model: ").lower() 76 | if text == "l": 77 | self.recognitionThread.print_models() 78 | elif text == "s": 79 | idx = input("Please input a new index: ") 80 | try: 81 | self.recognitionThread.switch_model(idx) 82 | except KeyError as e: 83 | print("No such model index", e) 84 | 85 | elif text == "q": 86 | print("Bye!") 87 | self.terminate() 88 | break 89 | 90 | def initializeFonts(self, params): 91 | """ 92 | Tries to initialize freetype for nicer fonts, if not installed fall back to standard. 93 | Freetype isn't included in the PIP/Conda packages, so we can't require it. 94 | """ 95 | self.freeType = None 96 | freetype_fontpath = params.get("window", "freetype_fontpath") 97 | sizetest_text = "FEMALE 100%" # Probably longest text possible 98 | try: 99 | self.freeType = cv2.freetype.createFreeType2() 100 | self.freeType.loadFontData(fontFileName=freetype_fontpath, id=0) 101 | self.textBaseScale = 20 # Maximum text scale, will be decreased if there's overlap. 102 | self.textBaseWidth = self.freeType.getTextSize(sizetest_text, self.textBaseScale, -1)[0][0] 103 | 104 | except AttributeError: 105 | print("OpenCV Freetype not found, falling back to standard OpenCV font...") 106 | self.textBaseScale = 0.6 107 | self.textBaseWidth = cv2.getTextSize(sizetest_text, cv2.FONT_HERSHEY_SIMPLEX, self.textBaseScale, 2)[0][0] 108 | 109 | 110 | def run(self): 111 | while not self.terminated: 112 | time.sleep(0.5) 113 | 114 | def putUnit(self, unit): 115 | 116 | # Show the newest frame immediately. 117 | self.showVideo(unit) 118 | 119 | # Send to further processing 120 | if not self.terminated: 121 | self.unitServer.putUnit(unit) 122 | 123 | def getUnit(self, caller, timestamp = None): 124 | 125 | return self.unitServer.getUnit(caller, timestamp) 126 | 127 | def terminate(self): 128 | 129 | self.terminated = True 130 | 131 | def drawBoundingBox(self, img, bbox): 132 | 133 | x,y,w,h = [int(c) for c in bbox] 134 | 135 | m = 0.2 136 | 137 | # Upper left corner 138 | pt1 = (x,y) 139 | pt2 = (int(x + m*w), y) 140 | cv2.line(img, pt1, pt2, color = [255,255,0], thickness = 2) 141 | 142 | pt1 = (x,y) 143 | pt2 = (x, int(y + m*h)) 144 | cv2.line(img, pt1, pt2, color = [255,255,0], thickness = 2) 145 | 146 | # Upper right corner 147 | pt1 = (x + w, y) 148 | pt2 = (x + w, int(y + m*h)) 149 | cv2.line(img, pt1, pt2, color = [255,255,0], thickness = 2) 150 | 151 | pt1 = (x + w, y) 152 | pt2 = (int(x + w - m * w), y) 153 | cv2.line(img, pt1, pt2, color = [255,255,0], thickness = 2) 154 | 155 | # Lower left corner 156 | pt1 = (x, y + h) 157 | pt2 = (x, int(y + h - m*h)) 158 | cv2.line(img, pt1, pt2, color = [255,255,0], thickness = 2) 159 | 160 | pt1 = (x, y + h) 161 | pt2 = (int(x + m * w), y + h) 162 | cv2.line(img, pt1, pt2, color = [255,255,0], thickness = 2) 163 | 164 | # Lower right corner 165 | pt1 = (x + w, y + h) 166 | pt2 = (x + w, int(y + h - m*h)) 167 | cv2.line(img, pt1, pt2, color = [255,255,0], thickness = 2) 168 | 169 | pt1 = (x + w, y + h) 170 | pt2 = (int(x + w - m * w), y + h) 171 | cv2.line(img, pt1, pt2, color = [255,255,0], thickness = 2) 172 | 173 | def AddCeleb(self, face, img, x, y, w, h): 174 | 175 | celebs = face["celebs"] 176 | indexes = celebs["indexes"] 177 | most_common = max(set(indexes), key=indexes.count) 178 | 179 | filename = celebs[most_common].filename 180 | distance = celebs[most_common].distance 181 | identity = 
filename.split(os.sep)[-2] 182 | 183 | celeb_img = cv2.imread(filename) 184 | aspect_ratio = celeb_img.shape[1] / celeb_img.shape[0] 185 | new_w = w 186 | new_h = int(w/aspect_ratio) 187 | if new_h > h: 188 | new_h = h 189 | try: 190 | celeb_img = cv2.resize(celeb_img, (new_w, new_h), interpolation=cv2.INTER_AREA) 191 | except AssertionError: # new_w or new_h is 0 ie bounding box size is 0 192 | return None 193 | 194 | # Cut out pixels overflowing image on the right 195 | x_end = x + w + new_w 196 | if x_end > img.shape[1]: 197 | remove_pixels = x_end - img.shape[1] 198 | celeb_img = celeb_img[:, :-remove_pixels, :] 199 | new_w -= remove_pixels 200 | 201 | # Cut out pixels overflowing image on the bottom 202 | y_offset = h - new_h 203 | y_end = y + y_offset + new_h 204 | if y_end > img.shape[0]: 205 | remove_pixels = y_end - img.shape[0] 206 | celeb_img = celeb_img[:-remove_pixels, ...] 207 | new_h -= remove_pixels 208 | 209 | if celeb_img.size: 210 | img[y + y_offset: y + y_offset + new_h, x + w: x + w + new_w, :] = celeb_img 211 | return identity 212 | 213 | def drawFace(self, face, img): 214 | 215 | bbox = np.mean(face['bboxes'], axis = 0) 216 | 217 | self.drawBoundingBox(img, bbox) 218 | x, y, w, h = [int(c) for c in bbox] 219 | 220 | # 1. CELEBRITY TWIN 221 | 222 | celeb_identity = None 223 | 224 | # Clamp bounding box top to image 225 | y = 0 if y < 0 else y 226 | 227 | if "celebs" in face.keys(): 228 | celeb_identity = self.AddCeleb(face, img, x, y, w, h) 229 | 230 | # Check if text can overlap the celeb texts (goes past the bounding box), if so decrease size 231 | text_size = self.textBaseScale 232 | 233 | if self.textBaseWidth > w: 234 | text_size *= w/self.textBaseWidth 235 | if self.freeType: 236 | text_size = int(text_size) # Freetype doesn't accept float text size. 237 | 238 | 239 | # 1. AGE 240 | 241 | if "age" in face.keys(): 242 | age = face['age'] 243 | annotation = "Age: %.0f" % age 244 | txtLoc = (x, y + h + 30) 245 | self.writeText(img, annotation, txtLoc, text_size) 246 | 247 | 248 | # 2. GENDER 249 | 250 | if "gender" in face.keys(): 251 | gender = "MALE" if face['gender'] > 0.5 else "FEMALE" 252 | genderProb = max(face["gender"], 1-face["gender"]) 253 | annotation = "%s %.0f %%" % (gender, 100.0 * genderProb) 254 | txtLoc = (x, y + h + 60) 255 | self.writeText(img, annotation, txtLoc, text_size) 256 | 257 | # 3. EXPRESSION 258 | 259 | if "expression" in face.keys(): 260 | expression = face["expression"] 261 | annotation = "%s" % (expression) 262 | txtLoc = (x, y + h + 90) 263 | self.writeText(img, annotation, txtLoc, text_size) 264 | 265 | if celeb_identity: 266 | annotation = "CELEBRITY" 267 | txtLoc = (x + w, y + h + 30) 268 | self.writeText(img, annotation, txtLoc, text_size) 269 | 270 | annotation = "TWIN" # (%.0f %%)" % (100*np.exp(-face["celeb_distance"])) 271 | txtLoc = (x + w, y + h + 60) 272 | self.writeText(img, annotation, txtLoc, text_size) 273 | 274 | annotation = celeb_identity 275 | txtLoc = (x + w, y + h + 90) 276 | self.writeText(img, annotation, txtLoc, text_size) 277 | 278 | # DEBUG ONLY - Visualize aligned face crop in corner. 
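            # The crop is stored by RecognitionThread in RGB order, so it is flipped back to
            # BGR below before being pasted into the (BGR) display frame.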
279 | if self.debug and "crop" in face.keys(): 280 | 281 | crop = face["crop"] 282 | crop = cv2.resize(crop, (100, 100)) 283 | croph, cropw = crop.shape[0:2] 284 | imgh, imgw = img.shape[0:2] 285 | 286 | img[:croph, imgw-cropw:, :] = crop[..., ::-1] 287 | 288 | def showVideo(self, unit): 289 | 290 | unit.acquire() 291 | frame = copy.deepcopy(unit.getFrame()) 292 | unit.release() 293 | 294 | # Annotate 295 | 296 | validFaces = [f for f in self.faces if len(f['bboxes']) > self.minDetections] 297 | 298 | for face in validFaces: 299 | self.drawFace(face, frame) 300 | 301 | frame = cv2.resize(frame, self.displaysize) 302 | cv2.imshow(self.caption, frame) 303 | key = cv2.waitKey(10) 304 | 305 | if key == 27: 306 | self.terminate() 307 | 308 | 309 | def findNearestFace(self, bbox): 310 | 311 | distances = [] 312 | 313 | x,y,w,h = bbox 314 | bboxCenter = [x + w/2, y + h/2] 315 | 316 | for face in self.faces: 317 | 318 | x,y,w,h = np.mean(face['bboxes'], axis = 0) 319 | faceCenter = [x + w/2, y + h/2] 320 | 321 | distance = np.hypot(faceCenter[0] - bboxCenter[0], 322 | faceCenter[1] - bboxCenter[1]) 323 | 324 | distances.append(distance) 325 | 326 | if len(distances) == 0: 327 | minIdx = None 328 | minDistance = None 329 | else: 330 | minDistance = np.min(distances) 331 | minIdx = np.argmin(distances) 332 | 333 | return minIdx, minDistance 334 | 335 | def setDetections(self, detections, timestamps): 336 | 337 | # Find the location among all recent face locations where this would belong 338 | 339 | for bbox, timestamp in zip(detections, timestamps): 340 | 341 | idx, dist = self.findNearestFace(bbox) 342 | 343 | if dist is not None and dist < 50: 344 | 345 | self.faces[idx]['bboxes'].append(bbox) 346 | self.faces[idx]['timestamps'].append(timestamp) 347 | 348 | if len(self.faces[idx]['bboxes']) > 7: 349 | self.faces[idx]['bboxes'].pop(0) 350 | self.faces[idx]['timestamps'].pop(0) 351 | 352 | else: 353 | # This is a new face not in the scene before 354 | self.faces.append({'timestamps': [timestamp], 'bboxes': [bbox]}) 355 | 356 | # Clean old detections: 357 | 358 | now = time.time() 359 | facesToRemove = [] 360 | 361 | for i, face in enumerate(self.faces): 362 | if now - face['timestamps'][-1] > 0.5: 363 | facesToRemove.append(i) 364 | 365 | for i in facesToRemove: 366 | try: 367 | self.faces.pop(i) 368 | except: 369 | # Face was deleted by other thread. 
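                # (a stale index then raises IndexError here, which is silently ignored)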
370 | pass 371 | 372 | def getFaces(self): 373 | 374 | if len(self.faces) == 0: 375 | return None 376 | else: 377 | return self.faces 378 | 379 | def isTerminated(self): 380 | 381 | return self.terminated 382 | 383 | def writeText(self, img, annotation, location, size): 384 | if self.freeType: 385 | self.freeType.putText(img=img, 386 | text=annotation, 387 | org=location, 388 | fontHeight=size, 389 | color=(255, 255, 0), 390 | thickness=-1, 391 | line_type=cv2.LINE_AA, 392 | bottomLeftOrigin=True) 393 | else: 394 | annotation = annotation.replace('ä', 'a').replace('ö', 'o').replace('å', 'o') 395 | cv2.putText(img, 396 | text=annotation, 397 | org=location, 398 | fontFace=cv2.FONT_HERSHEY_SIMPLEX, 399 | fontScale=size, 400 | color=[255, 255, 0], 401 | thickness=2) 402 | -------------------------------------------------------------------------------- /DetectionThread.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import cv2 4 | import threading 5 | import time 6 | 7 | 8 | class DetectionThread(threading.Thread): 9 | 10 | def __init__(self, parent, params): 11 | 12 | threading.Thread.__init__(self) 13 | 14 | print("Initializing detection thread...") 15 | self.parent = parent 16 | 17 | frozen_graph = str(params.get("detection", "inference_graph")) 18 | text_graph = str(params.get("detection", "text_graph")) 19 | 20 | self.cvNet = cv2.dnn.readNetFromTensorflow(frozen_graph, text_graph) 21 | 22 | # Image input size, must match the network 23 | self.width = int(params.get("detection", "input_width")) 24 | self.height = int(params.get("detection", "input_height")) 25 | 26 | def run(self): 27 | 28 | while self.parent.isTerminated() == False: 29 | 30 | unit = None 31 | 32 | while unit == None: 33 | 34 | unit = self.parent.getUnit(self) 35 | if unit == None: # No units available yet 36 | time.sleep(0.1) 37 | 38 | if self.parent.isTerminated(): 39 | break 40 | 41 | if self.parent.isTerminated(): 42 | break 43 | 44 | img = unit.getFrame() 45 | 46 | detection_img = img.copy() 47 | unit.release() 48 | 49 | rows, cols = img.shape[0:2] 50 | self.cvNet.setInput(cv2.dnn.blobFromImage(detection_img, size=(self.width, self.height), 51 | swapRB=True, crop=False)) 52 | timer = time.time() 53 | cvOut = self.cvNet.forward() 54 | 55 | # print("Det time: {:.2f} ms".format(1000*(time.time() - timer))) 56 | bboxes = [] 57 | timestamps = [] 58 | 59 | for detection in cvOut[0, 0, :, :]: 60 | score = float(detection[2]) 61 | 62 | left = int(detection[3] * cols) 63 | top = int(detection[4] * rows) 64 | right = int(detection[5] * cols) 65 | bottom = int(detection[6] * rows) 66 | width = right - left 67 | height = bottom - top 68 | 69 | if score > 0.3 and width > 60: 70 | bboxes.append([left, top, width, height]) 71 | timestamps.append(unit.getTimeStamp()) 72 | 73 | self.parent.setDetections(bboxes, timestamps) 74 | -------------------------------------------------------------------------------- /EstimateAge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import ControllerThread 4 | import configparser 5 | import sys 6 | 7 | if __name__ == '__main__': 8 | 9 | help_message = ''' 10 | USAGE: EstimateAge.py [params file] 11 | ''' 12 | 13 | if len(sys.argv) > 1: 14 | paramFile = sys.argv[1] 15 | else: 16 | paramFile = "config.ini" 17 | 18 | params = configparser.ConfigParser() 19 | params.read(paramFile) 20 | 21 | # Initialize controller thread 22 | 23 | controllerThread = 
ControllerThread.ControllerThread(params) 24 | controllerThread.start() 25 | -------------------------------------------------------------------------------- /GrabUnit.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Nov 2 16:03:54 2016 4 | 5 | @author: agedemo 6 | """ 7 | 8 | import time 9 | 10 | class GrabUnit(): 11 | 12 | def __init__(self, frame): 13 | 14 | self.timestamp = time.time() 15 | 16 | self.detected = False 17 | self.ageRecognized = False 18 | self.genderRecognized = False 19 | self.expressionRecognized = False 20 | 21 | # Keep track of how many processes are accessing this unit 22 | self.processes = 0 23 | 24 | self.frame = frame 25 | 26 | def getTimeStamp(self): 27 | 28 | return self.timestamp 29 | 30 | def getFrame(self): 31 | 32 | return self.frame 33 | 34 | def acquire(self): 35 | """ 36 | A thread starts to use this resource. Increment the 37 | process counter. 38 | """ 39 | 40 | self.processes += 1 41 | 42 | def release(self): 43 | """ 44 | A thread no longer needs this resource. Decrement the 45 | process counter. 46 | """ 47 | 48 | self.processes -= 1 49 | 50 | def isFree(self): 51 | 52 | if self.processes == 0: 53 | return True 54 | else: 55 | return False 56 | 57 | def getNumProcesses(self): 58 | 59 | return self.processes 60 | 61 | def getTimeStamp(self): 62 | 63 | return self.timestamp 64 | 65 | def getAge(self): 66 | 67 | return time.time() - self.timestamp 68 | 69 | def setDetected(self): 70 | 71 | self.detected = True 72 | 73 | def setAgeRecognized(self): 74 | 75 | self.ageRecognized = True 76 | 77 | def setGenderRecognized(self): 78 | 79 | self.genderRecognized = True 80 | 81 | def setExpressionRecognized(self): 82 | 83 | self.expressionRecognized = True 84 | 85 | def isDetected(self): 86 | 87 | return self.detected 88 | 89 | def isAgeRecognized(self): 90 | 91 | return self.ageRecognized 92 | 93 | def isGenderRecognized(self): 94 | 95 | return self.genderRecognized 96 | 97 | def isExpressionRecognized(self): 98 | 99 | return self.expressionRecognized 100 | 101 | 102 | -------------------------------------------------------------------------------- /GrabberThread.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Nov 2 15:44:40 2016 4 | 5 | @author: agedemo 6 | """ 7 | 8 | import threading 9 | import cv2 10 | import GrabUnit 11 | 12 | class GrabberThread(threading.Thread): 13 | 14 | def __init__(self, parent, params): 15 | 16 | threading.Thread.__init__(self) 17 | 18 | camId = params.getint("camera", "Id") 19 | 20 | camResolution = params.get("camera", "resolution") 21 | camResolution = camResolution.upper().split("X") 22 | camResolution = [int(x) for x in camResolution] 23 | print(("Using camera %d at resolution %s" % (camId, camResolution))) 24 | 25 | self.flipHor = params.getint("camera", "flip_horizontal") 26 | 27 | 28 | self.video = cv2.VideoCapture(camId) # 0: Laptop camera, 1: USB-camera 29 | #self.video.set(3, camResolution[0]) # 1280 #1920 Default: 640 30 | #self.video.set(4, camResolution[1]) # 720 #1080 Default: 480 31 | 32 | self.parent = parent 33 | 34 | print("Grabber Thread initialized...") 35 | 36 | def run(self): 37 | 38 | while not self.parent.isTerminated(): 39 | 40 | stat, frame = self.video.read() 41 | 42 | if frame is not None and not self.parent.isTerminated(): 43 | if self.flipHor: 44 | frame = frame[:, ::-1, ...] 
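                    # (the reversed-column slice above mirrors the frame horizontally for a
                    # selfie view; it is a numpy view, so no pixel data is copied here)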
45 | 46 | unit = GrabUnit.GrabUnit(frame) 47 | 48 | self.parent.putUnit(unit) 49 | 50 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Heikki Huttunen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TUT live age estimator 2 | 3 | **Python implementation of a live deep learning based age/gender/smile/celebrity twin recognizer.** 4 | 5 | ![Image](doc/demo.jpg "demo") 6 | 7 | All components use convolutional networks: 8 | 9 | * Detection uses an SSD model trained on Tensorflow object detection API, but running on OpenCV. 10 | * Age, gender, and smile recognition use a multitask mobilenet trained and running on keras. 11 | * Celebrity twin uses a squeeze-excite seresnet18 to extract features, trained and running on keras. 12 | 13 | The detailed functionality of the system (without multitask and celebrity similarity) is described in our paper: 14 | 15 | >Janne Tommola, Pedram Ghazi, Bishwo Adhikari, Heikki Huttunen, "[Real Time System for Facial Analysis](https://arxiv.org/abs/1809.05474)," Submitted to EUVIP2018. 16 | 17 | If you use our work for research purposes, consider citing the above work. 18 | 19 | ## Usage instructions: 20 | 21 | 22 | Dependencies: [OpenCV 4.0.1+](http://www.opencv.org/), [Tensorflow 1.8+](http://tensorflow.org), [Keras 2.2.3+](http://keras.io/), and [faiss](https://github.com/facebookresearch/faiss/). 23 | 24 | * Requires a webcam. 25 | * Tested on Ubuntu Linux 16.04, 18.04 and Windows 10 with and without a GPU. 26 | * Install OpenCV 4.0.1 or newer. Recommended to install with `pip3 install opencv-python` (includes GTK support, which is required). Freetype support for nicer fonts requires manual compilation of OpenCV. 27 | * Install Tensorflow (1.8 or newer). On a CPU, the MKL version seems to be radically faster than others (Anaconda install by smth like `conda install tensorflow=1.10.0=mkl_py36hb361250_0`. Seek for proper versions with `conda search tensorflow`.). On GPU, use `pip3 install tensorflow-gpu`. 28 | * Install Keras 2.2.3 (or newer). Earlier versions have a slightly different way of loading the models. For example: `pip3 install keras`. 
29 | * Install dlib (version 19.4 or newer) with python 3 dependencies; _e.g.,_ `pip3 install dlib`. 30 | * Install faiss with Anaconda `conda install faiss-cpu -c pytorch`. 31 | * Run with `python3 EstimateAge.py`. 32 | 33 | [Required deep learning models and celebrity dataset](http://doi.org/10.5281/zenodo.3466980). Extract directly to the main folder so that 2 new folders are created there. 34 | 35 | [Example video](https://youtu.be/Kfe5hKNwrCU). 36 | 37 | Contributors: [Heikki Huttunen](http://www.cs.tut.fi/~hehu/), Janne Tommola 38 | -------------------------------------------------------------------------------- /RecognitionThread.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import threading 4 | import time 5 | import numpy as np 6 | import os 7 | from collections import namedtuple 8 | import cv2 9 | import keras 10 | from keras.utils.generic_utils import CustomObjectScope 11 | from compute_features import lifted_struct_loss, triplet_loss 12 | import h5py 13 | import faiss 14 | 15 | 16 | class RecognitionThread(threading.Thread): 17 | 18 | CELEB_RECOG_BUFFER = 15 # How many recognitions to store for picking the most common 19 | 20 | def __init__(self, parent, params): 21 | print("Initializing recognition thread...") 22 | threading.Thread.__init__(self) 23 | self.parent = parent 24 | 25 | ##### Initialize aligners for face alignment. 26 | aligner_path = params.get("recognition", "aligner") 27 | aligner_targets_path = params.get("recognition", "aligner_targets") 28 | self.aligner = keras.models.load_model(aligner_path) 29 | self.aligner._make_predict_function() 30 | self.aligner_input_shape = (self.aligner.input_shape[2], self.aligner.input_shape[1]) 31 | 32 | # load targets 33 | aligner_targets = np.loadtxt(aligner_targets_path) 34 | left_eye = (aligner_targets[36] + aligner_targets[39]) / 2 35 | right_eye = (aligner_targets[42] + aligner_targets[45]) / 2 36 | nose = aligner_targets[30] 37 | left_mouth = aligner_targets[48] 38 | right_mouth = aligner_targets[54] 39 | # Dlib order 40 | #self.shape_targets = np.stack((left_eye, left_mouth, nose, right_eye, right_mouth)) 41 | # CNN order 42 | self.shape_targets = np.stack((left_eye, right_eye, nose, left_mouth, right_mouth)) 43 | 44 | ##### Initialize networks for Age, Gender and Expression 45 | ##### 1. AGE, GENDER, SMILE MULTITASK 46 | print("Initializing multitask network...") 47 | multitaskpath = params.get("recognition", "multitask_folder") 48 | with CustomObjectScope({'relu6': keras.layers.ReLU(6.), 49 | 'DepthwiseConv2D': keras.layers.DepthwiseConv2D}): 50 | self.multiTaskNet = keras.models.load_model(os.path.join(multitaskpath, 'model.h5')) 51 | self.multiTaskNet._make_predict_function() 52 | 53 | ##### Read class names 54 | self.expressions = {int(key): val for key, val in params['expressions'].items()} # convert string key to int 55 | self.minDetections = int(params.get("recognition", "mindetections")) 56 | 57 | ##### 2. 
CELEBRITY 58 | self.siamesepaths = params['celebmodels'] 59 | self.siamesepath = self.siamesepaths["0"] 60 | self.celeb_dataset = params.get("recognition", "celeb_dataset") 61 | self.visualization_path = params.get("recognition", "visualization_path") 62 | self.initialize_celeb() 63 | 64 | # Starting the thread 65 | self.switching_model = False 66 | self.recognition_running = False 67 | print("Recognition thread started...") 68 | 69 | def initialize_celeb(self): 70 | print("Initializing celebrity network...") 71 | 72 | with CustomObjectScope({'relu6': keras.layers.ReLU(6.), 73 | 'DepthwiseConv2D': keras.layers.DepthwiseConv2D, 74 | 'lifted_struct_loss': lifted_struct_loss, 75 | 'triplet_loss': triplet_loss}): 76 | self.siameseNet = keras.models.load_model(os.path.join(self.siamesepath, "feature_model.h5")) 77 | 78 | self.siameseNet._make_predict_function() 79 | 80 | ##### Read celebrity features 81 | celebrity_features = self.siamesepath + os.sep + "features_" + self.celeb_dataset + ".h5" 82 | print("Reading celebrity data from {}...".format(celebrity_features)) 83 | 84 | with h5py.File(celebrity_features, "r") as h5: 85 | celeb_features = np.array(h5["features"]).astype(np.float32) 86 | self.path_ends = list(h5["path_ends"]) 87 | self.celeb_files = [os.path.join(self.visualization_path, s.decode("utf-8")) for s in self.path_ends] 88 | 89 | print("Building index...") 90 | self.celeb_index = faiss.IndexFlatL2(celeb_features.shape[1]) 91 | self.celeb_index.add(celeb_features) 92 | 93 | def crop_face(self, img, rect, margin=0.2): 94 | 95 | x,y,w,h = rect 96 | x1 = x 97 | x2 = x + w 98 | y1 = y 99 | y2 = y + h 100 | 101 | # Extend the area into square shape: 102 | if w > h: 103 | center = int(0.5 * (y1 + y2)) 104 | h = w 105 | y1 = center - int(h / 2) 106 | y2 = y1 + h 107 | elif h > w: 108 | center = int(0.5 * (x1 + x2)) 109 | w = h 110 | x1 = center - int(w / 2) 111 | x2 = x1 + w 112 | 113 | # add margin 114 | full_crop_x1 = x1 - int(w * margin) 115 | full_crop_y1 = y1 - int(h * margin) 116 | full_crop_x2 = x2 + int(w * margin) 117 | full_crop_y2 = y2 + int(h * margin) 118 | # size of face with margin 119 | new_size_w = full_crop_x2 - full_crop_x1 + 1 120 | new_size_h = full_crop_y2 - full_crop_y1 + 1 121 | 122 | # ensure that the region cropped from the original image with margin 123 | # doesn't go beyond the image size 124 | crop_x1 = max(full_crop_x1, 0) 125 | crop_y1 = max(full_crop_y1, 0) 126 | crop_x2 = min(full_crop_x2, img.shape[1] - 1) 127 | crop_y2 = min(full_crop_y2, img.shape[0] - 1) 128 | # size of the actual region being cropped from the original image 129 | crop_size_w = crop_x2 - crop_x1 + 1 130 | crop_size_h = crop_y2 - crop_y1 + 1 131 | 132 | # coordinates of region taken out of the original image in the new image 133 | new_location_x1 = crop_x1 - full_crop_x1 134 | new_location_y1 = crop_y1 - full_crop_y1 135 | new_location_x2 = crop_x1 - full_crop_x1 + crop_size_w - 1 136 | new_location_y2 = crop_y1 - full_crop_y1 + crop_size_h - 1 137 | 138 | new_img = np.random.randint(256, size=(new_size_h, new_size_w, img.shape[2])).astype('uint8') 139 | 140 | new_img[new_location_y1: new_location_y2 + 1, new_location_x1: new_location_x2 + 1, :] = \ 141 | img[crop_y1:crop_y2 + 1, crop_x1:crop_x2 + 1, :] 142 | 143 | # if margin goes beyond the size of the image, repeat last row of pixels 144 | if new_location_y1 > 0: 145 | new_img[0:new_location_y1, :, :] = np.tile(new_img[new_location_y1, :, :], (new_location_y1, 1, 1)) 146 | 147 | if new_location_y2 < new_size_h - 1: 148 | 
new_img[new_location_y2 + 1:new_size_h, :, :] = np.tile(new_img[new_location_y2:new_location_y2 + 1, :, :], 149 | (new_size_h - new_location_y2 - 1, 1, 1)) 150 | if new_location_x1 > 0: 151 | new_img[:, 0:new_location_x1, :] = np.tile(new_img[:, new_location_x1:new_location_x1 + 1, :], 152 | (1, new_location_x1, 1)) 153 | if new_location_x2 < new_size_w - 1: 154 | new_img[:, new_location_x2 + 1:new_size_w, :] = np.tile(new_img[:, new_location_x2:new_location_x2 + 1, :], 155 | (1, new_size_w - new_location_x2 - 1, 1)) 156 | 157 | return new_img 158 | 159 | def five_points_aligner(self, shape_targets, landmarks_pred, img, rect): 160 | 161 | B = shape_targets 162 | A = np.hstack((np.array(landmarks_pred), np.ones((len(landmarks_pred), 1)))) 163 | 164 | a = np.row_stack((np.array([-A[0][1], -A[0][0], 0, -1]), np.array([ 165 | A[0][0], -A[0][1], 1, 0]))) 166 | b = np.row_stack((-B[0][1], B[0][0])) 167 | 168 | for i in range(A.shape[0] - 1): 169 | i += 1 170 | a = np.row_stack((a, np.array([-A[i][1], -A[i][0], 0, -1]))) 171 | a = np.row_stack((a, np.array([A[i][0], -A[i][1], 1, 0]))) 172 | b = np.row_stack((b, np.array([[-B[i][1]], [B[i][0]]]))) 173 | 174 | X, res, rank, s = np.linalg.lstsq(a, b, rcond=-1) 175 | cos = (X[0][0]).real.astype(np.float32) 176 | sin = (X[1][0]).real.astype(np.float32) 177 | t_x = (X[2][0]).real.astype(np.float32) 178 | t_y = (X[3][0]).real.astype(np.float32) 179 | 180 | H = np.array([[cos, -sin, t_x], [sin, cos, t_y]]) 181 | s = np.linalg.eigvals(H[:, :-1]) 182 | R = s.max() / s.min() 183 | 184 | if R < 2.0: 185 | warped = cv2.warpAffine(img, H, (224, 224)) 186 | else: 187 | # Seems to distort too much, probably error in landmarks 188 | # Let's just crop. 189 | crop = self.crop_face(img, rect) 190 | warped = cv2.resize(crop, (224, 224)) 191 | 192 | return warped 193 | 194 | def aligner_preprocess(self, img): 195 | # RGB -> BGR 196 | 197 | x = img[..., ::-1].astype(np.float32) 198 | 199 | x[..., 0] -= 103.939 200 | x[..., 1] -= 116.779 201 | x[..., 2] -= 123.68 202 | 203 | return x 204 | 205 | def run(self): 206 | Celebinfo = namedtuple('Celeb', ['filename', 'distance']) 207 | 208 | while not self.parent.isTerminated(): 209 | 210 | while self.switching_model: 211 | self.recognition_running = False 212 | time.sleep(0.1) 213 | 214 | self.recognition_running = True 215 | 216 | faces = self.parent.getFaces() 217 | while faces == None: 218 | time.sleep(0.1) 219 | faces = self.parent.getFaces() 220 | 221 | validFaces = [f for f in faces if len(f['bboxes']) > self.minDetections] 222 | 223 | for face in validFaces: 224 | # get the timestamp of the most recent frame: 225 | timestamp = face['timestamps'][-1] 226 | unit = self.parent.getUnit(self, timestamp) 227 | 228 | if unit is not None: 229 | img = unit.getFrame() 230 | mean_box = np.mean(face['bboxes'], axis=0) 231 | x, y, w, h = [int(c) for c in mean_box] 232 | 233 | # Align the face to match the targets 234 | 235 | # 1. 
DETECT LANDMARKS 236 | crop = img[y : y+h, x : x+w, ::-1].astype(np.uint8) # Crop face and convert BGR to RGB (which preprocess will convert back to BGR --- TODO: clean up) 237 | 238 | if crop.size == 0: 239 | continue 240 | 241 | landmarks_crop = cv2.resize(crop, self.aligner_input_shape) 242 | landmarks_crop = self.aligner_preprocess(landmarks_crop) 243 | net_input = landmarks_crop[np.newaxis, ...].astype(np.float32) 244 | 245 | s = self.aligner.predict(net_input)[0] 246 | landmarks = s.reshape((5, 2)) 247 | 248 | # Normalize landmarks to the full image coordinates: 249 | landmarks[:, 0] = x + landmarks[:, 0] * w / self.aligner_input_shape[0] 250 | landmarks[:, 1] = y + landmarks[:, 1] * h / self.aligner_input_shape[1] 251 | 252 | if "landmarks" in face: 253 | face["landmarks"].append(landmarks) 254 | else: 255 | face["landmarks"] = [landmarks] 256 | 257 | landmarks = np.array(face["landmarks"][-10:]).mean(axis = 0) 258 | 259 | # 2. ALIGN 260 | crop = self.five_points_aligner(self.shape_targets, landmarks, img, rect = [x,y,w,h]) 261 | cv2.imwrite("rec/%d.jpg" % np.random.randint(0, 1000), crop) 262 | 263 | # Save aligned face crop, used for debugging if turned on. 264 | face["crop"] = crop[..., ::-1] 265 | 266 | crop = crop.astype(np.float32) 267 | 268 | siamese_target_size = self.siameseNet.input_shape[1:3] 269 | crop_celeb = cv2.resize(crop, siamese_target_size).astype(np.float32) 270 | 271 | # Preprocess network inputs, add singleton batch dimension 272 | recog_input = np.expand_dims(crop / 255, axis=0) 273 | siamese_input = np.expand_dims(crop_celeb / 255, axis=0) 274 | 275 | # Recognize age, gender and smile in one forward pass 276 | 277 | ageout, genderout, smileout = self.multiTaskNet.predict(recog_input) 278 | age = np.dot(ageout[0], list(range(101))) 279 | if "age" in face: 280 | face["age"] = 0.95 * face["age"] + 0.05 * age 281 | else: 282 | face["age"] = age 283 | face["recog_round"] = 0 284 | 285 | gender = genderout[0][1] # male probability 286 | if "gender" in face: 287 | face["gender"] = 0.8 * face["gender"] + 0.2 * gender 288 | else: 289 | face["gender"] = gender 290 | 291 | t = smileout[0] 292 | t = np.argmax(t) 293 | expression = self.expressions[t] 294 | face["expression"] = expression 295 | 296 | # Find closest celebrity match if new face or once every 5 rounds 297 | if "celebs" not in face or face["recog_round"] % 5 == 0: 298 | siamese_features = self.siameseNet.predict(siamese_input) 299 | K = 1 # This many nearest matches 300 | celeb_distance, I = self.celeb_index.search(siamese_features, K) 301 | celeb_idx = I[0][0] 302 | celeb_filename = self.celeb_files[celeb_idx] 303 | 304 | if "celebs" in face: 305 | celebs = face["celebs"] 306 | recognitions = celebs["recognitions"] 307 | 308 | # Maintain a buffer of closest matches and pick the most common one for stability 309 | if recognitions < RecognitionThread.CELEB_RECOG_BUFFER: 310 | celebs["indexes"].append(celeb_idx) 311 | else: 312 | celebs["indexes"][recognitions % RecognitionThread.CELEB_RECOG_BUFFER] = celeb_idx 313 | 314 | celebs[celeb_idx] = Celebinfo(filename=celeb_filename, distance=celeb_distance) 315 | celebs["recognitions"] += 1 316 | else: 317 | face["celebs"] = { 318 | "indexes": [celeb_idx], 319 | celeb_idx: Celebinfo(filename=celeb_filename, distance=celeb_distance), 320 | "recognitions": 1} 321 | 322 | face["recog_round"] += 1 323 | 324 | # Support for switching celebrity model on the fly 325 | def switch_model(self, modelidx): 326 | 327 | self.siamesepath = self.siamesepaths[modelidx] 328 | 329 | 
print("Switching to", self.siamesepath) 330 | print("Stopping recognition thread...") 331 | self.switching_model = True 332 | 333 | # Wait for recognition thread to finish and stop before changing 334 | while self.recognition_running: 335 | time.sleep(0.1) 336 | 337 | self.initialize_celeb() 338 | 339 | print("Switching model complete. Resuming recognition thread...") 340 | self.switching_model = False 341 | 342 | def print_models(self): 343 | idx = 0 344 | while str(idx) in self.siamesepaths: 345 | desc = self.siamesepaths.get("{}_desc".format(idx), "") 346 | modelpath = self.siamesepaths[str(idx)] 347 | currentindicator = "<----- CURRENT MODEL" if modelpath == self.siamesepath else "" 348 | if desc: 349 | print("{}: {}, {} {}".format(idx, modelpath, desc, currentindicator)) 350 | else: 351 | print("{}: {} {}".format(idx, modelpath, currentindicator)) 352 | idx += 1 353 | 354 | -------------------------------------------------------------------------------- /UnitServer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Nov 2 15:36:48 2016 4 | 5 | @author: agedemo 6 | """ 7 | 8 | import DetectionThread 9 | import RecognitionThread 10 | import threading 11 | 12 | class UnitServer(): 13 | 14 | def __init__(self, maxUnits = 4): 15 | 16 | self.maxUnits = maxUnits 17 | self.units = [] 18 | self.mutex = threading.Lock() 19 | 20 | def getUnit(self, caller, timestamp = None): 21 | 22 | self.mutex.acquire() 23 | 24 | # Detection thread will receive the newest undetected frame 25 | 26 | unit = None 27 | 28 | if timestamp is not None: 29 | 30 | for f in self.units: 31 | if f.getTimeStamp() == timestamp: 32 | unit = f 33 | 34 | else: 35 | 36 | if isinstance(caller, DetectionThread.DetectionThread): 37 | 38 | validUnits = [f for f in self.units if f.isDetected() == False] 39 | 40 | if len(validUnits) == 0: 41 | unit = None 42 | else: 43 | unit = validUnits[-1] 44 | unit.acquire() 45 | unit.setDetected() 46 | 47 | #print("Locking %.6f for %s" % (unit.getTimeStamp(), str(type(caller)))) 48 | 49 | # Age thread will receive the newest detected frame with age rec not done 50 | 51 | if isinstance(caller, RecognitionThread.RecognitionThread): 52 | 53 | validUnits = [f for f in self.units if 54 | f.isDetected() == True and 55 | f.isAgeRecognized() == False] 56 | 57 | if len(validUnits) == 0: 58 | unit = None 59 | else: 60 | unit = validUnits[-1] 61 | unit.acquire() 62 | unit.setDetected() 63 | 64 | # print("Locking %.6f for %s" % (unit.getTimeStamp(), str(type(caller)))) 65 | 66 | 67 | self.mutex.release() 68 | 69 | return unit 70 | 71 | def putUnit(self, unit): 72 | 73 | self.mutex.acquire() 74 | 75 | #print "Adding %.6f" % (unit.getTimeStamp()) 76 | 77 | if len(self.units) >= self.maxUnits: 78 | # Attempt to remove oldest unit 79 | if self.units[0].isFree(): 80 | self.units.pop(0) 81 | 82 | if len(self.units) < self.maxUnits: 83 | self.units.append(unit) 84 | else: 85 | #print("Unable to add new unit.") 86 | pass 87 | 88 | # for i,unit in enumerate(self.units): 89 | # print("Unit %.6f: numProcesses: %d" % (unit.getTimeStamp(), unit.getNumProcesses())) 90 | # print "=" * 5 91 | 92 | self.mutex.release() 93 | -------------------------------------------------------------------------------- /Verdana.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahehu/TUT-live-age-estimator/508e5745d858eede98cf70ac305425d5d618e339/Verdana.ttf 
-------------------------------------------------------------------------------- /compute_features.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Oct 15 15:47:04 2018 4 | 5 | @author: agedemo 6 | """ 7 | 8 | from keras.models import load_model 9 | import h5py 10 | import cv2 11 | import sys 12 | import numpy as np 13 | import time 14 | import os 15 | import tensorflow as tf 16 | from keras import backend as K 17 | 18 | visualize = True 19 | if visualize: 20 | import matplotlib.pyplot as plt 21 | 22 | def find_images_from_tree(path): 23 | """ Collect images from a tree with one folder per identity """ 24 | 25 | print("Searching for images in {}".format(path)) 26 | image_files = [] 27 | 28 | for root, dirs, files in os.walk(path): 29 | for name in files: 30 | if name.lower().endswith(("jpg", "jpeg", "png", "bmp")): 31 | image_files.append(root + os.sep + name) 32 | 33 | return image_files 34 | 35 | def find_images(path): 36 | """ Collect one image per identity """ 37 | 38 | found_ids = [] 39 | files = [] 40 | 41 | top_folder = os.sep.join(path.split(os.sep)[:-1]) 42 | identity_file = top_folder + os.sep + "identities.txt" 43 | 44 | with open(identity_file) as fp: 45 | for i, line in enumerate(fp): 46 | 47 | name, identity = line.split() 48 | identity = int(identity) 49 | 50 | if identity not in found_ids: 51 | found_ids.append(identity) 52 | fullfile = os.path.abspath(path + os.sep + os.path.basename(name)) 53 | 54 | if not os.path.isfile(fullfile): 55 | print("File {} not found, ignoring.".format(fullfile)) 56 | else: 57 | files.append(fullfile) 58 | 59 | return files 60 | 61 | def triplet_semihard_loss(y_true, y_pred): 62 | 63 | loss = tf.contrib.losses.metric_learning.triplet_semihard_loss(labels = K.argmax(y_true, axis = -1), embeddings = y_pred, margin = 1.0) 64 | return loss 65 | 66 | def cluster_loss(y_true, y_pred): 67 | 68 | loss = tf.contrib.losses.metric_learning.cluster_loss(labels = K.argmax(y_true, axis = -1), embeddings = y_pred, margin_multiplier = 1.0) 69 | return loss 70 | 71 | def triplet_loss(y_true, y_pred): 72 | 73 | loss = tf.contrib.losses.metric_learning.triplet_semihard_loss(labels = K.argmax(y_true, axis = -1), embeddings = y_pred, margin = 1.0) 74 | return loss 75 | 76 | def lifted_struct_loss(y_true, y_pred): 77 | 78 | loss = tf.contrib.losses.metric_learning.lifted_struct_loss(labels = K.argmax(y_true, axis = -1), embeddings = y_pred, margin = 1.0) 79 | return loss 80 | 81 | if __name__ == "__main__": 82 | 83 | if len(sys.argv) > 1: 84 | model_folder = sys.argv[1] 85 | else: 86 | model_folder = "recognizers/celebrities/network/seresnet18_STRUCT_2019-04-03-02-56-41" 87 | 88 | model = load_model(model_folder + os.sep + "feature_model.h5", custom_objects = {'triplet_loss': triplet_loss, 'lifted_struct_loss': lifted_struct_loss, 'cluster_loss': cluster_loss}) 89 | 90 | if len(sys.argv) > 2: 91 | images_folder = sys.argv[2] 92 | else: 93 | images_folder = "recognizers/celebrities/data/CelebA/CelebA_aligned_5points" 94 | 95 | images_folder = os.path.abspath(images_folder) 96 | files = find_images(path = images_folder) 97 | #files = find_images_from_tree(path = images_folder) 98 | 99 | # Gather the file structure of the dataset, used when visualizing with different images than the ones features are calculated from 100 | commonpath = os.path.commonpath((files[0], images_folder)) 101 | path_ends = [os.path.relpath(file, start=commonpath) for file in files] 102 | 103 | in_shape = 
model.input_shape[1:3] 104 | out_dim = model.output_shape[-1] 105 | 106 | features = np.empty((len(files), out_dim)) 107 | 108 | print("Found {} files...".format(len(files))) 109 | 110 | if visualize: 111 | fig, ax = plt.subplots(2, 1) 112 | 113 | start_time = time.time() 114 | buf_size = 64 # MUST BE MANUALLY DECREASED IF THERE ARE FEWER IMAGES! 115 | fb_shape = (buf_size, ) + model.input_shape[1:] 116 | frame_buffer = np.empty(fb_shape, dtype = np.float32) 117 | fb_idx = 0 118 | 119 | cnt = 0 120 | prev_sample = None 121 | 122 | for i, name in enumerate(files): 123 | 124 | print(name) 125 | img = cv2.imread(name) 126 | 127 | # Take center crop and scale to in-shape 128 | h, w, d = img.shape 129 | 130 | if w > h: 131 | c = w // 2 132 | x1 = c - h // 2 133 | x2 = x1 + h 134 | img = img[:, x1:x2, :] 135 | elif w < h: 136 | c = h // 2 137 | y1 = c - w // 2 138 | y2 = y1 + w 139 | img = img[y1:y2, :, :] 140 | img = cv2.resize(img, in_shape) 141 | 142 | # Convert to RGB and scale 143 | img = img[..., ::-1].astype(np.float32) / 255.0 144 | frame_buffer[fb_idx, ...] = img 145 | fb_idx += 1 146 | 147 | if fb_idx == buf_size: 148 | 149 | feat = model.predict(frame_buffer) 150 | elapsed_time = time.time() - start_time 151 | sec_per_frame = elapsed_time / (i+1) 152 | remaining_frames = len(files) - (i+1) 153 | remaining_time = remaining_frames * sec_per_frame 154 | remaining_time_mins = remaining_time / 60 155 | 156 | msg = "Computing features: {:.1f} % done [{:.1f} MB]. {:.1f} mins remaining".format(100*(i+1) / len(files), 157 | sys.getsizeof(features) / 1024**2, 158 | remaining_time_mins) 159 | 160 | print(msg, end = " ") 161 | print("File {}".format(name)) 162 | 163 | if visualize: 164 | 165 | ax[0].cla() 166 | 167 | f = feat[0,...] 168 | ax[0].plot(f) 169 | ax[0].set_title(msg) 170 | 171 | if prev_sample is not None: 172 | ax[1].cla() 173 | ax[1].plot(f - prev_sample) 174 | ax[1].set_title("Difference to previous sample") 175 | 176 | plt.show(block = False) 177 | plt.pause(0.1) 178 | prev_sample = f 179 | 180 | fb_idx = 0 181 | else: 182 | continue 183 | 184 | features[cnt : cnt + feat.shape[0], :] = feat 185 | cnt += feat.shape[0] 186 | 187 | with h5py.File(model_folder + os.sep + "features_" + os.path.basename(os.path.normpath(images_folder)) + ".h5", "w") as h5file: 188 | h5file["features"] = np.array(features) 189 | b_files = [bytes(f, 'utf-8') for f in files] 190 | h5file["filenames"] = b_files 191 | b_pathends = [bytes(f, 'utf-8') for f in path_ends] 192 | h5file["path_ends"] = b_pathends 193 | 194 | -------------------------------------------------------------------------------- /config.ini: -------------------------------------------------------------------------------- 1 | [general] 2 | # Visualize aligned crop of face detection 3 | debug = True 4 | 5 | [camera] 6 | id = 0 7 | resolution = 1024x768 8 | flip_horizontal = 1 9 | 10 | [window] 11 | caption = TUT Age Estimator 12 | displaysize = 1200x900 13 | freetype_fontpath = Verdana.ttf 14 | 15 | [server] 16 | num_frames = 8 17 | 18 | [recognition] 19 | multitask_folder=recognizers/multitask/MOBILENET_2019-04-09-08-43-46 20 | 21 | celeb_dataset determines the features loaded (ie. 
features_.h5) 22 | #celeb_dataset = FinnishCelebs_aligned_YueMethod 23 | #visualization_path = recognizers/celebrities/data/visualization_FinnishCelebs_unprocessed 24 | 25 | celeb_dataset = CelebA_aligned_5points 26 | visualization_path = recognizers/celebrities/data/CelebA/CelebA_aligned_5points 27 | 28 | mindetections = 2 29 | aligner = recognizers/alignment/model-056-0.716316-0.967865.h5 30 | aligner_targets = recognizers/alignment/targets_symm.txt 31 | 32 | [celebmodels] 33 | # Top model is the default, rest can be changed to while the program is running 34 | 0 = recognizers/celebrities/network/seresnet18_STRUCT_2019-04-03-02-56-41 35 | 0_desc = Default (squeeze-excite Resnet18), aligned with 5 point method 36 | ;1 = recognizers/celebrities/network/resnet34_STRUCT_2019-04-03-02-14-26 37 | ;1_desc = Alternative (Resnet 34), aligned with 5 point method 38 | 39 | [expressions] 40 | 0 = No smile 41 | 1 = Smile 42 | # Current multitask model only supports the two above expressions. 43 | ;2 = Sad 44 | ;3 = Surprise 45 | ;4 = Fear 46 | ;5 = Disgust 47 | ;6 = Anger 48 | 49 | [detection] 50 | input_width = 240 51 | input_height = 180 52 | inference_graph = detection/240x180_depth075_ssd_mobilenetv1/frozen_inference_graph.pb 53 | text_graph = detection/240x180_depth075_ssd_mobilenetv1/graph.pbtxt 54 | 55 | -------------------------------------------------------------------------------- /doc/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahehu/TUT-live-age-estimator/508e5745d858eede98cf70ac305425d5d618e339/doc/architecture.png -------------------------------------------------------------------------------- /doc/demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahehu/TUT-live-age-estimator/508e5745d858eede98cf70ac305425d5d618e339/doc/demo.jpg -------------------------------------------------------------------------------- /tools/generate_celeb_visualizations.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | import cv2 5 | 6 | """ 7 | Generates pre-scaled output images from source images, that will be shown in the application as celebrity match. 8 | The use case for this is that you probably want to compare aligned face features to aligned celebrity 9 | features. They might not look very pleasant though, so this gives the option to use unprocessed images in visualization. 10 | 11 | The usage of large source images can slow down the program heavily (reading and resizing the image), so the purpose of this 12 | script is to adjust them to a small but still pleasant looking size. 13 | """ 14 | 15 | output_size = 300 # A compromise to preserve image quality while getting rid of huge images that would slow down the 16 | # live-age-estimator. The output images are resized further in the application, hopefully to a smaller size than this. 
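# The output mirrors the input tree as <basepath>/visualization_<celebfolder>/<identity>/<image>,
# which matches the visualization_FinnishCelebs_unprocessed path referenced in config.ini.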
17 | 18 | if __name__ == '__main__': 19 | path = "../recognizers/celebrities/data/FinnishCelebs_unprocessed" 20 | basepath, celebfolder = os.path.split(path) 21 | os.makedirs(os.path.join(basepath, "visualization_" + celebfolder), exist_ok=True) 22 | 23 | for root, dirs, files in os.walk(path): 24 | for filename in files: 25 | if filename.lower().endswith(("jpg", "jpeg", "png", "bmp")): 26 | celebname = os.path.split(root)[1] 27 | 28 | img = cv2.imread(root + os.sep + filename) 29 | newpath = os.path.join(basepath, "visualization_" + celebfolder, celebname) 30 | os.makedirs(newpath, exist_ok=True) 31 | 32 | h, w = img.shape[0:2] 33 | 34 | if w >= h: 35 | new_h = output_size 36 | new_w = int(output_size*w/h) 37 | else: 38 | new_w = output_size 39 | new_h = int(output_size*h/w) 40 | 41 | img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA) 42 | cv2.imwrite(os.path.join(newpath + os.sep + filename), img) 43 | 44 | 45 | --------------------------------------------------------------------------------