├── .gitignore ├── ControllerThread.py ├── DetectionThread.py ├── EstimateAge.py ├── GrabUnit.py ├── GrabberThread.py ├── LICENSE ├── README.md ├── RecognitionThread.py ├── UnitServer.py ├── Verdana.ttf ├── compute_features.py ├── config.ini ├── doc ├── architecture.png └── demo.jpg └── tools └── generate_celeb_visualizations.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/* 2 | __pycache__/* 3 | detection/* 4 | recognizers/* 5 | -------------------------------------------------------------------------------- /ControllerThread.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Nov 2 15:44:40 2016 4 | 5 | @author: agedemo 6 | """ 7 | 8 | from UnitServer import UnitServer 9 | from GrabberThread import GrabberThread 10 | from DetectionThread import DetectionThread 11 | from RecognitionThread import RecognitionThread 12 | 13 | import threading 14 | import time 15 | import sys 16 | import cv2 17 | import copy 18 | import os 19 | import numpy as np 20 | import subprocess 21 | 22 | class ControllerThread(threading.Thread): 23 | """ Responsible for starting and shutting down all threads and 24 | services. """ 25 | 26 | def __init__(self, params): 27 | threading.Thread.__init__(self) 28 | 29 | self.terminated = False 30 | self.caption = params.get("window", "caption") 31 | 32 | self.initializeFonts(params) 33 | 34 | self.minDetections = int(params.get("recognition", "mindetections")) 35 | 36 | self.displaysize = params.get("window", "displaysize") 37 | self.displaysize = self.displaysize.upper().split("X") 38 | self.displaysize = tuple([int(s) for s in self.displaysize]) 39 | 40 | self.debug = params.get("general", "debug") not in ("false", "False", "0") 41 | 42 | # Get current resolution (only implemented for Linux) 43 | if sys.platform == 'linux': 44 | self.resolution = subprocess.Popen('xrandr | grep "\*" | cut -d" " -f4', shell=True, 45 | stdout=subprocess.PIPE).communicate()[0].decode("utf-8").rstrip().split('x') 46 | self.resolution = [int(s) for s in self.resolution] 47 | else: 48 | self.resolution = [1024, 768] 49 | 50 | # Start frame storage 51 | queueLength = params.getint("server", "num_frames") 52 | self.unitServer = UnitServer(queueLength) 53 | 54 | # Start Grabber thread 55 | self.grabberThread = GrabberThread(self, params) 56 | self.grabberThread.start() 57 | 58 | # Start Detection thread 59 | self.faces = [] 60 | self.detectionThread = DetectionThread(self, params) 61 | self.detectionThread.start() 62 | 63 | # Start Recognition Thread 64 | self.recognitionThread = RecognitionThread(self, params) 65 | self.recognitionThread.start() 66 | 67 | unused_width = self.resolution[0] - self.displaysize[0] 68 | 69 | cv2.moveWindow(self.caption, unused_width//2, 0) # Will move window to center when everything is running. 
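        # Note: commandInterface() below runs a blocking stdin loop, so __init__ does not
        # return until the user quits with 'q'. The window keeps updating meanwhile, because
        # frames are displayed via putUnit()/showVideo(), which the grabber thread drives.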
70 | 71 | self.commandInterface() 72 | 73 | def commandInterface(self): 74 | while True: 75 | text = input("Enter command (Q)uit, (L)ist models, (S)witch model: ").lower() 76 | if text == "l": 77 | self.recognitionThread.print_models() 78 | elif text == "s": 79 | idx = input("Please input a new index: ") 80 | try: 81 | self.recognitionThread.switch_model(idx) 82 | except KeyError as e: 83 | print("No such model index", e) 84 | 85 | elif text == "q": 86 | print("Bye!") 87 | self.terminate() 88 | break 89 | 90 | def initializeFonts(self, params): 91 | """ 92 | Tries to initialize freetype for nicer fonts, if not installed fall back to standard. 93 | Freetype isn't included in the PIP/Conda packages, so we can't require it. 94 | """ 95 | self.freeType = None 96 | freetype_fontpath = params.get("window", "freetype_fontpath") 97 | sizetest_text = "FEMALE 100%" # Probably longest text possible 98 | try: 99 | self.freeType = cv2.freetype.createFreeType2() 100 | self.freeType.loadFontData(fontFileName=freetype_fontpath, id=0) 101 | self.textBaseScale = 20 # Maximum text scale, will be decreased if there's overlap. 102 | self.textBaseWidth = self.freeType.getTextSize(sizetest_text, self.textBaseScale, -1)[0][0] 103 | 104 | except AttributeError: 105 | print("OpenCV Freetype not found, falling back to standard OpenCV font...") 106 | self.textBaseScale = 0.6 107 | self.textBaseWidth = cv2.getTextSize(sizetest_text, cv2.FONT_HERSHEY_SIMPLEX, self.textBaseScale, 2)[0][0] 108 | 109 | 110 | def run(self): 111 | while not self.terminated: 112 | time.sleep(0.5) 113 | 114 | def putUnit(self, unit): 115 | 116 | # Show the newest frame immediately. 117 | self.showVideo(unit) 118 | 119 | # Send to further processing 120 | if not self.terminated: 121 | self.unitServer.putUnit(unit) 122 | 123 | def getUnit(self, caller, timestamp = None): 124 | 125 | return self.unitServer.getUnit(caller, timestamp) 126 | 127 | def terminate(self): 128 | 129 | self.terminated = True 130 | 131 | def drawBoundingBox(self, img, bbox): 132 | 133 | x,y,w,h = [int(c) for c in bbox] 134 | 135 | m = 0.2 136 | 137 | # Upper left corner 138 | pt1 = (x,y) 139 | pt2 = (int(x + m*w), y) 140 | cv2.line(img, pt1, pt2, color = [255,255,0], thickness = 2) 141 | 142 | pt1 = (x,y) 143 | pt2 = (x, int(y + m*h)) 144 | cv2.line(img, pt1, pt2, color = [255,255,0], thickness = 2) 145 | 146 | # Upper right corner 147 | pt1 = (x + w, y) 148 | pt2 = (x + w, int(y + m*h)) 149 | cv2.line(img, pt1, pt2, color = [255,255,0], thickness = 2) 150 | 151 | pt1 = (x + w, y) 152 | pt2 = (int(x + w - m * w), y) 153 | cv2.line(img, pt1, pt2, color = [255,255,0], thickness = 2) 154 | 155 | # Lower left corner 156 | pt1 = (x, y + h) 157 | pt2 = (x, int(y + h - m*h)) 158 | cv2.line(img, pt1, pt2, color = [255,255,0], thickness = 2) 159 | 160 | pt1 = (x, y + h) 161 | pt2 = (int(x + m * w), y + h) 162 | cv2.line(img, pt1, pt2, color = [255,255,0], thickness = 2) 163 | 164 | # Lower right corner 165 | pt1 = (x + w, y + h) 166 | pt2 = (x + w, int(y + h - m*h)) 167 | cv2.line(img, pt1, pt2, color = [255,255,0], thickness = 2) 168 | 169 | pt1 = (x + w, y + h) 170 | pt2 = (int(x + w - m * w), y + h) 171 | cv2.line(img, pt1, pt2, color = [255,255,0], thickness = 2) 172 | 173 | def AddCeleb(self, face, img, x, y, w, h): 174 | 175 | celebs = face["celebs"] 176 | indexes = celebs["indexes"] 177 | most_common = max(set(indexes), key=indexes.count) 178 | 179 | filename = celebs[most_common].filename 180 | distance = celebs[most_common].distance 181 | identity = 
filename.split(os.sep)[-2] 182 | 183 | celeb_img = cv2.imread(filename) 184 | aspect_ratio = celeb_img.shape[1] / celeb_img.shape[0] 185 | new_w = w 186 | new_h = int(w/aspect_ratio) 187 | if new_h > h: 188 | new_h = h 189 | try: 190 | celeb_img = cv2.resize(celeb_img, (new_w, new_h), interpolation=cv2.INTER_AREA) 191 | except AssertionError: # new_w or new_h is 0 ie bounding box size is 0 192 | return None 193 | 194 | # Cut out pixels overflowing image on the right 195 | x_end = x + w + new_w 196 | if x_end > img.shape[1]: 197 | remove_pixels = x_end - img.shape[1] 198 | celeb_img = celeb_img[:, :-remove_pixels, :] 199 | new_w -= remove_pixels 200 | 201 | # Cut out pixels overflowing image on the bottom 202 | y_offset = h - new_h 203 | y_end = y + y_offset + new_h 204 | if y_end > img.shape[0]: 205 | remove_pixels = y_end - img.shape[0] 206 | celeb_img = celeb_img[:-remove_pixels, ...] 207 | new_h -= remove_pixels 208 | 209 | if celeb_img.size: 210 | img[y + y_offset: y + y_offset + new_h, x + w: x + w + new_w, :] = celeb_img 211 | return identity 212 | 213 | def drawFace(self, face, img): 214 | 215 | bbox = np.mean(face['bboxes'], axis = 0) 216 | 217 | self.drawBoundingBox(img, bbox) 218 | x, y, w, h = [int(c) for c in bbox] 219 | 220 | # 1. CELEBRITY TWIN 221 | 222 | celeb_identity = None 223 | 224 | # Clamp bounding box top to image 225 | y = 0 if y < 0 else y 226 | 227 | if "celebs" in face.keys(): 228 | celeb_identity = self.AddCeleb(face, img, x, y, w, h) 229 | 230 | # Check if text can overlap the celeb texts (goes past the bounding box), if so decrease size 231 | text_size = self.textBaseScale 232 | 233 | if self.textBaseWidth > w: 234 | text_size *= w/self.textBaseWidth 235 | if self.freeType: 236 | text_size = int(text_size) # Freetype doesn't accept float text size. 237 | 238 | 239 | # 1. AGE 240 | 241 | if "age" in face.keys(): 242 | age = face['age'] 243 | annotation = "Age: %.0f" % age 244 | txtLoc = (x, y + h + 30) 245 | self.writeText(img, annotation, txtLoc, text_size) 246 | 247 | 248 | # 2. GENDER 249 | 250 | if "gender" in face.keys(): 251 | gender = "MALE" if face['gender'] > 0.5 else "FEMALE" 252 | genderProb = max(face["gender"], 1-face["gender"]) 253 | annotation = "%s %.0f %%" % (gender, 100.0 * genderProb) 254 | txtLoc = (x, y + h + 60) 255 | self.writeText(img, annotation, txtLoc, text_size) 256 | 257 | # 3. EXPRESSION 258 | 259 | if "expression" in face.keys(): 260 | expression = face["expression"] 261 | annotation = "%s" % (expression) 262 | txtLoc = (x, y + h + 90) 263 | self.writeText(img, annotation, txtLoc, text_size) 264 | 265 | if celeb_identity: 266 | annotation = "CELEBRITY" 267 | txtLoc = (x + w, y + h + 30) 268 | self.writeText(img, annotation, txtLoc, text_size) 269 | 270 | annotation = "TWIN" # (%.0f %%)" % (100*np.exp(-face["celeb_distance"])) 271 | txtLoc = (x + w, y + h + 60) 272 | self.writeText(img, annotation, txtLoc, text_size) 273 | 274 | annotation = celeb_identity 275 | txtLoc = (x + w, y + h + 90) 276 | self.writeText(img, annotation, txtLoc, text_size) 277 | 278 | # DEBUG ONLY - Visualize aligned face crop in corner. 
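            # The crop is stored by RecognitionThread in RGB order, so it is flipped back to
            # BGR below before being pasted into the (BGR) display frame.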
279 | if self.debug and "crop" in face.keys(): 280 | 281 | crop = face["crop"] 282 | crop = cv2.resize(crop, (100, 100)) 283 | croph, cropw = crop.shape[0:2] 284 | imgh, imgw = img.shape[0:2] 285 | 286 | img[:croph, imgw-cropw:, :] = crop[..., ::-1] 287 | 288 | def showVideo(self, unit): 289 | 290 | unit.acquire() 291 | frame = copy.deepcopy(unit.getFrame()) 292 | unit.release() 293 | 294 | # Annotate 295 | 296 | validFaces = [f for f in self.faces if len(f['bboxes']) > self.minDetections] 297 | 298 | for face in validFaces: 299 | self.drawFace(face, frame) 300 | 301 | frame = cv2.resize(frame, self.displaysize) 302 | cv2.imshow(self.caption, frame) 303 | key = cv2.waitKey(10) 304 | 305 | if key == 27: 306 | self.terminate() 307 | 308 | 309 | def findNearestFace(self, bbox): 310 | 311 | distances = [] 312 | 313 | x,y,w,h = bbox 314 | bboxCenter = [x + w/2, y + h/2] 315 | 316 | for face in self.faces: 317 | 318 | x,y,w,h = np.mean(face['bboxes'], axis = 0) 319 | faceCenter = [x + w/2, y + h/2] 320 | 321 | distance = np.hypot(faceCenter[0] - bboxCenter[0], 322 | faceCenter[1] - bboxCenter[1]) 323 | 324 | distances.append(distance) 325 | 326 | if len(distances) == 0: 327 | minIdx = None 328 | minDistance = None 329 | else: 330 | minDistance = np.min(distances) 331 | minIdx = np.argmin(distances) 332 | 333 | return minIdx, minDistance 334 | 335 | def setDetections(self, detections, timestamps): 336 | 337 | # Find the location among all recent face locations where this would belong 338 | 339 | for bbox, timestamp in zip(detections, timestamps): 340 | 341 | idx, dist = self.findNearestFace(bbox) 342 | 343 | if dist is not None and dist < 50: 344 | 345 | self.faces[idx]['bboxes'].append(bbox) 346 | self.faces[idx]['timestamps'].append(timestamp) 347 | 348 | if len(self.faces[idx]['bboxes']) > 7: 349 | self.faces[idx]['bboxes'].pop(0) 350 | self.faces[idx]['timestamps'].pop(0) 351 | 352 | else: 353 | # This is a new face not in the scene before 354 | self.faces.append({'timestamps': [timestamp], 'bboxes': [bbox]}) 355 | 356 | # Clean old detections: 357 | 358 | now = time.time() 359 | facesToRemove = [] 360 | 361 | for i, face in enumerate(self.faces): 362 | if now - face['timestamps'][-1] > 0.5: 363 | facesToRemove.append(i) 364 | 365 | for i in facesToRemove: 366 | try: 367 | self.faces.pop(i) 368 | except: 369 | # Face was deleted by other thread. 
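                # (a stale index then raises IndexError here, which is silently ignored)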
370 | pass 371 | 372 | def getFaces(self): 373 | 374 | if len(self.faces) == 0: 375 | return None 376 | else: 377 | return self.faces 378 | 379 | def isTerminated(self): 380 | 381 | return self.terminated 382 | 383 | def writeText(self, img, annotation, location, size): 384 | if self.freeType: 385 | self.freeType.putText(img=img, 386 | text=annotation, 387 | org=location, 388 | fontHeight=size, 389 | color=(255, 255, 0), 390 | thickness=-1, 391 | line_type=cv2.LINE_AA, 392 | bottomLeftOrigin=True) 393 | else: 394 | annotation = annotation.replace('ä', 'a').replace('ö', 'o').replace('å', 'o') 395 | cv2.putText(img, 396 | text=annotation, 397 | org=location, 398 | fontFace=cv2.FONT_HERSHEY_SIMPLEX, 399 | fontScale=size, 400 | color=[255, 255, 0], 401 | thickness=2) 402 | -------------------------------------------------------------------------------- /DetectionThread.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import cv2 4 | import threading 5 | import time 6 | 7 | 8 | class DetectionThread(threading.Thread): 9 | 10 | def __init__(self, parent, params): 11 | 12 | threading.Thread.__init__(self) 13 | 14 | print("Initializing detection thread...") 15 | self.parent = parent 16 | 17 | frozen_graph = str(params.get("detection", "inference_graph")) 18 | text_graph = str(params.get("detection", "text_graph")) 19 | 20 | self.cvNet = cv2.dnn.readNetFromTensorflow(frozen_graph, text_graph) 21 | 22 | # Image input size, must match the network 23 | self.width = int(params.get("detection", "input_width")) 24 | self.height = int(params.get("detection", "input_height")) 25 | 26 | def run(self): 27 | 28 | while self.parent.isTerminated() == False: 29 | 30 | unit = None 31 | 32 | while unit == None: 33 | 34 | unit = self.parent.getUnit(self) 35 | if unit == None: # No units available yet 36 | time.sleep(0.1) 37 | 38 | if self.parent.isTerminated(): 39 | break 40 | 41 | if self.parent.isTerminated(): 42 | break 43 | 44 | img = unit.getFrame() 45 | 46 | detection_img = img.copy() 47 | unit.release() 48 | 49 | rows, cols = img.shape[0:2] 50 | self.cvNet.setInput(cv2.dnn.blobFromImage(detection_img, size=(self.width, self.height), 51 | swapRB=True, crop=False)) 52 | timer = time.time() 53 | cvOut = self.cvNet.forward() 54 | 55 | # print("Det time: {:.2f} ms".format(1000*(time.time() - timer))) 56 | bboxes = [] 57 | timestamps = [] 58 | 59 | for detection in cvOut[0, 0, :, :]: 60 | score = float(detection[2]) 61 | 62 | left = int(detection[3] * cols) 63 | top = int(detection[4] * rows) 64 | right = int(detection[5] * cols) 65 | bottom = int(detection[6] * rows) 66 | width = right - left 67 | height = bottom - top 68 | 69 | if score > 0.3 and width > 60: 70 | bboxes.append([left, top, width, height]) 71 | timestamps.append(unit.getTimeStamp()) 72 | 73 | self.parent.setDetections(bboxes, timestamps) 74 | -------------------------------------------------------------------------------- /EstimateAge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import ControllerThread 4 | import configparser 5 | import sys 6 | 7 | if __name__ == '__main__': 8 | 9 | help_message = ''' 10 | USAGE: EstimateAge.py [params file] 11 | ''' 12 | 13 | if len(sys.argv) > 1: 14 | paramFile = sys.argv[1] 15 | else: 16 | paramFile = "config.ini" 17 | 18 | params = configparser.ConfigParser() 19 | params.read(paramFile) 20 | 21 | # Initialize controller thread 22 | 23 | controllerThread = 
ControllerThread.ControllerThread(params) 24 | controllerThread.start() 25 | -------------------------------------------------------------------------------- /GrabUnit.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Nov 2 16:03:54 2016 4 | 5 | @author: agedemo 6 | """ 7 | 8 | import time 9 | 10 | class GrabUnit(): 11 | 12 | def __init__(self, frame): 13 | 14 | self.timestamp = time.time() 15 | 16 | self.detected = False 17 | self.ageRecognized = False 18 | self.genderRecognized = False 19 | self.expressionRecognized = False 20 | 21 | # Keep track of how many processes are accessing this unit 22 | self.processes = 0 23 | 24 | self.frame = frame 25 | 26 | def getTimeStamp(self): 27 | 28 | return self.timestamp 29 | 30 | def getFrame(self): 31 | 32 | return self.frame 33 | 34 | def acquire(self): 35 | """ 36 | A thread starts to use this resource. Increment the 37 | process counter. 38 | """ 39 | 40 | self.processes += 1 41 | 42 | def release(self): 43 | """ 44 | A thread no longer needs this resource. Decrement the 45 | process counter. 46 | """ 47 | 48 | self.processes -= 1 49 | 50 | def isFree(self): 51 | 52 | if self.processes == 0: 53 | return True 54 | else: 55 | return False 56 | 57 | def getNumProcesses(self): 58 | 59 | return self.processes 60 | 61 | def getTimeStamp(self): 62 | 63 | return self.timestamp 64 | 65 | def getAge(self): 66 | 67 | return time.time() - self.timestamp 68 | 69 | def setDetected(self): 70 | 71 | self.detected = True 72 | 73 | def setAgeRecognized(self): 74 | 75 | self.ageRecognized = True 76 | 77 | def setGenderRecognized(self): 78 | 79 | self.genderRecognized = True 80 | 81 | def setExpressionRecognized(self): 82 | 83 | self.expressionRecognized = True 84 | 85 | def isDetected(self): 86 | 87 | return self.detected 88 | 89 | def isAgeRecognized(self): 90 | 91 | return self.ageRecognized 92 | 93 | def isGenderRecognized(self): 94 | 95 | return self.genderRecognized 96 | 97 | def isExpressionRecognized(self): 98 | 99 | return self.expressionRecognized 100 | 101 | 102 | -------------------------------------------------------------------------------- /GrabberThread.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Nov 2 15:44:40 2016 4 | 5 | @author: agedemo 6 | """ 7 | 8 | import threading 9 | import cv2 10 | import GrabUnit 11 | 12 | class GrabberThread(threading.Thread): 13 | 14 | def __init__(self, parent, params): 15 | 16 | threading.Thread.__init__(self) 17 | 18 | camId = params.getint("camera", "Id") 19 | 20 | camResolution = params.get("camera", "resolution") 21 | camResolution = camResolution.upper().split("X") 22 | camResolution = [int(x) for x in camResolution] 23 | print(("Using camera %d at resolution %s" % (camId, camResolution))) 24 | 25 | self.flipHor = params.getint("camera", "flip_horizontal") 26 | 27 | 28 | self.video = cv2.VideoCapture(camId) # 0: Laptop camera, 1: USB-camera 29 | #self.video.set(3, camResolution[0]) # 1280 #1920 Default: 640 30 | #self.video.set(4, camResolution[1]) # 720 #1080 Default: 480 31 | 32 | self.parent = parent 33 | 34 | print("Grabber Thread initialized...") 35 | 36 | def run(self): 37 | 38 | while not self.parent.isTerminated(): 39 | 40 | stat, frame = self.video.read() 41 | 42 | if frame is not None and not self.parent.isTerminated(): 43 | if self.flipHor: 44 | frame = frame[:, ::-1, ...] 
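                    # (the reversed-column slice above mirrors the frame horizontally for a
                    # selfie view; it is a numpy view, so no pixel data is copied here)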
45 | 46 | unit = GrabUnit.GrabUnit(frame) 47 | 48 | self.parent.putUnit(unit) 49 | 50 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Heikki Huttunen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TUT live age estimator 2 | 3 | **Python implementation of a live deep learning based age/gender/smile/celebrity twin recognizer.** 4 | 5 | ![Image](doc/demo.jpg "demo") 6 | 7 | All components use convolutional networks: 8 | 9 | * Detection uses an SSD model trained on Tensorflow object detection API, but running on OpenCV. 10 | * Age, gender, and smile recognition use a multitask mobilenet trained and running on keras. 11 | * Celebrity twin uses a squeeze-excite seresnet18 to extract features, trained and running on keras. 12 | 13 | The detailed functionality of the system (without multitask and celebrity similarity) is described in our paper: 14 | 15 | >Janne Tommola, Pedram Ghazi, Bishwo Adhikari, Heikki Huttunen, "[Real Time System for Facial Analysis](https://arxiv.org/abs/1809.05474)," Submitted to EUVIP2018. 16 | 17 | If you use our work for research purposes, consider citing the above work. 18 | 19 | ## Usage instructions: 20 | 21 | 22 | Dependencies: [OpenCV 4.0.1+](http://www.opencv.org/), [Tensorflow 1.8+](http://tensorflow.org), [Keras 2.2.3+](http://keras.io/), and [faiss](https://github.com/facebookresearch/faiss/). 23 | 24 | * Requires a webcam. 25 | * Tested on Ubuntu Linux 16.04, 18.04 and Windows 10 with and without a GPU. 26 | * Install OpenCV 4.0.1 or newer. Recommended to install with `pip3 install opencv-python` (includes GTK support, which is required). Freetype support for nicer fonts requires manual compilation of OpenCV. 27 | * Install Tensorflow (1.8 or newer). On a CPU, the MKL version seems to be radically faster than others (Anaconda install by smth like `conda install tensorflow=1.10.0=mkl_py36hb361250_0`. Seek for proper versions with `conda search tensorflow`.). On GPU, use `pip3 install tensorflow-gpu`. 28 | * Install Keras 2.2.3 (or newer). Earlier versions have a slightly different way of loading the models. For example: `pip3 install keras`. 
29 | * Install dlib (version 19.4 or newer) with python 3 dependencies; _e.g.,_ `pip3 install dlib`. 30 | * Install faiss with Anaconda `conda install faiss-cpu -c pytorch`. 31 | * Run with `python3 EstimateAge.py`. 32 | 33 | [Required deep learning models and celebrity dataset](http://doi.org/10.5281/zenodo.3466980). Extract directly to the main folder so that 2 new folders are created there. 34 | 35 | [Example video](https://youtu.be/Kfe5hKNwrCU). 36 | 37 | Contributors: [Heikki Huttunen](http://www.cs.tut.fi/~hehu/), Janne Tommola 38 | -------------------------------------------------------------------------------- /RecognitionThread.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import threading 4 | import time 5 | import numpy as np 6 | import os 7 | from collections import namedtuple 8 | import cv2 9 | import keras 10 | from keras.utils.generic_utils import CustomObjectScope 11 | from compute_features import lifted_struct_loss, triplet_loss 12 | import h5py 13 | import faiss 14 | 15 | 16 | class RecognitionThread(threading.Thread): 17 | 18 | CELEB_RECOG_BUFFER = 15 # How many recognitions to store for picking the most common 19 | 20 | def __init__(self, parent, params): 21 | print("Initializing recognition thread...") 22 | threading.Thread.__init__(self) 23 | self.parent = parent 24 | 25 | ##### Initialize aligners for face alignment. 26 | aligner_path = params.get("recognition", "aligner") 27 | aligner_targets_path = params.get("recognition", "aligner_targets") 28 | self.aligner = keras.models.load_model(aligner_path) 29 | self.aligner._make_predict_function() 30 | self.aligner_input_shape = (self.aligner.input_shape[2], self.aligner.input_shape[1]) 31 | 32 | # load targets 33 | aligner_targets = np.loadtxt(aligner_targets_path) 34 | left_eye = (aligner_targets[36] + aligner_targets[39]) / 2 35 | right_eye = (aligner_targets[42] + aligner_targets[45]) / 2 36 | nose = aligner_targets[30] 37 | left_mouth = aligner_targets[48] 38 | right_mouth = aligner_targets[54] 39 | # Dlib order 40 | #self.shape_targets = np.stack((left_eye, left_mouth, nose, right_eye, right_mouth)) 41 | # CNN order 42 | self.shape_targets = np.stack((left_eye, right_eye, nose, left_mouth, right_mouth)) 43 | 44 | ##### Initialize networks for Age, Gender and Expression 45 | ##### 1. AGE, GENDER, SMILE MULTITASK 46 | print("Initializing multitask network...") 47 | multitaskpath = params.get("recognition", "multitask_folder") 48 | with CustomObjectScope({'relu6': keras.layers.ReLU(6.), 49 | 'DepthwiseConv2D': keras.layers.DepthwiseConv2D}): 50 | self.multiTaskNet = keras.models.load_model(os.path.join(multitaskpath, 'model.h5')) 51 | self.multiTaskNet._make_predict_function() 52 | 53 | ##### Read class names 54 | self.expressions = {int(key): val for key, val in params['expressions'].items()} # convert string key to int 55 | self.minDetections = int(params.get("recognition", "mindetections")) 56 | 57 | ##### 2. 
CELEBRITY 58 | self.siamesepaths = params['celebmodels'] 59 | self.siamesepath = self.siamesepaths["0"] 60 | self.celeb_dataset = params.get("recognition", "celeb_dataset") 61 | self.visualization_path = params.get("recognition", "visualization_path") 62 | self.initialize_celeb() 63 | 64 | # Starting the thread 65 | self.switching_model = False 66 | self.recognition_running = False 67 | print("Recognition thread started...") 68 | 69 | def initialize_celeb(self): 70 | print("Initializing celebrity network...") 71 | 72 | with CustomObjectScope({'relu6': keras.layers.ReLU(6.), 73 | 'DepthwiseConv2D': keras.layers.DepthwiseConv2D, 74 | 'lifted_struct_loss': lifted_struct_loss, 75 | 'triplet_loss': triplet_loss}): 76 | self.siameseNet = keras.models.load_model(os.path.join(self.siamesepath, "feature_model.h5")) 77 | 78 | self.siameseNet._make_predict_function() 79 | 80 | ##### Read celebrity features 81 | celebrity_features = self.siamesepath + os.sep + "features_" + self.celeb_dataset + ".h5" 82 | print("Reading celebrity data from {}...".format(celebrity_features)) 83 | 84 | with h5py.File(celebrity_features, "r") as h5: 85 | celeb_features = np.array(h5["features"]).astype(np.float32) 86 | self.path_ends = list(h5["path_ends"]) 87 | self.celeb_files = [os.path.join(self.visualization_path, s.decode("utf-8")) for s in self.path_ends] 88 | 89 | print("Building index...") 90 | self.celeb_index = faiss.IndexFlatL2(celeb_features.shape[1]) 91 | self.celeb_index.add(celeb_features) 92 | 93 | def crop_face(self, img, rect, margin=0.2): 94 | 95 | x,y,w,h = rect 96 | x1 = x 97 | x2 = x + w 98 | y1 = y 99 | y2 = y + h 100 | 101 | # Extend the area into square shape: 102 | if w > h: 103 | center = int(0.5 * (y1 + y2)) 104 | h = w 105 | y1 = center - int(h / 2) 106 | y2 = y1 + h 107 | elif h > w: 108 | center = int(0.5 * (x1 + x2)) 109 | w = h 110 | x1 = center - int(w / 2) 111 | x2 = x1 + w 112 | 113 | # add margin 114 | full_crop_x1 = x1 - int(w * margin) 115 | full_crop_y1 = y1 - int(h * margin) 116 | full_crop_x2 = x2 + int(w * margin) 117 | full_crop_y2 = y2 + int(h * margin) 118 | # size of face with margin 119 | new_size_w = full_crop_x2 - full_crop_x1 + 1 120 | new_size_h = full_crop_y2 - full_crop_y1 + 1 121 | 122 | # ensure that the region cropped from the original image with margin 123 | # doesn't go beyond the image size 124 | crop_x1 = max(full_crop_x1, 0) 125 | crop_y1 = max(full_crop_y1, 0) 126 | crop_x2 = min(full_crop_x2, img.shape[1] - 1) 127 | crop_y2 = min(full_crop_y2, img.shape[0] - 1) 128 | # size of the actual region being cropped from the original image 129 | crop_size_w = crop_x2 - crop_x1 + 1 130 | crop_size_h = crop_y2 - crop_y1 + 1 131 | 132 | # coordinates of region taken out of the original image in the new image 133 | new_location_x1 = crop_x1 - full_crop_x1 134 | new_location_y1 = crop_y1 - full_crop_y1 135 | new_location_x2 = crop_x1 - full_crop_x1 + crop_size_w - 1 136 | new_location_y2 = crop_y1 - full_crop_y1 + crop_size_h - 1 137 | 138 | new_img = np.random.randint(256, size=(new_size_h, new_size_w, img.shape[2])).astype('uint8') 139 | 140 | new_img[new_location_y1: new_location_y2 + 1, new_location_x1: new_location_x2 + 1, :] = \ 141 | img[crop_y1:crop_y2 + 1, crop_x1:crop_x2 + 1, :] 142 | 143 | # if margin goes beyond the size of the image, repeat last row of pixels 144 | if new_location_y1 > 0: 145 | new_img[0:new_location_y1, :, :] = np.tile(new_img[new_location_y1, :, :], (new_location_y1, 1, 1)) 146 | 147 | if new_location_y2 < new_size_h - 1: 148 | 
new_img[new_location_y2 + 1:new_size_h, :, :] = np.tile(new_img[new_location_y2:new_location_y2 + 1, :, :], 149 | (new_size_h - new_location_y2 - 1, 1, 1)) 150 | if new_location_x1 > 0: 151 | new_img[:, 0:new_location_x1, :] = np.tile(new_img[:, new_location_x1:new_location_x1 + 1, :], 152 | (1, new_location_x1, 1)) 153 | if new_location_x2 < new_size_w - 1: 154 | new_img[:, new_location_x2 + 1:new_size_w, :] = np.tile(new_img[:, new_location_x2:new_location_x2 + 1, :], 155 | (1, new_size_w - new_location_x2 - 1, 1)) 156 | 157 | return new_img 158 | 159 | def five_points_aligner(self, shape_targets, landmarks_pred, img, rect): 160 | 161 | B = shape_targets 162 | A = np.hstack((np.array(landmarks_pred), np.ones((len(landmarks_pred), 1)))) 163 | 164 | a = np.row_stack((np.array([-A[0][1], -A[0][0], 0, -1]), np.array([ 165 | A[0][0], -A[0][1], 1, 0]))) 166 | b = np.row_stack((-B[0][1], B[0][0])) 167 | 168 | for i in range(A.shape[0] - 1): 169 | i += 1 170 | a = np.row_stack((a, np.array([-A[i][1], -A[i][0], 0, -1]))) 171 | a = np.row_stack((a, np.array([A[i][0], -A[i][1], 1, 0]))) 172 | b = np.row_stack((b, np.array([[-B[i][1]], [B[i][0]]]))) 173 | 174 | X, res, rank, s = np.linalg.lstsq(a, b, rcond=-1) 175 | cos = (X[0][0]).real.astype(np.float32) 176 | sin = (X[1][0]).real.astype(np.float32) 177 | t_x = (X[2][0]).real.astype(np.float32) 178 | t_y = (X[3][0]).real.astype(np.float32) 179 | 180 | H = np.array([[cos, -sin, t_x], [sin, cos, t_y]]) 181 | s = np.linalg.eigvals(H[:, :-1]) 182 | R = s.max() / s.min() 183 | 184 | if R < 2.0: 185 | warped = cv2.warpAffine(img, H, (224, 224)) 186 | else: 187 | # Seems to distort too much, probably error in landmarks 188 | # Let's just crop. 189 | crop = self.crop_face(img, rect) 190 | warped = cv2.resize(crop, (224, 224)) 191 | 192 | return warped 193 | 194 | def aligner_preprocess(self, img): 195 | # RGB -> BGR 196 | 197 | x = img[..., ::-1].astype(np.float32) 198 | 199 | x[..., 0] -= 103.939 200 | x[..., 1] -= 116.779 201 | x[..., 2] -= 123.68 202 | 203 | return x 204 | 205 | def run(self): 206 | Celebinfo = namedtuple('Celeb', ['filename', 'distance']) 207 | 208 | while not self.parent.isTerminated(): 209 | 210 | while self.switching_model: 211 | self.recognition_running = False 212 | time.sleep(0.1) 213 | 214 | self.recognition_running = True 215 | 216 | faces = self.parent.getFaces() 217 | while faces == None: 218 | time.sleep(0.1) 219 | faces = self.parent.getFaces() 220 | 221 | validFaces = [f for f in faces if len(f['bboxes']) > self.minDetections] 222 | 223 | for face in validFaces: 224 | # get the timestamp of the most recent frame: 225 | timestamp = face['timestamps'][-1] 226 | unit = self.parent.getUnit(self, timestamp) 227 | 228 | if unit is not None: 229 | img = unit.getFrame() 230 | mean_box = np.mean(face['bboxes'], axis=0) 231 | x, y, w, h = [int(c) for c in mean_box] 232 | 233 | # Align the face to match the targets 234 | 235 | # 1. 
DETECT LANDMARKS 236 | crop = img[y : y+h, x : x+w, ::-1].astype(np.uint8) # Crop face and convert BGR to RGB (which preprocess will convert back to BGR --- TODO: clean up) 237 | 238 | if crop.size == 0: 239 | continue 240 | 241 | landmarks_crop = cv2.resize(crop, self.aligner_input_shape) 242 | landmarks_crop = self.aligner_preprocess(landmarks_crop) 243 | net_input = landmarks_crop[np.newaxis, ...].astype(np.float32) 244 | 245 | s = self.aligner.predict(net_input)[0] 246 | landmarks = s.reshape((5, 2)) 247 | 248 | # Normalize landmarks to the full image coordinates: 249 | landmarks[:, 0] = x + landmarks[:, 0] * w / self.aligner_input_shape[0] 250 | landmarks[:, 1] = y + landmarks[:, 1] * h / self.aligner_input_shape[1] 251 | 252 | if "landmarks" in face: 253 | face["landmarks"].append(landmarks) 254 | else: 255 | face["landmarks"] = [landmarks] 256 | 257 | landmarks = np.array(face["landmarks"][-10:]).mean(axis = 0) 258 | 259 | # 2. ALIGN 260 | crop = self.five_points_aligner(self.shape_targets, landmarks, img, rect = [x,y,w,h]) 261 | cv2.imwrite("rec/%d.jpg" % np.random.randint(0, 1000), crop) 262 | 263 | # Save aligned face crop, used for debugging if turned on. 264 | face["crop"] = crop[..., ::-1] 265 | 266 | crop = crop.astype(np.float32) 267 | 268 | siamese_target_size = self.siameseNet.input_shape[1:3] 269 | crop_celeb = cv2.resize(crop, siamese_target_size).astype(np.float32) 270 | 271 | # Preprocess network inputs, add singleton batch dimension 272 | recog_input = np.expand_dims(crop / 255, axis=0) 273 | siamese_input = np.expand_dims(crop_celeb / 255, axis=0) 274 | 275 | # Recognize age, gender and smile in one forward pass 276 | 277 | ageout, genderout, smileout = self.multiTaskNet.predict(recog_input) 278 | age = np.dot(ageout[0], list(range(101))) 279 | if "age" in face: 280 | face["age"] = 0.95 * face["age"] + 0.05 * age 281 | else: 282 | face["age"] = age 283 | face["recog_round"] = 0 284 | 285 | gender = genderout[0][1] # male probability 286 | if "gender" in face: 287 | face["gender"] = 0.8 * face["gender"] + 0.2 * gender 288 | else: 289 | face["gender"] = gender 290 | 291 | t = smileout[0] 292 | t = np.argmax(t) 293 | expression = self.expressions[t] 294 | face["expression"] = expression 295 | 296 | # Find closest celebrity match if new face or once every 5 rounds 297 | if "celebs" not in face or face["recog_round"] % 5 == 0: 298 | siamese_features = self.siameseNet.predict(siamese_input) 299 | K = 1 # This many nearest matches 300 | celeb_distance, I = self.celeb_index.search(siamese_features, K) 301 | celeb_idx = I[0][0] 302 | celeb_filename = self.celeb_files[celeb_idx] 303 | 304 | if "celebs" in face: 305 | celebs = face["celebs"] 306 | recognitions = celebs["recognitions"] 307 | 308 | # Maintain a buffer of closest matches and pick the most common one for stability 309 | if recognitions < RecognitionThread.CELEB_RECOG_BUFFER: 310 | celebs["indexes"].append(celeb_idx) 311 | else: 312 | celebs["indexes"][recognitions % RecognitionThread.CELEB_RECOG_BUFFER] = celeb_idx 313 | 314 | celebs[celeb_idx] = Celebinfo(filename=celeb_filename, distance=celeb_distance) 315 | celebs["recognitions"] += 1 316 | else: 317 | face["celebs"] = { 318 | "indexes": [celeb_idx], 319 | celeb_idx: Celebinfo(filename=celeb_filename, distance=celeb_distance), 320 | "recognitions": 1} 321 | 322 | face["recog_round"] += 1 323 | 324 | # Support for switching celebrity model on the fly 325 | def switch_model(self, modelidx): 326 | 327 | self.siamesepath = self.siamesepaths[modelidx] 328 | 329 | 
print("Switching to", self.siamesepath) 330 | print("Stopping recognition thread...") 331 | self.switching_model = True 332 | 333 | # Wait for recognition thread to finish and stop before changing 334 | while self.recognition_running: 335 | time.sleep(0.1) 336 | 337 | self.initialize_celeb() 338 | 339 | print("Switching model complete. Resuming recognition thread...") 340 | self.switching_model = False 341 | 342 | def print_models(self): 343 | idx = 0 344 | while str(idx) in self.siamesepaths: 345 | desc = self.siamesepaths.get("{}_desc".format(idx), "") 346 | modelpath = self.siamesepaths[str(idx)] 347 | currentindicator = "<----- CURRENT MODEL" if modelpath == self.siamesepath else "" 348 | if desc: 349 | print("{}: {}, {} {}".format(idx, modelpath, desc, currentindicator)) 350 | else: 351 | print("{}: {} {}".format(idx, modelpath, currentindicator)) 352 | idx += 1 353 | 354 | -------------------------------------------------------------------------------- /UnitServer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Nov 2 15:36:48 2016 4 | 5 | @author: agedemo 6 | """ 7 | 8 | import DetectionThread 9 | import RecognitionThread 10 | import threading 11 | 12 | class UnitServer(): 13 | 14 | def __init__(self, maxUnits = 4): 15 | 16 | self.maxUnits = maxUnits 17 | self.units = [] 18 | self.mutex = threading.Lock() 19 | 20 | def getUnit(self, caller, timestamp = None): 21 | 22 | self.mutex.acquire() 23 | 24 | # Detection thread will receive the newest undetected frame 25 | 26 | unit = None 27 | 28 | if timestamp is not None: 29 | 30 | for f in self.units: 31 | if f.getTimeStamp() == timestamp: 32 | unit = f 33 | 34 | else: 35 | 36 | if isinstance(caller, DetectionThread.DetectionThread): 37 | 38 | validUnits = [f for f in self.units if f.isDetected() == False] 39 | 40 | if len(validUnits) == 0: 41 | unit = None 42 | else: 43 | unit = validUnits[-1] 44 | unit.acquire() 45 | unit.setDetected() 46 | 47 | #print("Locking %.6f for %s" % (unit.getTimeStamp(), str(type(caller)))) 48 | 49 | # Age thread will receive the newest detected frame with age rec not done 50 | 51 | if isinstance(caller, RecognitionThread.RecognitionThread): 52 | 53 | validUnits = [f for f in self.units if 54 | f.isDetected() == True and 55 | f.isAgeRecognized() == False] 56 | 57 | if len(validUnits) == 0: 58 | unit = None 59 | else: 60 | unit = validUnits[-1] 61 | unit.acquire() 62 | unit.setDetected() 63 | 64 | # print("Locking %.6f for %s" % (unit.getTimeStamp(), str(type(caller)))) 65 | 66 | 67 | self.mutex.release() 68 | 69 | return unit 70 | 71 | def putUnit(self, unit): 72 | 73 | self.mutex.acquire() 74 | 75 | #print "Adding %.6f" % (unit.getTimeStamp()) 76 | 77 | if len(self.units) >= self.maxUnits: 78 | # Attempt to remove oldest unit 79 | if self.units[0].isFree(): 80 | self.units.pop(0) 81 | 82 | if len(self.units) < self.maxUnits: 83 | self.units.append(unit) 84 | else: 85 | #print("Unable to add new unit.") 86 | pass 87 | 88 | # for i,unit in enumerate(self.units): 89 | # print("Unit %.6f: numProcesses: %d" % (unit.getTimeStamp(), unit.getNumProcesses())) 90 | # print "=" * 5 91 | 92 | self.mutex.release() 93 | -------------------------------------------------------------------------------- /Verdana.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahehu/TUT-live-age-estimator/508e5745d858eede98cf70ac305425d5d618e339/Verdana.ttf 
-------------------------------------------------------------------------------- /compute_features.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Oct 15 15:47:04 2018 4 | 5 | @author: agedemo 6 | """ 7 | 8 | from keras.models import load_model 9 | import h5py 10 | import cv2 11 | import sys 12 | import numpy as np 13 | import time 14 | import os 15 | import tensorflow as tf 16 | from keras import backend as K 17 | 18 | visualize = True 19 | if visualize: 20 | import matplotlib.pyplot as plt 21 | 22 | def find_images_from_tree(path): 23 | """ Collect images from a tree with one folder per identity """ 24 | 25 | print("Searching for images in {}".format(path)) 26 | image_files = [] 27 | 28 | for root, dirs, files in os.walk(path): 29 | for name in files: 30 | if name.lower().endswith(("jpg", "jpeg", "png", "bmp")): 31 | image_files.append(root + os.sep + name) 32 | 33 | return image_files 34 | 35 | def find_images(path): 36 | """ Collect one image per identity """ 37 | 38 | found_ids = [] 39 | files = [] 40 | 41 | top_folder = os.sep.join(path.split(os.sep)[:-1]) 42 | identity_file = top_folder + os.sep + "identities.txt" 43 | 44 | with open(identity_file) as fp: 45 | for i, line in enumerate(fp): 46 | 47 | name, identity = line.split() 48 | identity = int(identity) 49 | 50 | if identity not in found_ids: 51 | found_ids.append(identity) 52 | fullfile = os.path.abspath(path + os.sep + os.path.basename(name)) 53 | 54 | if not os.path.isfile(fullfile): 55 | print("File {} not found, ignoring.".format(fullfile)) 56 | else: 57 | files.append(fullfile) 58 | 59 | return files 60 | 61 | def triplet_semihard_loss(y_true, y_pred): 62 | 63 | loss = tf.contrib.losses.metric_learning.triplet_semihard_loss(labels = K.argmax(y_true, axis = -1), embeddings = y_pred, margin = 1.0) 64 | return loss 65 | 66 | def cluster_loss(y_true, y_pred): 67 | 68 | loss = tf.contrib.losses.metric_learning.cluster_loss(labels = K.argmax(y_true, axis = -1), embeddings = y_pred, margin_multiplier = 1.0) 69 | return loss 70 | 71 | def triplet_loss(y_true, y_pred): 72 | 73 | loss = tf.contrib.losses.metric_learning.triplet_semihard_loss(labels = K.argmax(y_true, axis = -1), embeddings = y_pred, margin = 1.0) 74 | return loss 75 | 76 | def lifted_struct_loss(y_true, y_pred): 77 | 78 | loss = tf.contrib.losses.metric_learning.lifted_struct_loss(labels = K.argmax(y_true, axis = -1), embeddings = y_pred, margin = 1.0) 79 | return loss 80 | 81 | if __name__ == "__main__": 82 | 83 | if len(sys.argv) > 1: 84 | model_folder = sys.argv[1] 85 | else: 86 | model_folder = "recognizers/celebrities/network/seresnet18_STRUCT_2019-04-03-02-56-41" 87 | 88 | model = load_model(model_folder + os.sep + "feature_model.h5", custom_objects = {'triplet_loss': triplet_loss, 'lifted_struct_loss': lifted_struct_loss, 'cluster_loss': cluster_loss}) 89 | 90 | if len(sys.argv) > 2: 91 | images_folder = sys.argv[2] 92 | else: 93 | images_folder = "recognizers/celebrities/data/CelebA/CelebA_aligned_5points" 94 | 95 | images_folder = os.path.abspath(images_folder) 96 | files = find_images(path = images_folder) 97 | #files = find_images_from_tree(path = images_folder) 98 | 99 | # Gather the file structure of the dataset, used when visualizing with different images than the ones features are calculated from 100 | commonpath = os.path.commonpath((files[0], images_folder)) 101 | path_ends = [os.path.relpath(file, start=commonpath) for file in files] 102 | 103 | in_shape = 
model.input_shape[1:3] 104 | out_dim = model.output_shape[-1] 105 | 106 | features = np.empty((len(files), out_dim)) 107 | 108 | print("Found {} files...".format(len(files))) 109 | 110 | if visualize: 111 | fig, ax = plt.subplots(2, 1) 112 | 113 | start_time = time.time() 114 | buf_size = 64 # MUST BE MANUALLY DECREASED IF THERE ARE FEWER IMAGES! 115 | fb_shape = (buf_size, ) + model.input_shape[1:] 116 | frame_buffer = np.empty(fb_shape, dtype = np.float32) 117 | fb_idx = 0 118 | 119 | cnt = 0 120 | prev_sample = None 121 | 122 | for i, name in enumerate(files): 123 | 124 | print(name) 125 | img = cv2.imread(name) 126 | 127 | # Take center crop and scale to in-shape 128 | h, w, d = img.shape 129 | 130 | if w > h: 131 | c = w // 2 132 | x1 = c - h // 2 133 | x2 = x1 + h 134 | img = img[:, x1:x2, :] 135 | elif w < h: 136 | c = h // 2 137 | y1 = c - w // 2 138 | y2 = y1 + w 139 | img = img[y1:y2, :, :] 140 | img = cv2.resize(img, in_shape) 141 | 142 | # Convert to RGB and scale 143 | img = img[..., ::-1].astype(np.float32) / 255.0 144 | frame_buffer[fb_idx, ...] = img 145 | fb_idx += 1 146 | 147 | if fb_idx == buf_size: 148 | 149 | feat = model.predict(frame_buffer) 150 | elapsed_time = time.time() - start_time 151 | sec_per_frame = elapsed_time / (i+1) 152 | remaining_frames = len(files) - (i+1) 153 | remaining_time = remaining_frames * sec_per_frame 154 | remaining_time_mins = remaining_time / 60 155 | 156 | msg = "Computing features: {:.1f} % done [{:.1f} MB]. {:.1f} mins remaining".format(100*(i+1) / len(files), 157 | sys.getsizeof(features) / 1024**2, 158 | remaining_time_mins) 159 | 160 | print(msg, end = " ") 161 | print("File {}".format(name)) 162 | 163 | if visualize: 164 | 165 | ax[0].cla() 166 | 167 | f = feat[0,...] 168 | ax[0].plot(f) 169 | ax[0].set_title(msg) 170 | 171 | if prev_sample is not None: 172 | ax[1].cla() 173 | ax[1].plot(f - prev_sample) 174 | ax[1].set_title("Difference to previous sample") 175 | 176 | plt.show(block = False) 177 | plt.pause(0.1) 178 | prev_sample = f 179 | 180 | fb_idx = 0 181 | else: 182 | continue 183 | 184 | features[cnt : cnt + feat.shape[0], :] = feat 185 | cnt += feat.shape[0] 186 | 187 | with h5py.File(model_folder + os.sep + "features_" + os.path.basename(os.path.normpath(images_folder)) + ".h5", "w") as h5file: 188 | h5file["features"] = np.array(features) 189 | b_files = [bytes(f, 'utf-8') for f in files] 190 | h5file["filenames"] = b_files 191 | b_pathends = [bytes(f, 'utf-8') for f in path_ends] 192 | h5file["path_ends"] = b_pathends 193 | 194 | -------------------------------------------------------------------------------- /config.ini: -------------------------------------------------------------------------------- 1 | [general] 2 | # Visualize aligned crop of face detection 3 | debug = True 4 | 5 | [camera] 6 | id = 0 7 | resolution = 1024x768 8 | flip_horizontal = 1 9 | 10 | [window] 11 | caption = TUT Age Estimator 12 | displaysize = 1200x900 13 | freetype_fontpath = Verdana.ttf 14 | 15 | [server] 16 | num_frames = 8 17 | 18 | [recognition] 19 | multitask_folder=recognizers/multitask/MOBILENET_2019-04-09-08-43-46 20 | 21 | celeb_dataset determines the features loaded (ie. 
features_.h5) 22 | #celeb_dataset = FinnishCelebs_aligned_YueMethod 23 | #visualization_path = recognizers/celebrities/data/visualization_FinnishCelebs_unprocessed 24 | 25 | celeb_dataset = CelebA_aligned_5points 26 | visualization_path = recognizers/celebrities/data/CelebA/CelebA_aligned_5points 27 | 28 | mindetections = 2 29 | aligner = recognizers/alignment/model-056-0.716316-0.967865.h5 30 | aligner_targets = recognizers/alignment/targets_symm.txt 31 | 32 | [celebmodels] 33 | # Top model is the default, rest can be changed to while the program is running 34 | 0 = recognizers/celebrities/network/seresnet18_STRUCT_2019-04-03-02-56-41 35 | 0_desc = Default (squeeze-excite Resnet18), aligned with 5 point method 36 | ;1 = recognizers/celebrities/network/resnet34_STRUCT_2019-04-03-02-14-26 37 | ;1_desc = Alternative (Resnet 34), aligned with 5 point method 38 | 39 | [expressions] 40 | 0 = No smile 41 | 1 = Smile 42 | # Current multitask model only supports the two above expressions. 43 | ;2 = Sad 44 | ;3 = Surprise 45 | ;4 = Fear 46 | ;5 = Disgust 47 | ;6 = Anger 48 | 49 | [detection] 50 | input_width = 240 51 | input_height = 180 52 | inference_graph = detection/240x180_depth075_ssd_mobilenetv1/frozen_inference_graph.pb 53 | text_graph = detection/240x180_depth075_ssd_mobilenetv1/graph.pbtxt 54 | 55 | -------------------------------------------------------------------------------- /doc/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahehu/TUT-live-age-estimator/508e5745d858eede98cf70ac305425d5d618e339/doc/architecture.png -------------------------------------------------------------------------------- /doc/demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahehu/TUT-live-age-estimator/508e5745d858eede98cf70ac305425d5d618e339/doc/demo.jpg -------------------------------------------------------------------------------- /tools/generate_celeb_visualizations.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | import cv2 5 | 6 | """ 7 | Generates pre-scaled output images from source images, that will be shown in the application as celebrity match. 8 | The use case for this is that you probably want to compare aligned face features to aligned celebrity 9 | features. They might not look very pleasant though, so this gives the option to use unprocessed images in visualization. 10 | 11 | The usage of large source images can slow down the program heavily (reading and resizing the image), so the purpose of this 12 | script is to adjust them to a small but still pleasant looking size. 13 | """ 14 | 15 | output_size = 300 # A compromise to preserve image quality while getting rid of huge images that would slow down the 16 | # live-age-estimator. The output images are resized further in the application, hopefully to a smaller size than this. 
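# The output mirrors the input tree as <basepath>/visualization_<celebfolder>/<identity>/<image>,
# which matches the visualization_FinnishCelebs_unprocessed path referenced in config.ini.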
17 | 18 | if __name__ == '__main__': 19 | path = "../recognizers/celebrities/data/FinnishCelebs_unprocessed" 20 | basepath, celebfolder = os.path.split(path) 21 | os.makedirs(os.path.join(basepath, "visualization_" + celebfolder), exist_ok=True) 22 | 23 | for root, dirs, files in os.walk(path): 24 | for filename in files: 25 | if filename.lower().endswith(("jpg", "jpeg", "png", "bmp")): 26 | celebname = os.path.split(root)[1] 27 | 28 | img = cv2.imread(root + os.sep + filename) 29 | newpath = os.path.join(basepath, "visualization_" + celebfolder, celebname) 30 | os.makedirs(newpath, exist_ok=True) 31 | 32 | h, w = img.shape[0:2] 33 | 34 | if w >= h: 35 | new_h = output_size 36 | new_w = int(output_size*w/h) 37 | else: 38 | new_w = output_size 39 | new_h = int(output_size*h/w) 40 | 41 | img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA) 42 | cv2.imwrite(os.path.join(newpath + os.sep + filename), img) 43 | 44 | 45 | --------------------------------------------------------------------------------