├── .gitignore
├── README.md
├── main.py
├── requirements.txt
└── video.py
/.gitignore:
--------------------------------------------------------------------------------
resources/
.idea/

*.dat
*.png
.DS_Store

# Created by https://www.gitignore.io/api/python

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject

# Rope project settings
.ropeproject

# End of https://www.gitignore.io/api/python
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Installation
```sh
python3 -m virtualenv -p python3 .env
source .env/bin/activate
pip3 install -r requirements.txt
(mkdir -p resources; cd resources; curl http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 | bzip2 -d > shape_predictor_68_face_landmarks.dat)
```

# Running
```sh
source .env/bin/activate

# Glasses filter
./main.py --filter glasses --footage resources/glasses.png
# Moustache filter
./main.py --filter moustache --footage resources/moustache.png
# Face swap filter: see the example at the end of this README
```

# Research paper
The paper written about this project is available here: https://me.syzible.com/snapchat-filters.pdf

# About
Three filters for CS7434 (Augmented Reality): face swap, glasses and moustache.

Clone the repo and create a directory in it called "resources". Into it, download the pre-trained face landmark data available here:
http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2

Also place any images you want to use for the filters in resources (face swap images, moustache image, glasses image, etc.) and modify the code as appropriate.

Make sure you have Python 3 installed; see here for an easy installation with Homebrew on macOS: http://www.pyimagesearch.com/2016/12/19/install-opencv-3-on-macos-with-homebrew-the-easy-way/

If you want to contribute bug fixes or extend functionality, feel free to send a pull request.
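The face swap filter is run the same way; a sketch of the invocation, assuming you have placed a frontal face photo in resources (the filename below is only an example, borrowed from a comment in `main.py`):

```sh
# Face swap filter (press Esc or Ctrl+C to quit)
./main.py --filter face --footage resources/bryan_cranston.png

# Optional flags: --show-bounds draws the overlay anchor points,
# --video-source selects the webcam device number (default 0)
./main.py --filter glasses --footage resources/glasses.png --show-bounds --video-source 0
```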
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3

import math

import cv2
import dlib
import numpy as np

from video import create_capture
import sys
import argparse
import time
import logging

predictor_path = "resources/shape_predictor_68_face_landmarks.dat"
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(predictor_path)

SCALE_FACTOR = 1
FEATHER_AMOUNT = 11
COLOUR_CORRECT_BLUR = 0.5

MOUTH_POINTS = list(range(48, 61))
RIGHT_BROW_POINTS = list(range(17, 22))
LEFT_BROW_POINTS = list(range(22, 27))
RIGHT_EYE_POINTS = list(range(36, 42))
LEFT_EYE_POINTS = list(range(42, 48))
NOSE_POINTS = list(range(27, 35))

POINTS = LEFT_BROW_POINTS + RIGHT_EYE_POINTS + LEFT_EYE_POINTS + RIGHT_BROW_POINTS + NOSE_POINTS + MOUTH_POINTS
ALIGN_POINTS = POINTS
OVERLAY_POINTS = [POINTS]


class TimeProfiler(object):
    def __init__(self, label):
        self.label = label

    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, *exc):
        logging.info("The %s is done in %fs", self.label, time.time() - self.start)


def get_cam_frame(cam):
    ret, img = cam.read()
    img = cv2.resize(img, (640, 480))
    return img


def get_landmarks(img):
    rects = detector(img, 1)
    if len(rects) == 0:
        # no face found in the frame
        return None

    return np.matrix([[p.x, p.y] for p in predictor(img, rects[0]).parts()])


def annotate_landmarks(im, landmarks):
    im = im.copy()
    for idx, point in enumerate(landmarks):
        pos = (int(point[0, 0]), int(point[0, 1]))
        cv2.putText(im, str(idx), pos,
                    fontFace=cv2.FONT_HERSHEY_SCRIPT_SIMPLEX,
                    fontScale=0.4,
                    color=(0, 0, 255))
        cv2.circle(im, pos, 3, color=(0, 255, 255))
    return im


def draw_convex_hull(im, points, color):
    points = cv2.convexHull(points)
    cv2.fillConvexPoly(im, points, color=color)


def get_face_mask(im, landmarks):
    im = np.zeros(im.shape[:2], dtype=np.float64)

    for group in OVERLAY_POINTS:
        draw_convex_hull(im, landmarks[group], color=1)

    im = np.array([im, im, im]).transpose((1, 2, 0))
    im = cv2.GaussianBlur(im, (FEATHER_AMOUNT, FEATHER_AMOUNT), 0) > 0
    im = im * 1.0
    im = cv2.GaussianBlur(im, (FEATHER_AMOUNT, FEATHER_AMOUNT), 0)

    return im


def transformation_f_points(points1, points2):
    points1 = points1.astype(np.float64)
    points2 = points2.astype(np.float64)

    c1 = np.mean(points1, axis=0)
    c2 = np.mean(points2, axis=0)

    points1 -= c1
    points2 -= c2

    s1 = np.std(points1)
    s2 = np.std(points2)

    points1 /= s1
    points2 /= s2

    u, s, vt = np.linalg.svd(points1.T * points2)
    r = (u * vt).T

    h_stack = np.hstack(((s2 / s1) * r, c2.T - (s2 / s1) * r * c1.T))
    return np.vstack([h_stack, np.matrix([0., 0., 1.])])


def get_im_w_landmarks(fname):
    im = cv2.imread(fname, cv2.IMREAD_COLOR)
    im = cv2.resize(im, (im.shape[1] * SCALE_FACTOR,
                         im.shape[0] * SCALE_FACTOR))
    s = get_landmarks(im)

    return im, s


def warp_im(im, m, dshape):
    output_im = np.zeros(dshape, dtype=im.dtype)
    cv2.warpAffine(im, m[:2], (dshape[1], dshape[0]), dst=output_im,
                   borderMode=cv2.BORDER_TRANSPARENT, flags=cv2.WARP_INVERSE_MAP)
    return output_im


def correct_colours(im1, im2, landmarks1):
    mean_left = np.mean(landmarks1[LEFT_EYE_POINTS], axis=0)
    mean_right = np.mean(landmarks1[RIGHT_EYE_POINTS], axis=0)

    blur_amount = COLOUR_CORRECT_BLUR * np.linalg.norm(mean_left - mean_right)
    blur_amount = int(blur_amount)

    if blur_amount % 2 == 0:
        blur_amount += 1

    im1_blur = cv2.GaussianBlur(im1, (blur_amount, blur_amount), 0)
    im2_blur = cv2.GaussianBlur(im2, (blur_amount, blur_amount), 0)

    # avoid division errors
    im2_blur += (128 * (im2_blur <= 1.0)).astype(im2_blur.dtype)

    return (im2.astype(np.float64) * im1_blur.astype(np.float64) /
            im2_blur.astype(np.float64))


def face_swap(img1, landmarks1, img2, landmarks2):
    m = transformation_f_points(landmarks1[ALIGN_POINTS], landmarks2[ALIGN_POINTS])

    mask = get_face_mask(img2, landmarks2)
    warped_mask = warp_im(mask, m, img1.shape)
    combined_mask = np.max([get_face_mask(img1, landmarks1), warped_mask], axis=0)

    warped_img2 = warp_im(img2, m, img1.shape)
    warped_corrected_img2 = correct_colours(img1, warped_img2, landmarks1)

    return img1 * (1.0 - combined_mask) + warped_corrected_img2 * combined_mask


def get_rotated_points(point, anchor, deg_angle):
    angle = math.radians(deg_angle)
    px, py = point
    ox, oy = anchor

    qx = ox + math.cos(angle) * (px - ox) - math.sin(angle) * (py - oy)
    qy = oy + math.sin(angle) * (px - ox) + math.cos(angle) * (py - oy)
    return [int(qx), int(qy)]


def blend_w_transparency(face_img, overlay_image):
    # BGR channels of the overlay
    overlay_img = overlay_image[:, :, :3]
    # alpha channel of the overlay
    overlay_mask = overlay_image[:, :, 3:]

    background_mask = 255 - overlay_mask
    overlay_mask = cv2.cvtColor(overlay_mask, cv2.COLOR_GRAY2BGR)
    background_mask = cv2.cvtColor(background_mask, cv2.COLOR_GRAY2BGR)

    face_part = (face_img * (1 / 255.0)) * (background_mask * (1 / 255.0))
    overlay_part = (overlay_img * (1 / 255.0)) * (overlay_mask * (1 / 255.0))

    # cast to 8 bit matrix
    return np.uint8(cv2.addWeighted(face_part, 255.0, overlay_part, 255.0, 0.0))


def glasses_filter(cam, glasses, should_show_bounds=False):
    with TimeProfiler("image capture"):
        face = get_cam_frame(cam)

    with TimeProfiler("face pose prediction"):
        landmarks = get_landmarks(face)

    # glasses.shape = (height, width, rgba channels)
    pts1 = np.float32([[0, 0], [glasses.shape[1], 0], [0, glasses.shape[0]], [glasses.shape[1], glasses.shape[0]]])

    if landmarks is None:
        return

    with TimeProfiler("transformation"):
        """
        GLASSES ANCHOR POINTS:

        17 & 26 outer ends of the left and right brows (left and right extrema)
        0 & 16 edges of the face across the eyes (other left and right extrema, interpolate between 0 & 17, 16 & 26 for halfway points)
        19 & 24 top of left and right brows (top extreme)
        27 is centre of the eyes on the nose (centre of glasses)
        28 is the bottom threshold of glasses (perhaps interpolate between 27 & 28 if too low) (bottom extreme)
        """

        left_face_extreme = [landmarks[0, 0], landmarks[0, 1]]
        right_face_extreme = [landmarks[16, 0], landmarks[16, 1]]
        x_diff_face = right_face_extreme[0] - left_face_extreme[0]
        y_diff_face = right_face_extreme[1] - left_face_extreme[1]

        face_angle = math.degrees(math.atan2(y_diff_face, x_diff_face))

        # face width is the hypotenuse between the two face extremes
        face_width = math.sqrt((right_face_extreme[0] - left_face_extreme[0]) ** 2 +
                               (right_face_extreme[1] - left_face_extreme[1]) ** 2)
        glasses_width = face_width * 1.0

        # vertical extent: distance from the brow (19) to the nose bridge (28)
        eye_height = math.sqrt((landmarks[19, 0] - landmarks[28, 0]) ** 2 +
                               (landmarks[19, 1] - landmarks[28, 1]) ** 2)
        glasses_height = eye_height * 1.2

        # generate bounding box from the anchor points
        anchor_point = [landmarks[27, 0], landmarks[27, 1]]
        tl = [int(anchor_point[0] - (glasses_width / 2)), int(anchor_point[1] - (glasses_height / 2))]
        rot_tl = get_rotated_points(tl, anchor_point, face_angle)

        tr = [int(anchor_point[0] + (glasses_width / 2)), int(anchor_point[1] - (glasses_height / 2))]
        rot_tr = get_rotated_points(tr, anchor_point, face_angle)

        bl = [int(anchor_point[0] - (glasses_width / 2)), int(anchor_point[1] + (glasses_height / 2))]
        rot_bl = get_rotated_points(bl, anchor_point, face_angle)

        br = [int(anchor_point[0] + (glasses_width / 2)), int(anchor_point[1] + (glasses_height / 2))]
        rot_br = get_rotated_points(br, anchor_point, face_angle)

        pts = np.float32([rot_tl, rot_tr, rot_bl, rot_br])
        m = cv2.getPerspectiveTransform(pts1, pts)

        rotated = cv2.warpPerspective(glasses, m, (face.shape[1], face.shape[0]))
        result_2 = blend_w_transparency(face, rotated)

    if should_show_bounds:
        for p in pts:
            pos = (int(p[0]), int(p[1]))
            cv2.circle(result_2, pos, 2, (0, 0, 255), 2)
            cv2.putText(result_2, str(p), pos, fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.4, color=(255, 0, 0))

    cv2.imshow("Glasses Filter", result_2)


def moustache_filter(cam, moustache, should_show_bounds=False):
    face = get_cam_frame(cam)
    landmarks = get_landmarks(face)

    # moustache.shape = (height, width, rgba channels)
    pts1 = np.float32([[0, 0], [moustache.shape[1], 0], [0, moustache.shape[0]], [moustache.shape[1], moustache.shape[0]]])

    """
    MOUSTACHE ANCHOR POINTS

    centre anchor point is midway between 33 (top of the philtrum, below the nose) and 51 (bottom of the philtrum, centre of the top lip)
    width can be determined by the eyes as the mouth can move
    height also determined by the eyes as before
    generate as before and just modify multiplier coefficients & translate to anchor point

    ^^^ mouth and jaw can move, so use the eyes as the anchor point initially, then translate to the philtrum position
    """

    if landmarks is not None:
        left_face_extreme = [landmarks[0, 0], landmarks[0, 1]]
        right_face_extreme = [landmarks[16, 0], landmarks[16, 1]]
        x_diff_face = right_face_extreme[0] - left_face_extreme[0]
        y_diff_face = right_face_extreme[1] - left_face_extreme[1]

        face_angle = math.degrees(math.atan2(y_diff_face, x_diff_face))

        # face width is the hypotenuse between the two face extremes
        face_width = math.sqrt((right_face_extreme[0] - left_face_extreme[0]) ** 2 +
                               (right_face_extreme[1] - left_face_extreme[1]) ** 2)
        moustache_width = face_width * 0.8

        # vertical extent: distance from the brow (19) to the nose bridge (28)
        eye_height = math.sqrt((landmarks[19, 0] - landmarks[28, 0]) ** 2 +
                               (landmarks[19, 1] - landmarks[28, 1]) ** 2)
        moustache_height = eye_height * 0.8

        # generate bounding box from the anchor points
        brow_anchor = [landmarks[27, 0], landmarks[27, 1]]
        tl = [int(brow_anchor[0] - (moustache_width / 2)), int(brow_anchor[1] - (moustache_height / 2))]
        rot_tl = get_rotated_points(tl, brow_anchor, face_angle)

        tr = [int(brow_anchor[0] + (moustache_width / 2)), int(brow_anchor[1] - (moustache_height / 2))]
        rot_tr = get_rotated_points(tr, brow_anchor, face_angle)

        bl = [int(brow_anchor[0] - (moustache_width / 2)), int(brow_anchor[1] + (moustache_height / 2))]
        rot_bl = get_rotated_points(bl, brow_anchor, face_angle)

        br = [int(brow_anchor[0] + (moustache_width / 2)), int(brow_anchor[1] + (moustache_height / 2))]
        rot_br = get_rotated_points(br, brow_anchor, face_angle)

        # locate the new location for the moustache on the philtrum
        top_philtrum_point = [landmarks[33, 0], landmarks[33, 1]]
        bottom_philtrum_point = [landmarks[51, 0], landmarks[51, 1]]
        philtrum_anchor = [(top_philtrum_point[0] + bottom_philtrum_point[0]) / 2,
                           (top_philtrum_point[1] + bottom_philtrum_point[1]) / 2]

        # determine distance from old origin to new origin and translate
        anchor_distance = [int(philtrum_anchor[0] - brow_anchor[0]), int(philtrum_anchor[1] - brow_anchor[1])]
        rot_tl[0] += anchor_distance[0]
        rot_tl[1] += anchor_distance[1]
        rot_tr[0] += anchor_distance[0]
        rot_tr[1] += anchor_distance[1]
        rot_bl[0] += anchor_distance[0]
        rot_bl[1] += anchor_distance[1]
        rot_br[0] += anchor_distance[0]
        rot_br[1] += anchor_distance[1]

        pts = np.float32([rot_tl, rot_tr, rot_bl, rot_br])
        m = cv2.getPerspectiveTransform(pts1, pts)

        rotated = cv2.warpPerspective(moustache, m, (face.shape[1], face.shape[0]))
        result_2 = blend_w_transparency(face, rotated)

        # annotate_landmarks(result_2, landmarks)

        if should_show_bounds:
            for p in pts:
                pos = (int(p[0]), int(p[1]))
                cv2.circle(result_2, pos, 2, (0, 0, 255), 2)
                cv2.putText(result_2, str(p), pos, fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.4, color=(255, 0, 0))

        cv2.imshow("Moustache Filter", result_2)


def face_swap_filter(cam, swap_img, swap_img_landmarks):
    me_img = get_cam_frame(cam)
    me_img = cv2.resize(me_img, (me_img.shape[1] * SCALE_FACTOR, me_img.shape[0] * SCALE_FACTOR))
    me_landmarks = get_landmarks(me_img)

    # me_img, me_landmarks = get_im_w_landmarks("resources/bryan_cranston.png")

    if me_landmarks is not None:
        m = transformation_f_points(me_landmarks[ALIGN_POINTS], swap_img_landmarks[ALIGN_POINTS])

        mask = get_face_mask(swap_img, swap_img_landmarks)
        warped_mask = warp_im(mask, m, me_img.shape)
        combined_mask = np.max([get_face_mask(me_img, me_landmarks), warped_mask], axis=0)

        warped_swap = warp_im(swap_img, m, me_img.shape)
        warped_corrected_swap = correct_colours(me_img, warped_swap, me_landmarks)

        output_im = me_img * (1.0 - combined_mask) + warped_corrected_swap * combined_mask
        # clamp to the displayable 8-bit range instead of writing to disk and reading back
        output_im = np.clip(output_im, 0, 255).astype(np.uint8)
        cv2.imshow("Swap Output", output_im)


def main():
    argparser = argparse.ArgumentParser()
    argparser.add_argument("--filter", type=str, default="glasses", help="Filter to apply: glasses, moustache or face")
    argparser.add_argument("--footage", type=str, default=None, help="Path to the overlay image (RGBA) or swap face image")
    argparser.add_argument("--show-bounds", action="store_true")
    argparser.add_argument("--video-source", type=int, default=0, help="Video input device number")

    args = argparser.parse_args()

    cam = create_capture(args.video_source)

    footage = cv2.imread(args.footage, cv2.IMREAD_UNCHANGED)
    if footage is None:
        sys.exit("Could not read footage image: %s" % args.footage)

    swap_img_landmarks = None
    if args.filter == "face":
        swap_img_landmarks = get_landmarks(footage)
        if swap_img_landmarks is None:
            sys.exit("No face found in the footage image: %s" % args.footage)

    try:
        while True:
            with TimeProfiler(args.filter):
                if args.filter == "glasses":
                    glasses_filter(cam, footage, args.show_bounds)
                elif args.filter == "moustache":
                    moustache_filter(cam, footage, args.show_bounds)
                elif args.filter == "face":
                    face_swap_filter(cam, footage, swap_img_landmarks)

            if (cv2.waitKey(30) & 0xFF) == 27:
                break

    except KeyboardInterrupt:
        pass

    cv2.destroyAllWindows()


if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    main()
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
dlib
opencv-python
numpy
--------------------------------------------------------------------------------
/video.py:
--------------------------------------------------------------------------------
import cv2


def create_capture(source=0):
    source = str(source).strip()
    chunks = source.split(':')

    # re-join a Windows drive letter that the ':' split would otherwise break apart (e.g. "c:/footage.mp4")
    if len(chunks) > 1 and len(chunks[0]) == 1 and chunks[0].isalpha():
        chunks[1] = chunks[0] + ':' + chunks[1]
        del chunks[0]

    # numeric sources are camera device indices; anything else is treated as a file path
    source = int(chunks[0]) if chunks[0].isdigit() else chunks[0]
    return cv2.VideoCapture(source)
--------------------------------------------------------------------------------