├── dataset ├── extract_full.py ├── mesh_stabilization.py ├── one_euro_filter.py └── points.py ├── lipsync3d ├── .ipynb_checkpoints │ ├── combine-audioDVP-checkpoint.ipynb │ ├── combine-checkpoint.ipynb │ └── combine-half_texture-checkpoint.ipynb ├── __init__.py ├── __pycache__ │ ├── audio.cpython-38.pyc │ ├── dataset.cpython-38.pyc │ ├── hparams.cpython-38.pyc │ ├── loss.cpython-38.pyc │ ├── model.cpython-38.pyc │ ├── options.cpython-38.pyc │ ├── utils.cpython-36.pyc │ └── utils.cpython-38.pyc ├── audio.py ├── audiodvp_utils │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── util.cpython-36.pyc │ │ ├── util.cpython-38.pyc │ │ └── visualizer.cpython-38.pyc │ ├── audio.py │ ├── build_nfr_dataset.py │ ├── crop_portrait-checkpoint.py │ ├── crop_portrait.py │ ├── face_detection │ │ ├── README.md │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── api.cpython-38.pyc │ │ │ ├── models.cpython-38.pyc │ │ │ └── utils.cpython-38.pyc │ │ ├── api.py │ │ ├── detection │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ └── core.cpython-38.pyc │ │ │ ├── core.py │ │ │ └── sfd │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── bbox.cpython-38.pyc │ │ │ │ ├── detect.cpython-38.pyc │ │ │ │ ├── net_s3fd.cpython-38.pyc │ │ │ │ └── sfd_detector.cpython-38.pyc │ │ │ │ ├── bbox.py │ │ │ │ ├── detect.py │ │ │ │ ├── net_s3fd.py │ │ │ │ └── sfd_detector.py │ │ ├── models.py │ │ └── utils.py │ ├── hparams.py │ ├── rescale_image.py │ ├── util-checkpoint.py │ ├── util.py │ └── visualizer.py ├── combine-audioDVP.ipynb ├── combine-half_texture.ipynb ├── combine.ipynb ├── dataset.py ├── demo.sh ├── face_mesh.ipynb ├── hparams.py ├── loss.py ├── model.py ├── options.py ├── pose_normalization.py ├── test.py ├── train.py └── utils.py └── make_video.py /dataset/extract_full.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import argparse 3 | import os 4 | 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument('--target_video', type=str, required=True) 7 | args = parser.parse_args() 8 | 9 | if __name__ == "__main__": 10 | os.makedirs('full', exist_ok=True) 11 | cap = cv2.VideoCapture(args.target_video) 12 | count = 0 13 | 14 | while(cap.isOpened()): 15 | ret, frame = cap.read() 16 | if ret: 17 | cv2.imwrite(os.path.join('full', '{}.png'.format(count)), frame) 18 | count += 1 19 | else: 20 | break 21 | 22 | cap.release() 23 | -------------------------------------------------------------------------------- /dataset/mesh_stabilization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from one_euro_filter import OneEuroFilter 3 | import os 4 | from natsort import natsorted 5 | import math 6 | import cv2 7 | import torch 8 | from points import topology, mouthPoints, chins, rest 9 | 10 | def applyFilter(points, t, min_cutoff, beta, skipPoints = []): 11 | filtered = np.empty_like(points) 12 | filtered[0] = points[0] 13 | one_euro_filter = OneEuroFilter(t[0], points[0], min_cutoff, beta) 14 | 15 | for i in range(1, points.shape[0]): 16 | filtered[i] = one_euro_filter(t[i], points[i]) 17 | 18 | for i in range(1, points.shape[0]): 19 | for skipPoint in skipPoints: 20 | filtered[i, skipPoint] = points[i, skipPoint] 21 | 22 | return filtered 23 | 24 | def draw_image(count, point): 25 | white_image = np.ones((256, 256, 3), np.uint8) * 255 26 | 27 | 
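# Draw every edge listed in points.topology (by all appearances the MediaPipe face-mesh
# tessellation over the 468 landmarks) as a 1-px black line, writing a wireframe preview to test/.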
for start, end in topology: 28 | start_point = point[start,:2] 29 | end_point = point[end,:2] 30 | cv2.line(white_image, start_point.astype(int), end_point.astype(int), (0,0,0), 1) 31 | 32 | cv2.imwrite('test/{}.jpg'.format(count), white_image) 33 | 34 | if __name__ == '__main__': 35 | image_height = 256 36 | image_width = 256 37 | 38 | normalised_mesh_files = natsorted([os.path.join('mesh_dict', x) for x in os.listdir(os.path.join('mesh_dict'))]) 39 | landmarks = [] 40 | for file in normalised_mesh_files: 41 | landmark = torch.load(file) 42 | R = landmark['R'] 43 | t = landmark['t'] 44 | c = landmark['c'] 45 | keys = natsorted([x for x in landmark.keys() if type(x) is int]) 46 | vertices = [] 47 | for key in keys: 48 | vertice = np.array(landmark[key]).reshape(3,1) 49 | norm_vertice = (c * np.matmul(R, vertice) + t).squeeze() 50 | x_px = min(math.floor(norm_vertice[0]), image_width - 1) 51 | y_px = min(math.floor(norm_vertice[1]), image_height - 1) 52 | z_px = min(math.floor(norm_vertice[2]), image_width - 1) 53 | vertices.append([x_px, y_px, z_px]) 54 | landmarks.append(vertices) 55 | 56 | landmarks = np.array(landmarks) 57 | 58 | shape_1, shape_2, shape_3 = landmarks.shape 59 | 60 | xs = landmarks[:,:,0].reshape((shape_1, shape_2)) 61 | ys = landmarks[:,:,1].reshape((shape_1, shape_2)) 62 | zs = landmarks[:,:,2].reshape((shape_1, shape_2)) 63 | 64 | fps = 25 65 | t = np.linspace(0, xs.shape[0]/fps, xs.shape[0]) 66 | 67 | xs_hat = applyFilter(xs, t, 0.005, 0.7) 68 | ys_hat = applyFilter(ys, t, 0.005, 0.7, mouthPoints + chins) 69 | ys_hat = applyFilter(ys_hat, t, 0.000001, 1.5, rest) 70 | zs_hat = applyFilter(zs, t, 0.005, 0.7) 71 | combine = np.stack(((xs_hat, ys_hat, zs_hat)), axis=2) 72 | 73 | count = [i for i in range(combine.shape[0])] 74 | 75 | os.makedirs(os.path.join('stabilized_norm_mesh'),exist_ok=True) 76 | for i in range(combine.shape[0]): 77 | torch.save(combine[i], os.path.join('stabilized_norm_mesh', '{}.pt'.format(count[i]))) 78 | -------------------------------------------------------------------------------- /dataset/one_euro_filter.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 4 | def smoothing_factor(t_e, cutoff): 5 | r = 2 * math.pi * cutoff * t_e 6 | return r / (r + 1) 7 | 8 | 9 | def exponential_smoothing(a, x, x_prev): 10 | return a * x + (1 - a) * x_prev 11 | 12 | 13 | class OneEuroFilter: 14 | def __init__(self, t0, x0, dx0=0.0, min_cutoff=1.0, beta=0.0, 15 | d_cutoff=1.0): 16 | """Initialize the one euro filter.""" 17 | # The parameters. 18 | self.min_cutoff = float(min_cutoff) 19 | self.beta = float(beta) 20 | self.d_cutoff = float(d_cutoff) 21 | # Previous values. 22 | self.x_prev = x0 23 | self.dx_prev = dx0 24 | self.t_prev = float(t0) 25 | 26 | def __call__(self, t, x): 27 | """Compute the filtered signal.""" 28 | t_e = t - self.t_prev 29 | 30 | # The filtered derivative of the signal. 31 | a_d = smoothing_factor(t_e, self.d_cutoff) 32 | dx = (x - self.x_prev) / t_e 33 | dx_hat = exponential_smoothing(a_d, dx, self.dx_prev) 34 | 35 | # The filtered signal. 36 | cutoff = self.min_cutoff + self.beta * abs(dx_hat) 37 | a = smoothing_factor(t_e, cutoff) 38 | x_hat = exponential_smoothing(a, x, self.x_prev) 39 | 40 | # Memorize the previous values. 
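# Note: the filter is stateful -- each call must receive a strictly increasing timestamp t,
# otherwise t_e becomes zero or negative and the derivative estimate above divides by zero.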
41 | self.x_prev = x_hat 42 | self.dx_prev = dx_hat 43 | self.t_prev = t 44 | 45 | return x_hat 46 | -------------------------------------------------------------------------------- /dataset/points.py: -------------------------------------------------------------------------------- 1 | topology = [ 2 | (127, 34), (34, 139), (139, 127), (11, 0), (0, 37), (37, 11), 3 | (232, 231), (231, 120), (120, 232), (72, 37), (37, 39), (39, 72), 4 | (128, 121), (121, 47), (47, 128), (232, 121), (121, 128), (128, 232), 5 | (104, 69), (69, 67), (67, 104), (175, 171), (171, 148), (148, 175), 6 | (118, 50), (50, 101), (101, 118), (73, 39), (39, 40), (40, 73), 7 | (9, 151), (151, 108), (108, 9), (48, 115), (115, 131), (131, 48), 8 | (194, 204), (204, 211), (211, 194), (74, 40), (40, 185), (185, 74), 9 | (80, 42), (42, 183), (183, 80), (40, 92), (92, 186), (186, 40), 10 | (230, 229), (229, 118), (118, 230), (202, 212), (212, 214), (214, 202), 11 | (83, 18), (18, 17), (17, 83), (76, 61), (61, 146), (146, 76), 12 | (160, 29), (29, 30), (30, 160), (56, 157), (157, 173), (173, 56), 13 | (106, 204), (204, 194), (194, 106), (135, 214), (214, 192), (192, 135), 14 | (203, 165), (165, 98), (98, 203), (21, 71), (71, 68), (68, 21), 15 | (51, 45), (45, 4), (4, 51), (144, 24), (24, 23), (23, 144), 16 | (77, 146), (146, 91), (91, 77), (205, 50), (50, 187), (187, 205), 17 | (201, 200), (200, 18), (18, 201), (91, 106), (106, 182), (182, 91), 18 | (90, 91), (91, 181), (181, 90), (85, 84), (84, 17), (17, 85), 19 | (206, 203), (203, 36), (36, 206), (148, 171), (171, 140), (140, 148), 20 | (92, 40), (40, 39), (39, 92), (193, 189), (189, 244), (244, 193), 21 | (159, 158), (158, 28), (28, 159), (247, 246), (246, 161), (161, 247), 22 | (236, 3), (3, 196), (196, 236), (54, 68), (68, 104), (104, 54), 23 | (193, 168), (168, 8), (8, 193), (117, 228), (228, 31), (31, 117), 24 | (189, 193), (193, 55), (55, 189), (98, 97), (97, 99), (99, 98), 25 | (126, 47), (47, 100), (100, 126), (166, 79), (79, 218), (218, 166), 26 | (155, 154), (154, 26), (26, 155), (209, 49), (49, 131), (131, 209), 27 | (135, 136), (136, 150), (150, 135), (47, 126), (126, 217), (217, 47), 28 | (223, 52), (52, 53), (53, 223), (45, 51), (51, 134), (134, 45), 29 | (211, 170), (170, 140), (140, 211), (67, 69), (69, 108), (108, 67), 30 | (43, 106), (106, 91), (91, 43), (230, 119), (119, 120), (120, 230), 31 | (226, 130), (130, 247), (247, 226), (63, 53), (53, 52), (52, 63), 32 | (238, 20), (20, 242), (242, 238), (46, 70), (70, 156), (156, 46), 33 | (78, 62), (62, 96), (96, 78), (46, 53), (53, 63), (63, 46), 34 | (143, 34), (34, 227), (227, 143), (123, 117), (117, 111), (111, 123), 35 | (44, 125), (125, 19), (19, 44), (236, 134), (134, 51), (51, 236), 36 | (216, 206), (206, 205), (205, 216), (154, 153), (153, 22), (22, 154), 37 | (39, 37), (37, 167), (167, 39), (200, 201), (201, 208), (208, 200), 38 | (36, 142), (142, 100), (100, 36), (57, 212), (212, 202), (202, 57), 39 | (20, 60), (60, 99), (99, 20), (28, 158), (158, 157), (157, 28), 40 | (35, 226), (226, 113), (113, 35), (160, 159), (159, 27), (27, 160), 41 | (204, 202), (202, 210), (210, 204), (113, 225), (225, 46), (46, 113), 42 | (43, 202), (202, 204), (204, 43), (62, 76), (76, 77), (77, 62), 43 | (137, 123), (123, 116), (116, 137), (41, 38), (38, 72), (72, 41), 44 | (203, 129), (129, 142), (142, 203), (64, 98), (98, 240), (240, 64), 45 | (49, 102), (102, 64), (64, 49), (41, 73), (73, 74), (74, 41), 46 | (212, 216), (216, 207), (207, 212), (42, 74), (74, 184), (184, 42), 47 | (169, 170), (170, 211), (211, 169), (170, 
149), (149, 176), (176, 170), 48 | (105, 66), (66, 69), (69, 105), (122, 6), (6, 168), (168, 122), 49 | (123, 147), (147, 187), (187, 123), (96, 77), (77, 90), (90, 96), 50 | (65, 55), (55, 107), (107, 65), (89, 90), (90, 180), (180, 89), 51 | (101, 100), (100, 120), (120, 101), (63, 105), (105, 104), (104, 63), 52 | (93, 137), (137, 227), (227, 93), (15, 86), (86, 85), (85, 15), 53 | (129, 102), (102, 49), (49, 129), (14, 87), (87, 86), (86, 14), 54 | (55, 8), (8, 9), (9, 55), (100, 47), (47, 121), (121, 100), 55 | (145, 23), (23, 22), (22, 145), (88, 89), (89, 179), (179, 88), 56 | (6, 122), (122, 196), (196, 6), (88, 95), (95, 96), (96, 88), 57 | (138, 172), (172, 136), (136, 138), (215, 58), (58, 172), (172, 215), 58 | (115, 48), (48, 219), (219, 115), (42, 80), (80, 81), (81, 42), 59 | (195, 3), (3, 51), (51, 195), (43, 146), (146, 61), (61, 43), 60 | (171, 175), (175, 199), (199, 171), (81, 82), (82, 38), (38, 81), 61 | (53, 46), (46, 225), (225, 53), (144, 163), (163, 110), (110, 144), 62 | (52, 65), (65, 66), (66, 52), (229, 228), (228, 117), (117, 229), 63 | (34, 127), (127, 234), (234, 34), (107, 108), (108, 69), (69, 107), 64 | (109, 108), (108, 151), (151, 109), (48, 64), (64, 235), (235, 48), 65 | (62, 78), (78, 191), (191, 62), (129, 209), (209, 126), (126, 129), 66 | (111, 35), (35, 143), (143, 111), (117, 123), (123, 50), (50, 117), 67 | (222, 65), (65, 52), (52, 222), (19, 125), (125, 141), (141, 19), 68 | (221, 55), (55, 65), (65, 221), (3, 195), (195, 197), (197, 3), 69 | (25, 7), (7, 33), (33, 25), (220, 237), (237, 44), (44, 220), 70 | (70, 71), (71, 139), (139, 70), (122, 193), (193, 245), (245, 122), 71 | (247, 130), (130, 33), (33, 247), (71, 21), (21, 162), (162, 71), 72 | (170, 169), (169, 150), (150, 170), (188, 174), (174, 196), (196, 188), 73 | (216, 186), (186, 92), (92, 216), (2, 97), (97, 167), (167, 2), 74 | (141, 125), (125, 241), (241, 141), (164, 167), (167, 37), (37, 164), 75 | (72, 38), (38, 12), (12, 72), (38, 82), (82, 13), (13, 38), 76 | (63, 68), (68, 71), (71, 63), (226, 35), (35, 111), (111, 226), 77 | (101, 50), (50, 205), (205, 101), (206, 92), (92, 165), (165, 206), 78 | (209, 198), (198, 217), (217, 209), (165, 167), (167, 97), (97, 165), 79 | (220, 115), (115, 218), (218, 220), (133, 112), (112, 243), (243, 133), 80 | (239, 238), (238, 241), (241, 239), (214, 135), (135, 169), (169, 214), 81 | (190, 173), (173, 133), (133, 190), (171, 208), (208, 32), (32, 171), 82 | (125, 44), (44, 237), (237, 125), (86, 87), (87, 178), (178, 86), 83 | (85, 86), (86, 179), (179, 85), (84, 85), (85, 180), (180, 84), 84 | (83, 84), (84, 181), (181, 83), (201, 83), (83, 182), (182, 201), 85 | (137, 93), (93, 132), (132, 137), (76, 62), (62, 183), (183, 76), 86 | (61, 76), (76, 184), (184, 61), (57, 61), (61, 185), (185, 57), 87 | (212, 57), (57, 186), (186, 212), (214, 207), (207, 187), (187, 214), 88 | (34, 143), (143, 156), (156, 34), (79, 239), (239, 237), (237, 79), 89 | (123, 137), (137, 177), (177, 123), (44, 1), (1, 4), (4, 44), 90 | (201, 194), (194, 32), (32, 201), (64, 102), (102, 129), (129, 64), 91 | (213, 215), (215, 138), (138, 213), (59, 166), (166, 219), (219, 59), 92 | (242, 99), (99, 97), (97, 242), (2, 94), (94, 141), (141, 2), 93 | (75, 59), (59, 235), (235, 75), (24, 110), (110, 228), (228, 24), 94 | (25, 130), (130, 226), (226, 25), (23, 24), (24, 229), (229, 23), 95 | (22, 23), (23, 230), (230, 22), (26, 22), (22, 231), (231, 26), 96 | (112, 26), (26, 232), (232, 112), (189, 190), (190, 243), (243, 189), 97 | (221, 56), (56, 190), (190, 
221), (28, 56), (56, 221), (221, 28), 98 | (27, 28), (28, 222), (222, 27), (29, 27), (27, 223), (223, 29), 99 | (30, 29), (29, 224), (224, 30), (247, 30), (30, 225), (225, 247), 100 | (238, 79), (79, 20), (20, 238), (166, 59), (59, 75), (75, 166), 101 | (60, 75), (75, 240), (240, 60), (147, 177), (177, 215), (215, 147), 102 | (20, 79), (79, 166), (166, 20), (187, 147), (147, 213), (213, 187), 103 | (112, 233), (233, 244), (244, 112), (233, 128), (128, 245), (245, 233), 104 | (128, 114), (114, 188), (188, 128), (114, 217), (217, 174), (174, 114), 105 | (131, 115), (115, 220), (220, 131), (217, 198), (198, 236), (236, 217), 106 | (198, 131), (131, 134), (134, 198), (177, 132), (132, 58), (58, 177), 107 | (143, 35), (35, 124), (124, 143), (110, 163), (163, 7), (7, 110), 108 | (228, 110), (110, 25), (25, 228), (356, 389), (389, 368), (368, 356), 109 | (11, 302), (302, 267), (267, 11), (452, 350), (350, 349), (349, 452), 110 | (302, 303), (303, 269), (269, 302), (357, 343), (343, 277), (277, 357), 111 | (452, 453), (453, 357), (357, 452), (333, 332), (332, 297), (297, 333), 112 | (175, 152), (152, 377), (377, 175), (347, 348), (348, 330), (330, 347), 113 | (303, 304), (304, 270), (270, 303), (9, 336), (336, 337), (337, 9), 114 | (278, 279), (279, 360), (360, 278), (418, 262), (262, 431), (431, 418), 115 | (304, 408), (408, 409), (409, 304), (310, 415), (415, 407), (407, 310), 116 | (270, 409), (409, 410), (410, 270), (450, 348), (348, 347), (347, 450), 117 | (422, 430), (430, 434), (434, 422), (313, 314), (314, 17), (17, 313), 118 | (306, 307), (307, 375), (375, 306), (387, 388), (388, 260), (260, 387), 119 | (286, 414), (414, 398), (398, 286), (335, 406), (406, 418), (418, 335), 120 | (364, 367), (367, 416), (416, 364), (423, 358), (358, 327), (327, 423), 121 | (251, 284), (284, 298), (298, 251), (281, 5), (5, 4), (4, 281), 122 | (373, 374), (374, 253), (253, 373), (307, 320), (320, 321), (321, 307), 123 | (425, 427), (427, 411), (411, 425), (421, 313), (313, 18), (18, 421), 124 | (321, 405), (405, 406), (406, 321), (320, 404), (404, 405), (405, 320), 125 | (315, 16), (16, 17), (17, 315), (426, 425), (425, 266), (266, 426), 126 | (377, 400), (400, 369), (369, 377), (322, 391), (391, 269), (269, 322), 127 | (417, 465), (465, 464), (464, 417), (386, 257), (257, 258), (258, 386), 128 | (466, 260), (260, 388), (388, 466), (456, 399), (399, 419), (419, 456), 129 | (284, 332), (332, 333), (333, 284), (417, 285), (285, 8), (8, 417), 130 | (346, 340), (340, 261), (261, 346), (413, 441), (441, 285), (285, 413), 131 | (327, 460), (460, 328), (328, 327), (355, 371), (371, 329), (329, 355), 132 | (392, 439), (439, 438), (438, 392), (382, 341), (341, 256), (256, 382), 133 | (429, 420), (420, 360), (360, 429), (364, 394), (394, 379), (379, 364), 134 | (277, 343), (343, 437), (437, 277), (443, 444), (444, 283), (283, 443), 135 | (275, 440), (440, 363), (363, 275), (431, 262), (262, 369), (369, 431), 136 | (297, 338), (338, 337), (337, 297), (273, 375), (375, 321), (321, 273), 137 | (450, 451), (451, 349), (349, 450), (446, 342), (342, 467), (467, 446), 138 | (293, 334), (334, 282), (282, 293), (458, 461), (461, 462), (462, 458), 139 | (276, 353), (353, 383), (383, 276), (308, 324), (324, 325), (325, 308), 140 | (276, 300), (300, 293), (293, 276), (372, 345), (345, 447), (447, 372), 141 | (352, 345), (345, 340), (340, 352), (274, 1), (1, 19), (19, 274), 142 | (456, 248), (248, 281), (281, 456), (436, 427), (427, 425), (425, 436), 143 | (381, 256), (256, 252), (252, 381), (269, 391), (391, 393), (393, 269), 
144 | (200, 199), (199, 428), (428, 200), (266, 330), (330, 329), (329, 266), 145 | (287, 273), (273, 422), (422, 287), (250, 462), (462, 328), (328, 250), 146 | (258, 286), (286, 384), (384, 258), (265, 353), (353, 342), (342, 265), 147 | (387, 259), (259, 257), (257, 387), (424, 431), (431, 430), (430, 424), 148 | (342, 353), (353, 276), (276, 342), (273, 335), (335, 424), (424, 273), 149 | (292, 325), (325, 307), (307, 292), (366, 447), (447, 345), (345, 366), 150 | (271, 303), (303, 302), (302, 271), (423, 266), (266, 371), (371, 423), 151 | (294, 455), (455, 460), (460, 294), (279, 278), (278, 294), (294, 279), 152 | (271, 272), (272, 304), (304, 271), (432, 434), (434, 427), (427, 432), 153 | (272, 407), (407, 408), (408, 272), (394, 430), (430, 431), (431, 394), 154 | (395, 369), (369, 400), (400, 395), (334, 333), (333, 299), (299, 334), 155 | (351, 417), (417, 168), (168, 351), (352, 280), (280, 411), (411, 352), 156 | (325, 319), (319, 320), (320, 325), (295, 296), (296, 336), (336, 295), 157 | (319, 403), (403, 404), (404, 319), (330, 348), (348, 349), (349, 330), 158 | (293, 298), (298, 333), (333, 293), (323, 454), (454, 447), (447, 323), 159 | (15, 16), (16, 315), (315, 15), (358, 429), (429, 279), (279, 358), 160 | (14, 15), (15, 316), (316, 14), (285, 336), (336, 9), (9, 285), 161 | (329, 349), (349, 350), (350, 329), (374, 380), (380, 252), (252, 374), 162 | (318, 402), (402, 403), (403, 318), (6, 197), (197, 419), (419, 6), 163 | (318, 319), (319, 325), (325, 318), (367, 364), (364, 365), (365, 367), 164 | (435, 367), (367, 397), (397, 435), (344, 438), (438, 439), (439, 344), 165 | (272, 271), (271, 311), (311, 272), (195, 5), (5, 281), (281, 195), 166 | (273, 287), (287, 291), (291, 273), (396, 428), (428, 199), (199, 396), 167 | (311, 271), (271, 268), (268, 311), (283, 444), (444, 445), (445, 283), 168 | (373, 254), (254, 339), (339, 373), (282, 334), (334, 296), (296, 282), 169 | (449, 347), (347, 346), (346, 449), (264, 447), (447, 454), (454, 264), 170 | (336, 296), (296, 299), (299, 336), (338, 10), (10, 151), (151, 338), 171 | (278, 439), (439, 455), (455, 278), (292, 407), (407, 415), (415, 292), 172 | (358, 371), (371, 355), (355, 358), (340, 345), (345, 372), (372, 340), 173 | (346, 347), (347, 280), (280, 346), (442, 443), (443, 282), (282, 442), 174 | (19, 94), (94, 370), (370, 19), (441, 442), (442, 295), (295, 441), 175 | (248, 419), (419, 197), (197, 248), (263, 255), (255, 359), (359, 263), 176 | (440, 275), (275, 274), (274, 440), (300, 383), (383, 368), (368, 300), 177 | (351, 412), (412, 465), (465, 351), (263, 467), (467, 466), (466, 263), 178 | (301, 368), (368, 389), (389, 301), (395, 378), (378, 379), (379, 395), 179 | (412, 351), (351, 419), (419, 412), (436, 426), (426, 322), (322, 436), 180 | (2, 164), (164, 393), (393, 2), (370, 462), (462, 461), (461, 370), 181 | (164, 0), (0, 267), (267, 164), (302, 11), (11, 12), (12, 302), 182 | (268, 12), (12, 13), (13, 268), (293, 300), (300, 301), (301, 293), 183 | (446, 261), (261, 340), (340, 446), (330, 266), (266, 425), (425, 330), 184 | (426, 423), (423, 391), (391, 426), (429, 355), (355, 437), (437, 429), 185 | (391, 327), (327, 326), (326, 391), (440, 457), (457, 438), (438, 440), 186 | (341, 382), (382, 362), (362, 341), (459, 457), (457, 461), (461, 459), 187 | (434, 430), (430, 394), (394, 434), (414, 463), (463, 362), (362, 414), 188 | (396, 369), (369, 262), (262, 396), (354, 461), (461, 457), (457, 354), 189 | (316, 403), (403, 402), (402, 316), (315, 404), (404, 403), (403, 315), 190 | 
(314, 405), (405, 404), (404, 314), (313, 406), (406, 405), (405, 313), 191 | (421, 418), (418, 406), (406, 421), (366, 401), (401, 361), (361, 366), 192 | (306, 408), (408, 407), (407, 306), (291, 409), (409, 408), (408, 291), 193 | (287, 410), (410, 409), (409, 287), (432, 436), (436, 410), (410, 432), 194 | (434, 416), (416, 411), (411, 434), (264, 368), (368, 383), (383, 264), 195 | (309, 438), (438, 457), (457, 309), (352, 376), (376, 401), (401, 352), 196 | (274, 275), (275, 4), (4, 274), (421, 428), (428, 262), (262, 421), 197 | (294, 327), (327, 358), (358, 294), (433, 416), (416, 367), (367, 433), 198 | (289, 455), (455, 439), (439, 289), (462, 370), (370, 326), (326, 462), 199 | (2, 326), (326, 370), (370, 2), (305, 460), (460, 455), (455, 305), 200 | (254, 449), (449, 448), (448, 254), (255, 261), (261, 446), (446, 255), 201 | (253, 450), (450, 449), (449, 253), (252, 451), (451, 450), (450, 252), 202 | (256, 452), (452, 451), (451, 256), (341, 453), (453, 452), (452, 341), 203 | (413, 464), (464, 463), (463, 413), (441, 413), (413, 414), (414, 441), 204 | (258, 442), (442, 441), (441, 258), (257, 443), (443, 442), (442, 257), 205 | (259, 444), (444, 443), (443, 259), (260, 445), (445, 444), (444, 260), 206 | (467, 342), (342, 445), (445, 467), (459, 458), (458, 250), (250, 459), 207 | (289, 392), (392, 290), (290, 289), (290, 328), (328, 460), (460, 290), 208 | (376, 433), (433, 435), (435, 376), (250, 290), (290, 392), (392, 250), 209 | (411, 416), (416, 433), (433, 411), (341, 463), (463, 464), (464, 341), 210 | (453, 464), (464, 465), (465, 453), (357, 465), (465, 412), (412, 357), 211 | (343, 412), (412, 399), (399, 343), (360, 363), (363, 440), (440, 360), 212 | (437, 399), (399, 456), (456, 437), (420, 456), (456, 363), (363, 420), 213 | (401, 435), (435, 288), (288, 401), (372, 383), (383, 353), (353, 372), 214 | (339, 255), (255, 249), (249, 339), (448, 261), (261, 255), (255, 448), 215 | (133, 243), (243, 190), (190, 133), (133, 155), (155, 112), (112, 133), 216 | (33, 246), (246, 247), (247, 33), (33, 130), (130, 25), (25, 33), 217 | (398, 384), (384, 286), (286, 398), (362, 398), (398, 414), (414, 362), 218 | (362, 463), (463, 341), (341, 362), (263, 359), (359, 467), (467, 263), 219 | (263, 249), (249, 255), (255, 263), (466, 467), (467, 260), (260, 466), 220 | (75, 60), (60, 166), (166, 75), (238, 239), (239, 79), (79, 238), 221 | (162, 127), (127, 139), (139, 162), (72, 11), (11, 37), (37, 72), 222 | (121, 232), (232, 120), (120, 121), (73, 72), (72, 39), (39, 73), 223 | (114, 128), (128, 47), (47, 114), (233, 232), (232, 128), (128, 233), 224 | (103, 104), (104, 67), (67, 103), (152, 175), (175, 148), (148, 152), 225 | (119, 118), (118, 101), (101, 119), (74, 73), (73, 40), (40, 74), 226 | (107, 9), (9, 108), (108, 107), (49, 48), (48, 131), (131, 49), 227 | (32, 194), (194, 211), (211, 32), (184, 74), (74, 185), (185, 184), 228 | (191, 80), (80, 183), (183, 191), (185, 40), (40, 186), (186, 185), 229 | (119, 230), (230, 118), (118, 119), (210, 202), (202, 214), (214, 210), 230 | (84, 83), (83, 17), (17, 84), (77, 76), (76, 146), (146, 77), 231 | (161, 160), (160, 30), (30, 161), (190, 56), (56, 173), (173, 190), 232 | (182, 106), (106, 194), (194, 182), (138, 135), (135, 192), (192, 138), 233 | (129, 203), (203, 98), (98, 129), (54, 21), (21, 68), (68, 54), 234 | (5, 51), (51, 4), (4, 5), (145, 144), (144, 23), (23, 145), 235 | (90, 77), (77, 91), (91, 90), (207, 205), (205, 187), (187, 207), 236 | (83, 201), (201, 18), (18, 83), (181, 91), (91, 182), (182, 
181), 237 | (180, 90), (90, 181), (181, 180), (16, 85), (85, 17), (17, 16), 238 | (205, 206), (206, 36), (36, 205), (176, 148), (148, 140), (140, 176), 239 | (165, 92), (92, 39), (39, 165), (245, 193), (193, 244), (244, 245), 240 | (27, 159), (159, 28), (28, 27), (30, 247), (247, 161), (161, 30), 241 | (174, 236), (236, 196), (196, 174), (103, 54), (54, 104), (104, 103), 242 | (55, 193), (193, 8), (8, 55), (111, 117), (117, 31), (31, 111), 243 | (221, 189), (189, 55), (55, 221), (240, 98), (98, 99), (99, 240), 244 | (142, 126), (126, 100), (100, 142), (219, 166), (166, 218), (218, 219), 245 | (112, 155), (155, 26), (26, 112), (198, 209), (209, 131), (131, 198), 246 | (169, 135), (135, 150), (150, 169), (114, 47), (47, 217), (217, 114), 247 | (224, 223), (223, 53), (53, 224), (220, 45), (45, 134), (134, 220), 248 | (32, 211), (211, 140), (140, 32), (109, 67), (67, 108), (108, 109), 249 | (146, 43), (43, 91), (91, 146), (231, 230), (230, 120), (120, 231), 250 | (113, 226), (226, 247), (247, 113), (105, 63), (63, 52), (52, 105), 251 | (241, 238), (238, 242), (242, 241), (124, 46), (46, 156), (156, 124), 252 | (95, 78), (78, 96), (96, 95), (70, 46), (46, 63), (63, 70), 253 | (116, 143), (143, 227), (227, 116), (116, 123), (123, 111), (111, 116), 254 | (1, 44), (44, 19), (19, 1), (3, 236), (236, 51), (51, 3), 255 | (207, 216), (216, 205), (205, 207), (26, 154), (154, 22), (22, 26), 256 | (165, 39), (39, 167), (167, 165), (199, 200), (200, 208), (208, 199), 257 | (101, 36), (36, 100), (100, 101), (43, 57), (57, 202), (202, 43), 258 | (242, 20), (20, 99), (99, 242), (56, 28), (28, 157), (157, 56), 259 | (124, 35), (35, 113), (113, 124), (29, 160), (160, 27), (27, 29), 260 | (211, 204), (204, 210), (210, 211), (124, 113), (113, 46), (46, 124), 261 | (106, 43), (43, 204), (204, 106), (96, 62), (62, 77), (77, 96), 262 | (227, 137), (137, 116), (116, 227), (73, 41), (41, 72), (72, 73), 263 | (36, 203), (203, 142), (142, 36), (235, 64), (64, 240), (240, 235), 264 | (48, 49), (49, 64), (64, 48), (42, 41), (41, 74), (74, 42), 265 | (214, 212), (212, 207), (207, 214), (183, 42), (42, 184), (184, 183), 266 | (210, 169), (169, 211), (211, 210), (140, 170), (170, 176), (176, 140), 267 | (104, 105), (105, 69), (69, 104), (193, 122), (122, 168), (168, 193), 268 | (50, 123), (123, 187), (187, 50), (89, 96), (96, 90), (90, 89), 269 | (66, 65), (65, 107), (107, 66), (179, 89), (89, 180), (180, 179), 270 | (119, 101), (101, 120), (120, 119), (68, 63), (63, 104), (104, 68), 271 | (234, 93), (93, 227), (227, 234), (16, 15), (15, 85), (85, 16), 272 | (209, 129), (129, 49), (49, 209), (15, 14), (14, 86), (86, 15), 273 | (107, 55), (55, 9), (9, 107), (120, 100), (100, 121), (121, 120), 274 | (153, 145), (145, 22), (22, 153), (178, 88), (88, 179), (179, 178), 275 | (197, 6), (6, 196), (196, 197), (89, 88), (88, 96), (96, 89), 276 | (135, 138), (138, 136), (136, 135), (138, 215), (215, 172), (172, 138), 277 | (218, 115), (115, 219), (219, 218), (41, 42), (42, 81), (81, 41), 278 | (5, 195), (195, 51), (51, 5), (57, 43), (43, 61), (61, 57), 279 | (208, 171), (171, 199), (199, 208), (41, 81), (81, 38), (38, 41), 280 | (224, 53), (53, 225), (225, 224), (24, 144), (144, 110), (110, 24), 281 | (105, 52), (52, 66), (66, 105), (118, 229), (229, 117), (117, 118), 282 | (227, 34), (34, 234), (234, 227), (66, 107), (107, 69), (69, 66), 283 | (10, 109), (109, 151), (151, 10), (219, 48), (48, 235), (235, 219), 284 | (183, 62), (62, 191), (191, 183), (142, 129), (129, 126), (126, 142), 285 | (116, 111), (111, 143), (143, 116), (118, 
117), (117, 50), (50, 118), 286 | (223, 222), (222, 52), (52, 223), (94, 19), (19, 141), (141, 94), 287 | (222, 221), (221, 65), (65, 222), (196, 3), (3, 197), (197, 196), 288 | (45, 220), (220, 44), (44, 45), (156, 70), (70, 139), (139, 156), 289 | (188, 122), (122, 245), (245, 188), (139, 71), (71, 162), (162, 139), 290 | (149, 170), (170, 150), (150, 149), (122, 188), (188, 196), (196, 122), 291 | (206, 216), (216, 92), (92, 206), (164, 2), (2, 167), (167, 164), 292 | (242, 141), (141, 241), (241, 242), (0, 164), (164, 37), (37, 0), 293 | (11, 72), (72, 12), (12, 11), (12, 38), (38, 13), (13, 12), 294 | (70, 63), (63, 71), (71, 70), (31, 226), (226, 111), (111, 31), 295 | (36, 101), (101, 205), (205, 36), (203, 206), (206, 165), (165, 203), 296 | (126, 209), (209, 217), (217, 126), (98, 165), (165, 97), (97, 98), 297 | (237, 220), (220, 218), (218, 237), (237, 239), (239, 241), (241, 237), 298 | (210, 214), (214, 169), (169, 210), (140, 171), (171, 32), (32, 140), 299 | (241, 125), (125, 237), (237, 241), (179, 86), (86, 178), (178, 179), 300 | (180, 85), (85, 179), (179, 180), (181, 84), (84, 180), (180, 181), 301 | (182, 83), (83, 181), (181, 182), (194, 201), (201, 182), (182, 194), 302 | (177, 137), (137, 132), (132, 177), (184, 76), (76, 183), (183, 184), 303 | (185, 61), (61, 184), (184, 185), (186, 57), (57, 185), (185, 186), 304 | (216, 212), (212, 186), (186, 216), (192, 214), (214, 187), (187, 192), 305 | (139, 34), (34, 156), (156, 139), (218, 79), (79, 237), (237, 218), 306 | (147, 123), (123, 177), (177, 147), (45, 44), (44, 4), (4, 45), 307 | (208, 201), (201, 32), (32, 208), (98, 64), (64, 129), (129, 98), 308 | (192, 213), (213, 138), (138, 192), (235, 59), (59, 219), (219, 235), 309 | (141, 242), (242, 97), (97, 141), (97, 2), (2, 141), (141, 97), 310 | (240, 75), (75, 235), (235, 240), (229, 24), (24, 228), (228, 229), 311 | (31, 25), (25, 226), (226, 31), (230, 23), (23, 229), (229, 230), 312 | (231, 22), (22, 230), (230, 231), (232, 26), (26, 231), (231, 232), 313 | (233, 112), (112, 232), (232, 233), (244, 189), (189, 243), (243, 244), 314 | (189, 221), (221, 190), (190, 189), (222, 28), (28, 221), (221, 222), 315 | (223, 27), (27, 222), (222, 223), (224, 29), (29, 223), (223, 224), 316 | (225, 30), (30, 224), (224, 225), (113, 247), (247, 225), (225, 113), 317 | (99, 60), (60, 240), (240, 99), (213, 147), (147, 215), (215, 213), 318 | (60, 20), (20, 166), (166, 60), (192, 187), (187, 213), (213, 192), 319 | (243, 112), (112, 244), (244, 243), (244, 233), (233, 245), (245, 244), 320 | (245, 128), (128, 188), (188, 245), (188, 114), (114, 174), (174, 188), 321 | (134, 131), (131, 220), (220, 134), (174, 217), (217, 236), (236, 174), 322 | (236, 198), (198, 134), (134, 236), (215, 177), (177, 58), (58, 215), 323 | (156, 143), (143, 124), (124, 156), (25, 110), (110, 7), (7, 25), 324 | (31, 228), (228, 25), (25, 31), (264, 356), (356, 368), (368, 264), 325 | (0, 11), (11, 267), (267, 0), (451, 452), (452, 349), (349, 451), 326 | (267, 302), (302, 269), (269, 267), (350, 357), (357, 277), (277, 350), 327 | (350, 452), (452, 357), (357, 350), (299, 333), (333, 297), (297, 299), 328 | (396, 175), (175, 377), (377, 396), (280, 347), (347, 330), (330, 280), 329 | (269, 303), (303, 270), (270, 269), (151, 9), (9, 337), (337, 151), 330 | (344, 278), (278, 360), (360, 344), (424, 418), (418, 431), (431, 424), 331 | (270, 304), (304, 409), (409, 270), (272, 310), (310, 407), (407, 272), 332 | (322, 270), (270, 410), (410, 322), (449, 450), (450, 347), (347, 449), 333 | (432, 
422), (422, 434), (434, 432), (18, 313), (313, 17), (17, 18), 334 | (291, 306), (306, 375), (375, 291), (259, 387), (387, 260), (260, 259), 335 | (424, 335), (335, 418), (418, 424), (434, 364), (364, 416), (416, 434), 336 | (391, 423), (423, 327), (327, 391), (301, 251), (251, 298), (298, 301), 337 | (275, 281), (281, 4), (4, 275), (254, 373), (373, 253), (253, 254), 338 | (375, 307), (307, 321), (321, 375), (280, 425), (425, 411), (411, 280), 339 | (200, 421), (421, 18), (18, 200), (335, 321), (321, 406), (406, 335), 340 | (321, 320), (320, 405), (405, 321), (314, 315), (315, 17), (17, 314), 341 | (423, 426), (426, 266), (266, 423), (396, 377), (377, 369), (369, 396), 342 | (270, 322), (322, 269), (269, 270), (413, 417), (417, 464), (464, 413), 343 | (385, 386), (386, 258), (258, 385), (248, 456), (456, 419), (419, 248), 344 | (298, 284), (284, 333), (333, 298), (168, 417), (417, 8), (8, 168), 345 | (448, 346), (346, 261), (261, 448), (417, 413), (413, 285), (285, 417), 346 | (326, 327), (327, 328), (328, 326), (277, 355), (355, 329), (329, 277), 347 | (309, 392), (392, 438), (438, 309), (381, 382), (382, 256), (256, 381), 348 | (279, 429), (429, 360), (360, 279), (365, 364), (364, 379), (379, 365), 349 | (355, 277), (277, 437), (437, 355), (282, 443), (443, 283), (283, 282), 350 | (281, 275), (275, 363), (363, 281), (395, 431), (431, 369), (369, 395), 351 | (299, 297), (297, 337), (337, 299), (335, 273), (273, 321), (321, 335), 352 | (348, 450), (450, 349), (349, 348), (359, 446), (446, 467), (467, 359), 353 | (283, 293), (293, 282), (282, 283), (250, 458), (458, 462), (462, 250), 354 | (300, 276), (276, 383), (383, 300), (292, 308), (308, 325), (325, 292), 355 | (283, 276), (276, 293), (293, 283), (264, 372), (372, 447), (447, 264), 356 | (346, 352), (352, 340), (340, 346), (354, 274), (274, 19), (19, 354), 357 | (363, 456), (456, 281), (281, 363), (426, 436), (436, 425), (425, 426), 358 | (380, 381), (381, 252), (252, 380), (267, 269), (269, 393), (393, 267), 359 | (421, 200), (200, 428), (428, 421), (371, 266), (266, 329), (329, 371), 360 | (432, 287), (287, 422), (422, 432), (290, 250), (250, 328), (328, 290), 361 | (385, 258), (258, 384), (384, 385), (446, 265), (265, 342), (342, 446), 362 | (386, 387), (387, 257), (257, 386), (422, 424), (424, 430), (430, 422), 363 | (445, 342), (342, 276), (276, 445), (422, 273), (273, 424), (424, 422), 364 | (306, 292), (292, 307), (307, 306), (352, 366), (366, 345), (345, 352), 365 | (268, 271), (271, 302), (302, 268), (358, 423), (423, 371), (371, 358), 366 | (327, 294), (294, 460), (460, 327), (331, 279), (279, 294), (294, 331), 367 | (303, 271), (271, 304), (304, 303), (436, 432), (432, 427), (427, 436), 368 | (304, 272), (272, 408), (408, 304), (395, 394), (394, 431), (431, 395), 369 | (378, 395), (395, 400), (400, 378), (296, 334), (334, 299), (299, 296), 370 | (6, 351), (351, 168), (168, 6), (376, 352), (352, 411), (411, 376), 371 | (307, 325), (325, 320), (320, 307), (285, 295), (295, 336), (336, 285), 372 | (320, 319), (319, 404), (404, 320), (329, 330), (330, 349), (349, 329), 373 | (334, 293), (293, 333), (333, 334), (366, 323), (323, 447), (447, 366), 374 | (316, 15), (15, 315), (315, 316), (331, 358), (358, 279), (279, 331), 375 | (317, 14), (14, 316), (316, 317), (8, 285), (285, 9), (9, 8), 376 | (277, 329), (329, 350), (350, 277), (253, 374), (374, 252), (252, 253), 377 | (319, 318), (318, 403), (403, 319), (351, 6), (6, 419), (419, 351), 378 | (324, 318), (318, 325), (325, 324), (397, 367), (367, 365), (365, 397), 379 | (288, 
435), (435, 397), (397, 288), (278, 344), (344, 439), (439, 278), 380 | (310, 272), (272, 311), (311, 310), (248, 195), (195, 281), (281, 248), 381 | (375, 273), (273, 291), (291, 375), (175, 396), (396, 199), (199, 175), 382 | (312, 311), (311, 268), (268, 312), (276, 283), (283, 445), (445, 276), 383 | (390, 373), (373, 339), (339, 390), (295, 282), (282, 296), (296, 295), 384 | (448, 449), (449, 346), (346, 448), (356, 264), (264, 454), (454, 356), 385 | (337, 336), (336, 299), (299, 337), (337, 338), (338, 151), (151, 337), 386 | (294, 278), (278, 455), (455, 294), (308, 292), (292, 415), (415, 308), 387 | (429, 358), (358, 355), (355, 429), (265, 340), (340, 372), (372, 265), 388 | (352, 346), (346, 280), (280, 352), (295, 442), (442, 282), (282, 295), 389 | (354, 19), (19, 370), (370, 354), (285, 441), (441, 295), (295, 285), 390 | (195, 248), (248, 197), (197, 195), (457, 440), (440, 274), (274, 457), 391 | (301, 300), (300, 368), (368, 301), (417, 351), (351, 465), (465, 417), 392 | (251, 301), (301, 389), (389, 251), (394, 395), (395, 379), (379, 394), 393 | (399, 412), (412, 419), (419, 399), (410, 436), (436, 322), (322, 410), 394 | (326, 2), (2, 393), (393, 326), (354, 370), (370, 461), (461, 354), 395 | (393, 164), (164, 267), (267, 393), (268, 302), (302, 12), (12, 268), 396 | (312, 268), (268, 13), (13, 312), (298, 293), (293, 301), (301, 298), 397 | (265, 446), (446, 340), (340, 265), (280, 330), (330, 425), (425, 280), 398 | (322, 426), (426, 391), (391, 322), (420, 429), (429, 437), (437, 420), 399 | (393, 391), (391, 326), (326, 393), (344, 440), (440, 438), (438, 344), 400 | (458, 459), (459, 461), (461, 458), (364, 434), (434, 394), (394, 364), 401 | (428, 396), (396, 262), (262, 428), (274, 354), (354, 457), (457, 274), 402 | (317, 316), (316, 402), (402, 317), (316, 315), (315, 403), (403, 316), 403 | (315, 314), (314, 404), (404, 315), (314, 313), (313, 405), (405, 314), 404 | (313, 421), (421, 406), (406, 313), (323, 366), (366, 361), (361, 323), 405 | (292, 306), (306, 407), (407, 292), (306, 291), (291, 408), (408, 306), 406 | (291, 287), (287, 409), (409, 291), (287, 432), (432, 410), (410, 287), 407 | (427, 434), (434, 411), (411, 427), (372, 264), (264, 383), (383, 372), 408 | (459, 309), (309, 457), (457, 459), (366, 352), (352, 401), (401, 366), 409 | (1, 274), (274, 4), (4, 1), (418, 421), (421, 262), (262, 418), 410 | (331, 294), (294, 358), (358, 331), (435, 433), (433, 367), (367, 435), 411 | (392, 289), (289, 439), (439, 392), (328, 462), (462, 326), (326, 328), 412 | (94, 2), (2, 370), (370, 94), (289, 305), (305, 455), (455, 289), 413 | (339, 254), (254, 448), (448, 339), (359, 255), (255, 446), (446, 359), 414 | (254, 253), (253, 449), (449, 254), (253, 252), (252, 450), (450, 253), 415 | (252, 256), (256, 451), (451, 252), (256, 341), (341, 452), (452, 256), 416 | (414, 413), (413, 463), (463, 414), (286, 441), (441, 414), (414, 286), 417 | (286, 258), (258, 441), (441, 286), (258, 257), (257, 442), (442, 258), 418 | (257, 259), (259, 443), (443, 257), (259, 260), (260, 444), (444, 259), 419 | (260, 467), (467, 445), (445, 260), (309, 459), (459, 250), (250, 309), 420 | (305, 289), (289, 290), (290, 305), (305, 290), (290, 460), (460, 305), 421 | (401, 376), (376, 435), (435, 401), (309, 250), (250, 392), (392, 309), 422 | (376, 411), (411, 433), (433, 376), (453, 341), (341, 464), (464, 453), 423 | (357, 453), (453, 465), (465, 357), (343, 357), (357, 412), (412, 343), 424 | (437, 343), (343, 399), (399, 437), (344, 360), (360, 440), (440, 344), 
425 | (420, 437), (437, 456), (456, 420), (360, 420), (420, 363), (363, 360), 426 | (361, 401), (401, 288), (288, 361), (265, 372), (372, 353), (353, 265), 427 | (390, 339), (339, 249), (249, 390), (339, 448), (448, 255), (255, 339)] 428 | 429 | mouthPoints = [61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 375, 321, 405, 430 | 314, 17, 84, 181, 91, 146, 76, 184, 74, 73, 72, 11, 302, 303, 304, 431 | 408, 206, 307, 320, 404, 315, 16, 85, 180, 90, 77, 62, 183, 42, 432 | 41, 38, 12, 268, 271, 272, 407, 293, 325, 319, 403, 316, 15, 86, 433 | 179, 89, 96, 78, 191, 95, 80, 88, 81, 178, 82, 87, 13, 14, 312, 317, 434 | 311, 402, 310, 318, 415, 324, 308] 435 | 436 | chins = [93, 137, 123, 50, 205, 206, 165, 167, 164, 393, 391, 426, 425, 280, 352, 437 | 366, 323, 361, 401, 376, 411, 427, 436, 322, 92, 216, 207, 187, 147, 177, 438 | 132, 58, 215, 213, 192, 214, 212, 57, 186, 43, 106, 182, 83, 18, 313, 406, 439 | 335, 273, 287, 432, 434, 416, 433, 435, 288, 297, 367, 364, 365, 430, 394, 440 | 379, 422, 397, 424, 431, 395, 378, 418, 262, 369, 400, 421, 428, 396, 377, 441 | 200, 199, 175, 152, 201, 208, 171, 148, 194, 32, 140, 176, 204, 211, 170, 442 | 149, 202, 210, 169, 150, 135, 136, 138, 172] 443 | 444 | rest = [i for i in range(468) if i not in chins] -------------------------------------------------------------------------------- /lipsync3d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/__init__.py -------------------------------------------------------------------------------- /lipsync3d/__pycache__/audio.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/__pycache__/audio.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/__pycache__/dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/__pycache__/dataset.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/__pycache__/hparams.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/__pycache__/hparams.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/__pycache__/loss.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/__pycache__/loss.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/__pycache__/model.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/__pycache__/model.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/__pycache__/options.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/__pycache__/options.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /lipsync3d/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audio.py: -------------------------------------------------------------------------------- 1 | import librosa 2 | import librosa.filters 3 | import numpy as np 4 | # import tensorflow as tf 5 | from scipy import signal 6 | from scipy.io import wavfile 7 | from hparams import hparams as hp 8 | 9 | def load_wav(path, sr): 10 | return librosa.core.load(path, sr=sr)[0] 11 | 12 | def save_wav(wav, path, sr): 13 | wav *= 32767 / max(0.01, np.max(np.abs(wav))) 14 | #proposed by @dsmiller 15 | wavfile.write(path, sr, wav.astype(np.int16)) 16 | 17 | def save_wavenet_wav(wav, path, sr): 18 | librosa.output.write_wav(path, wav, sr=sr) 19 | 20 | def preemphasis(wav, k, preemphasize=True): 21 | if preemphasize: 22 | return signal.lfilter([1, -k], [1], wav) 23 | return wav 24 | 25 | def inv_preemphasis(wav, k, inv_preemphasize=True): 26 | if inv_preemphasize: 27 | return signal.lfilter([1], [1, -k], wav) 28 | return wav 29 | 30 | def get_hop_size(): 31 | hop_size = hp.hop_size 32 | if hop_size is None: 33 | assert hp.frame_shift_ms is not None 34 | hop_size = int(hp.frame_shift_ms / 1000 * hp.sample_rate) 35 | return hop_size 36 | 37 | def linearspectrogram(wav): 38 | D = _stft(preemphasis(wav, hp.preemphasis, hp.preemphasize)) 39 | S = _amp_to_db(np.abs(D)) - hp.ref_level_db 40 | 41 | if hp.signal_normalization: 42 | return _normalize(S) 43 | return S 44 | 45 | def melspectrogram(wav): 46 | D = _stft(preemphasis(wav, hp.preemphasis, hp.preemphasize)) 47 | S = _amp_to_db(_linear_to_mel(np.abs(D))) - hp.ref_level_db 48 | 49 | if hp.signal_normalization: 50 | return _normalize(S) 51 | return S 52 | 53 | def _lws_processor(): 54 | import lws 55 | return lws.lws(hp.n_fft, get_hop_size(), fftsize=hp.win_size, mode="speech") 56 | 57 | def _stft(y): 58 | if hp.use_lws: 59 | return _lws_processor(hp).stft(y).T 60 | else: 61 | return librosa.stft(y=y, n_fft=hp.n_fft, hop_length=get_hop_size(), win_length=hp.win_size) 62 | 63 | ########################################################## 64 | #Those are only correct when using lws!!! (This was messing with Wavenet quality for a long time!) 
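# Illustrative arithmetic (values picked here for clarity, not taken from hparams):
#   length=16000, fsize=800, fshift=200  ->  pad = 600; since 16000 % 200 == 0,
#   M = (16000 + 2*600 - 800) // 200 + 1 = 83, and pad_lr returns (600, 600) because
#   r = (83 - 1)*200 + 800 - 17200 = 0.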
65 | def num_frames(length, fsize, fshift): 66 | """Compute number of time frames of spectrogram 67 | """ 68 | pad = (fsize - fshift) 69 | if length % fshift == 0: 70 | M = (length + pad * 2 - fsize) // fshift + 1 71 | else: 72 | M = (length + pad * 2 - fsize) // fshift + 2 73 | return M 74 | 75 | 76 | def pad_lr(x, fsize, fshift): 77 | """Compute left and right padding 78 | """ 79 | M = num_frames(len(x), fsize, fshift) 80 | pad = (fsize - fshift) 81 | T = len(x) + 2 * pad 82 | r = (M - 1) * fshift + fsize - T 83 | return pad, pad + r 84 | ########################################################## 85 | #Librosa correct padding 86 | def librosa_pad_lr(x, fsize, fshift): 87 | return 0, (x.shape[0] // fshift + 1) * fshift - x.shape[0] 88 | 89 | # Conversions 90 | _mel_basis = None 91 | 92 | def _linear_to_mel(spectogram): 93 | global _mel_basis 94 | if _mel_basis is None: 95 | _mel_basis = _build_mel_basis() 96 | return np.dot(_mel_basis, spectogram) 97 | 98 | def _build_mel_basis(): 99 | assert hp.fmax <= hp.sample_rate // 2 100 | return librosa.filters.mel(hp.sample_rate, hp.n_fft, n_mels=hp.num_mels, 101 | fmin=hp.fmin, fmax=hp.fmax) 102 | 103 | def _amp_to_db(x): 104 | min_level = np.exp(hp.min_level_db / 20 * np.log(10)) 105 | return 20 * np.log10(np.maximum(min_level, x)) 106 | 107 | def _db_to_amp(x): 108 | return np.power(10.0, (x) * 0.05) 109 | 110 | def _normalize(S): 111 | if hp.allow_clipping_in_normalization: 112 | if hp.symmetric_mels: 113 | return np.clip((2 * hp.max_abs_value) * ((S - hp.min_level_db) / (-hp.min_level_db)) - hp.max_abs_value, 114 | -hp.max_abs_value, hp.max_abs_value) 115 | else: 116 | return np.clip(hp.max_abs_value * ((S - hp.min_level_db) / (-hp.min_level_db)), 0, hp.max_abs_value) 117 | 118 | assert S.max() <= 0 and S.min() - hp.min_level_db >= 0 119 | if hp.symmetric_mels: 120 | return (2 * hp.max_abs_value) * ((S - hp.min_level_db) / (-hp.min_level_db)) - hp.max_abs_value 121 | else: 122 | return hp.max_abs_value * ((S - hp.min_level_db) / (-hp.min_level_db)) 123 | 124 | def _denormalize(D): 125 | if hp.allow_clipping_in_normalization: 126 | if hp.symmetric_mels: 127 | return (((np.clip(D, -hp.max_abs_value, 128 | hp.max_abs_value) + hp.max_abs_value) * -hp.min_level_db / (2 * hp.max_abs_value)) 129 | + hp.min_level_db) 130 | else: 131 | return ((np.clip(D, 0, hp.max_abs_value) * -hp.min_level_db / hp.max_abs_value) + hp.min_level_db) 132 | 133 | if hp.symmetric_mels: 134 | return (((D + hp.max_abs_value) * -hp.min_level_db / (2 * hp.max_abs_value)) + hp.min_level_db) 135 | else: 136 | return ((D * -hp.min_level_db / hp.max_abs_value) + hp.min_level_db) 137 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/__init__.py -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/__pycache__/__init__.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/__pycache__/util.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/__pycache__/util.cpython-36.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/__pycache__/util.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/__pycache__/util.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/__pycache__/visualizer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/__pycache__/visualizer.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/audio.py: -------------------------------------------------------------------------------- 1 | import librosa 2 | import librosa.filters 3 | import numpy as np 4 | # import tensorflow as tf 5 | from scipy import signal 6 | from scipy.io import wavfile 7 | from .hparams import hparams as hp 8 | 9 | def load_wav(path, sr): 10 | return librosa.core.load(path, sr=sr)[0] 11 | 12 | def save_wav(wav, path, sr): 13 | wav *= 32767 / max(0.01, np.max(np.abs(wav))) 14 | #proposed by @dsmiller 15 | wavfile.write(path, sr, wav.astype(np.int16)) 16 | 17 | def save_wavenet_wav(wav, path, sr): 18 | librosa.output.write_wav(path, wav, sr=sr) 19 | 20 | def preemphasis(wav, k, preemphasize=True): 21 | if preemphasize: 22 | return signal.lfilter([1, -k], [1], wav) 23 | return wav 24 | 25 | def inv_preemphasis(wav, k, inv_preemphasize=True): 26 | if inv_preemphasize: 27 | return signal.lfilter([1], [1, -k], wav) 28 | return wav 29 | 30 | def get_hop_size(): 31 | hop_size = hp.hop_size 32 | if hop_size is None: 33 | assert hp.frame_shift_ms is not None 34 | hop_size = int(hp.frame_shift_ms / 1000 * hp.sample_rate) 35 | return hop_size 36 | 37 | def linearspectrogram(wav): 38 | D = _stft(preemphasis(wav, hp.preemphasis, hp.preemphasize)) 39 | S = _amp_to_db(np.abs(D)) - hp.ref_level_db 40 | 41 | if hp.signal_normalization: 42 | return _normalize(S) 43 | return S 44 | 45 | def melspectrogram(wav): 46 | D = _stft(preemphasis(wav, hp.preemphasis, hp.preemphasize)) 47 | S = _amp_to_db(_linear_to_mel(np.abs(D))) - hp.ref_level_db 48 | 49 | if hp.signal_normalization: 50 | return _normalize(S) 51 | return S 52 | 53 | def _lws_processor(): 54 | import lws 55 | return lws.lws(hp.n_fft, get_hop_size(), fftsize=hp.win_size, mode="speech") 56 | 57 | def _stft(y): 58 | if hp.use_lws: 59 | return _lws_processor(hp).stft(y).T 60 | else: 61 | return librosa.stft(y=y, n_fft=hp.n_fft, hop_length=get_hop_size(), win_length=hp.win_size) 62 | 63 | ########################################################## 64 | #Those are only correct when using lws!!! 
(This was messing with Wavenet quality for a long time!) 65 | def num_frames(length, fsize, fshift): 66 | """Compute number of time frames of spectrogram 67 | """ 68 | pad = (fsize - fshift) 69 | if length % fshift == 0: 70 | M = (length + pad * 2 - fsize) // fshift + 1 71 | else: 72 | M = (length + pad * 2 - fsize) // fshift + 2 73 | return M 74 | 75 | 76 | def pad_lr(x, fsize, fshift): 77 | """Compute left and right padding 78 | """ 79 | M = num_frames(len(x), fsize, fshift) 80 | pad = (fsize - fshift) 81 | T = len(x) + 2 * pad 82 | r = (M - 1) * fshift + fsize - T 83 | return pad, pad + r 84 | ########################################################## 85 | #Librosa correct padding 86 | def librosa_pad_lr(x, fsize, fshift): 87 | return 0, (x.shape[0] // fshift + 1) * fshift - x.shape[0] 88 | 89 | # Conversions 90 | _mel_basis = None 91 | 92 | def _linear_to_mel(spectogram): 93 | global _mel_basis 94 | if _mel_basis is None: 95 | _mel_basis = _build_mel_basis() 96 | return np.dot(_mel_basis, spectogram) 97 | 98 | def _build_mel_basis(): 99 | assert hp.fmax <= hp.sample_rate // 2 100 | return librosa.filters.mel(hp.sample_rate, hp.n_fft, n_mels=hp.num_mels, 101 | fmin=hp.fmin, fmax=hp.fmax) 102 | 103 | def _amp_to_db(x): 104 | min_level = np.exp(hp.min_level_db / 20 * np.log(10)) 105 | return 20 * np.log10(np.maximum(min_level, x)) 106 | 107 | def _db_to_amp(x): 108 | return np.power(10.0, (x) * 0.05) 109 | 110 | def _normalize(S): 111 | if hp.allow_clipping_in_normalization: 112 | if hp.symmetric_mels: 113 | return np.clip((2 * hp.max_abs_value) * ((S - hp.min_level_db) / (-hp.min_level_db)) - hp.max_abs_value, 114 | -hp.max_abs_value, hp.max_abs_value) 115 | else: 116 | return np.clip(hp.max_abs_value * ((S - hp.min_level_db) / (-hp.min_level_db)), 0, hp.max_abs_value) 117 | 118 | assert S.max() <= 0 and S.min() - hp.min_level_db >= 0 119 | if hp.symmetric_mels: 120 | return (2 * hp.max_abs_value) * ((S - hp.min_level_db) / (-hp.min_level_db)) - hp.max_abs_value 121 | else: 122 | return hp.max_abs_value * ((S - hp.min_level_db) / (-hp.min_level_db)) 123 | 124 | def _denormalize(D): 125 | if hp.allow_clipping_in_normalization: 126 | if hp.symmetric_mels: 127 | return (((np.clip(D, -hp.max_abs_value, 128 | hp.max_abs_value) + hp.max_abs_value) * -hp.min_level_db / (2 * hp.max_abs_value)) 129 | + hp.min_level_db) 130 | else: 131 | return ((np.clip(D, 0, hp.max_abs_value) * -hp.min_level_db / hp.max_abs_value) + hp.min_level_db) 132 | 133 | if hp.symmetric_mels: 134 | return (((D + hp.max_abs_value) * -hp.min_level_db / (2 * hp.max_abs_value)) + hp.min_level_db) 135 | else: 136 | return ((D * -hp.min_level_db / hp.max_abs_value) + hp.min_level_db) 137 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/build_nfr_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | Following https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/datasets/combine_A_and_B.py 3 | """ 4 | 5 | import os 6 | import cv2 7 | import numpy as np 8 | from tqdm import tqdm 9 | 10 | import sys 11 | sys.path.append(".") 12 | 13 | from models import networks 14 | from options.options import Options 15 | from audiodvp_utils.util import create_dir, load_coef, get_file_list, load_face_emb, get_max_crop_region 16 | from audiodvp_utils.rescale_image import rescale_and_paste 17 | 18 | 19 | if __name__ == '__main__': 20 | opt = Options().parse_args() 21 | 22 | create_dir(os.path.join(opt.data_dir, 'mask')) 23 | 
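# Load the per-frame outputs of the earlier reconstruction stage; in the AudioDVP convention these
# appear to be identity (alpha), albedo (beta), expression (delta) and lighting (gamma) coefficients,
# plus head pose (rotation, translation), a face embedding and the crop region for each frame.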
24 | alpha_list = load_coef(os.path.join(opt.data_dir, 'alpha')) 25 | beta_list = load_coef(os.path.join(opt.data_dir, 'beta')) 26 | delta_list = load_coef(os.path.join(opt.data_dir, 'delta')) 27 | gamma_list = load_coef(os.path.join(opt.data_dir, 'gamma')) 28 | angle_list = load_coef(os.path.join(opt.data_dir, 'rotation')) 29 | translation_list = load_coef(os.path.join(opt.data_dir, 'translation')) 30 | face_emb_list = load_face_emb(opt.data_dir) 31 | 32 | crop_region_list = load_coef(os.path.join(opt.data_dir, 'crop_region')) 33 | full_image_list = get_file_list(os.path.join(opt.data_dir, 'full')) 34 | 35 | top, bottom, left, right = get_max_crop_region(crop_region_list) 36 | H, W, _ = cv2.imread(full_image_list[0]).shape 37 | mouth_mask = networks.MouthMask(opt).to(opt.device) 38 | 39 | for i in tqdm(range(len(alpha_list))): 40 | alpha = alpha_list[i].unsqueeze(0).cuda() 41 | beta = beta_list[i].unsqueeze(0).cuda() 42 | delta = delta_list[i].unsqueeze(0).cuda() 43 | gamma = gamma_list[i].unsqueeze(0).cuda() 44 | rotation = angle_list[i].unsqueeze(0).cuda() 45 | translation = translation_list[i].unsqueeze(0).cuda() 46 | face_emb = face_emb_list[i].unsqueeze(0).cuda() 47 | crop_region = crop_region_list[i] 48 | empty_image = np.zeros((H, W), np.uint8) 49 | 50 | mask = mouth_mask(alpha, delta, beta, gamma, rotation, translation, face_emb) 51 | mask = mask.squeeze(0).detach().cpu().permute(1, 2, 0).numpy() * 255.0 52 | mask = cv2.dilate(mask, np.ones((3,3), np.uint8), iterations=4) 53 | rescaled_mask = rescale_and_paste(crop_region, empty_image, mask) 54 | rescaled_mask = rescaled_mask[top:bottom, left:right] 55 | rescaled_mask = cv2.resize(rescaled_mask, (opt.image_width, opt.image_height), interpolation=cv2.INTER_AREA) 56 | 57 | cv2.imwrite(os.path.join(opt.data_dir, 'mask', '%05d.png' % (i+1)), rescaled_mask) 58 | 59 | create_dir(os.path.join(opt.data_dir, 'nfr', 'A', 'train')) 60 | create_dir(os.path.join(opt.data_dir, 'nfr', 'B', 'train')) 61 | 62 | masks = get_file_list(os.path.join(opt.data_dir, 'mask')) 63 | renders = get_file_list(os.path.join(opt.data_dir, 'render')) 64 | 65 | for i in tqdm(range(len(masks))): 66 | mask = cv2.imread(masks[i]) 67 | render = cv2.imread(renders[i]) 68 | full = cv2.imread(full_image_list[i]) 69 | crop_region = crop_region_list[i] 70 | 71 | empty_image = np.zeros((H, W, 3), np.uint8) 72 | 73 | rescaled_render = rescale_and_paste(crop_region, empty_image, render) 74 | rescaled_render = rescaled_render[top:bottom, left:right] 75 | rescaled_render = cv2.resize(rescaled_render, (opt.image_width, opt.image_height), interpolation=cv2.INTER_AREA) 76 | 77 | rescaled_crop = full[top:bottom, left:right] 78 | rescaled_crop = cv2.resize(rescaled_crop, (opt.image_width, opt.image_height), interpolation=cv2.INTER_AREA) 79 | 80 | masked_crop = cv2.bitwise_and(rescaled_crop, mask) 81 | masked_render = cv2.bitwise_and(rescaled_render, mask) 82 | 83 | cv2.imwrite(os.path.join(opt.data_dir, 'nfr', 'A', 'train', '%05d.png' % (i+1)), masked_crop) 84 | cv2.imwrite(os.path.join(opt.data_dir, 'nfr', 'B', 'train', '%05d.png' % (i+1)), masked_render) 85 | 86 | splits = os.listdir(os.path.join(opt.data_dir, 'nfr', 'A')) 87 | 88 | for sp in splits: 89 | image_fold_A = os.path.join(os.path.join(opt.data_dir, 'nfr', 'A'), sp) 90 | image_fold_B = os.path.join(os.path.join(opt.data_dir, 'nfr', 'B'), sp) 91 | image_list = os.listdir(image_fold_A) 92 | 93 | image_fold_AB = os.path.join(opt.data_dir, 'nfr', 'AB', sp) 94 | if not os.path.isdir(image_fold_AB): 95 | 
os.makedirs(image_fold_AB) 96 | 97 | for n in tqdm(range(len(image_list))): 98 | name_A = image_list[n] 99 | path_A = os.path.join(image_fold_A, name_A) 100 | 101 | name_B = name_A 102 | path_B = os.path.join(image_fold_B, name_B) 103 | 104 | if os.path.isfile(path_A) and os.path.isfile(path_B): 105 | name_AB = name_A 106 | path_AB = os.path.join(image_fold_AB, name_AB) 107 | im_A = cv2.imread(path_A, 1) # python2: cv2.CV_LOAD_IMAGE_COLOR; python3: cv2.IMREAD_COLOR 108 | im_B = cv2.imread(path_B, 1) # python2: cv2.CV_LOAD_IMAGE_COLOR; python3: cv2.IMREAD_COLOR 109 | im_AB = np.concatenate([im_A, im_B], 1) 110 | cv2.imwrite(path_AB, im_AB) 111 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/crop_portrait-checkpoint.py: -------------------------------------------------------------------------------- 1 | """ 2 | Crop upper boddy in every video frame, square bounding box is averaged among all frames and fixed. 3 | """ 4 | 5 | import os 6 | import cv2 7 | import argparse 8 | from tqdm import tqdm 9 | import face_recognition 10 | 11 | import util 12 | 13 | 14 | def calc_bbox(image_list, batch_size=5): 15 | """Batch infer of face location, batch_size should be factor of total frame number.""" 16 | top_sum = right_sum = bottom_sum = left_sum = 0 17 | 18 | for i in tqdm(range(len(image_list) // batch_size - batch_size)): 19 | image_batch = [] 20 | 21 | for j in range(i * batch_size, (i + 1) * batch_size): 22 | image = face_recognition.load_image_file(image_list[j]) 23 | image_batch.append(image) 24 | 25 | face_locations = face_recognition.batch_face_locations(image_batch, number_of_times_to_upsample=0, batch_size=batch_size) 26 | for face_location in face_locations: 27 | top, right, bottom, left = face_location[0] # assuming only one face detected in the frame 28 | top_sum += top 29 | right_sum += right 30 | bottom_sum += bottom 31 | left_sum += left 32 | 33 | return (top_sum // len(image_list), right_sum // len(image_list), bottom_sum // len(image_list), left_sum // len(image_list)) 34 | 35 | 36 | def crop_image(data_dir, dest_size, crop_level, vertical_adjust): 37 | image_list = util.get_file_list(os.path.join(data_dir, 'full')) 38 | top, right, bottom, left = calc_bbox(image_list) 39 | 40 | height = bottom - top 41 | width = right - left 42 | 43 | crop_size = int(height * crop_level) 44 | 45 | horizontal_delta = (crop_size - width) // 2 46 | left -= horizontal_delta 47 | right += horizontal_delta 48 | 49 | top = int(top * vertical_adjust) 50 | bottom = top + crop_size 51 | 52 | for i in tqdm(range(len(image_list))): 53 | image =cv2.imread(image_list[i]) 54 | image = image[top:bottom, left:right] 55 | 56 | image = cv2.resize(image, (dest_size, dest_size), interpolation=cv2.INTER_AREA) 57 | cv2.imwrite(os.path.join(args.data_dir, 'crop', os.path.basename(image_list[i])), image) 58 | 59 | 60 | if __name__ == '__main__': 61 | parser = argparse.ArgumentParser(description='Process some integers.') 62 | parser.add_argument('--data_dir', type=str, default=None) 63 | parser.add_argument('--dest_size', type=int, default=256) 64 | parser.add_argument('--crop_level', type=float, default=2.0, help='Adjust crop image size.') 65 | parser.add_argument('--vertical_adjust', type=float, default=0.3, help='Adjust vertical location of portrait in image.') 66 | args = parser.parse_args() 67 | crop_image(args.data_dir, dest_size=args.dest_size, crop_level=args.crop_level, vertical_adjust=args.vertical_adjust) 68 | 
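Both this notebook-checkpoint copy and the crop_portrait.py that follows expose the same command-line interface. A plausible invocation, assuming frames were first extracted into <data_dir>/full (the data directory value itself is a placeholder), would be:

    python lipsync3d/audiodvp_utils/crop_portrait.py --data_dir data/speaker --dest_size 256 --crop_level 2.0 --vertical_adjust 0.3

The script writes fixed-size square crops to <data_dir>/crop; the newer version below additionally stores each frame's crop box as a .pt file under <data_dir>/crop_region.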
-------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/crop_portrait.py: -------------------------------------------------------------------------------- 1 | """ 2 | Crop upper boddy in every video frame, square bounding box is averaged among all frames and fixed. 3 | """ 4 | 5 | import os 6 | import cv2 7 | import argparse 8 | from tqdm import tqdm 9 | import face_recognition 10 | import torch 11 | import util 12 | import numpy as np 13 | import face_detection 14 | 15 | def calc_bbox(image_list, batch_size=5): 16 | """Batch infer of face location, batch_size should be factor of total frame number.""" 17 | fa = face_detection.FaceAlignment(face_detection.LandmarksType._2D, flip_input=False, device='cuda') 18 | 19 | top_best = 10000 20 | bottom_best = 0 21 | right_best = 0 22 | left_best = 10000 23 | 24 | for i in tqdm(range(len(image_list) // batch_size - batch_size)): 25 | image_batch = [] 26 | 27 | for j in range(i * batch_size, (i + 1) * batch_size): 28 | image = face_recognition.load_image_file(image_list[j]) 29 | image_batch.append(image) 30 | 31 | # face_locations = face_recognition.batch_face_locations(image_batch, number_of_times_to_upsample=0, batch_size=batch_size) 32 | preds = fa.get_detections_for_batch(np.asarray(image_batch)) 33 | 34 | for face_location in preds: 35 | left, top, right, bottom = face_location # assuming only one face detected in the frame 36 | if top_best > top: 37 | top_best = top 38 | if bottom_best < bottom: 39 | bottom_best = bottom 40 | if right_best < right: 41 | right_best = right 42 | if left_best > left: 43 | left_best = left 44 | 45 | return top_best, right_best, bottom_best, left_best 46 | 47 | 48 | def crop_image(data_dir, dest_size, crop_level, vertical_adjust): 49 | image_list = util.get_file_list(os.path.join(data_dir, 'full')) 50 | H, W, _ = face_recognition.load_image_file(image_list[0]).shape 51 | top, right, bottom, left = calc_bbox(image_list) 52 | height = bottom - top 53 | width = right - left 54 | 55 | crop_size = int(height * crop_level) 56 | 57 | horizontal_delta = (crop_size - width) // 2 58 | vertical_delta = (crop_size - height) // 2 59 | 60 | left = max(left - horizontal_delta, 0) 61 | right = min(right + horizontal_delta, W) 62 | 63 | top = max(top - int(vertical_delta * 0.5), 0) 64 | bottom = min(bottom + int(vertical_delta * 1.5), H) 65 | 66 | for i in tqdm(range(len(image_list))): 67 | image =cv2.imread(image_list[i]) 68 | image = image[top:bottom, left:right] 69 | 70 | image = cv2.resize(image, (dest_size, dest_size), interpolation=cv2.INTER_AREA) 71 | cv2.imwrite(os.path.join(args.data_dir, 'crop', os.path.basename(image_list[i])), image) 72 | torch.save([top, bottom, left, right], os.path.join(data_dir, 'crop_region', os.path.basename(image_list[i]))[:-4]+'.pt') 73 | 74 | 75 | 76 | def crop_per_image(data_dir, dest_size, crop_level): 77 | fa = face_detection.FaceAlignment(face_detection.LandmarksType._2D, flip_input=False, device='cuda') 78 | 79 | image_list = util.get_file_list(os.path.join(data_dir, 'full')) 80 | batch_size = 5 81 | frames = [] 82 | 83 | for i in tqdm(range(len(image_list))): 84 | frame = face_recognition.load_image_file(image_list[i]) 85 | frames.append(frame) 86 | 87 | H, W, _ = frames[0].shape 88 | 89 | batches = [frames[i:i + batch_size] for i in range(0, len(frames), batch_size)] 90 | 91 | for idx in tqdm(range(len(batches))): 92 | fb = batches[idx] 93 | preds = fa.get_detections_for_batch(np.asarray(fb)) 94 | 95 | for j, f in enumerate(preds): 96 | 
if f is None: 97 | print('no face in image {}'.format(idx * batch_size + j)) 98 | else: 99 | left, top, right, bottom = f 100 | 101 | 102 | height = bottom - top 103 | width = right - left 104 | crop_size = int(height * crop_level) 105 | 106 | horizontal_delta = (crop_size - width) // 2 107 | vertical_delta = (crop_size - height) // 2 108 | 109 | left = max(left - horizontal_delta, 0) 110 | right = min(right + horizontal_delta, W) 111 | top = max(top - int(vertical_delta * 0.5), 0) 112 | bottom = min(bottom + int(vertical_delta * 1.5), H) 113 | 114 | crop_f = cv2.imread(image_list[idx * batch_size + j]) 115 | crop_f = crop_f[top:bottom, left:right] 116 | crop_f = cv2.resize(crop_f, (dest_size, dest_size), interpolation=cv2.INTER_AREA) 117 | cv2.imwrite(os.path.join(data_dir, 'crop', os.path.basename(image_list[idx * batch_size + j])), crop_f) 118 | torch.save([top, bottom, left, right], os.path.join(data_dir, 'crop_region', os.path.basename(image_list[idx * batch_size + j]))[:-4]+'.pt') 119 | 120 | 121 | if __name__ == '__main__': 122 | parser = argparse.ArgumentParser(description='Process some integers.') 123 | parser.add_argument('--data_dir', type=str, default=None) 124 | parser.add_argument('--dest_size', type=int, default=256) 125 | parser.add_argument('--crop_level', type=float, default=2.0, help='Adjust crop image size.') 126 | parser.add_argument('--vertical_adjust', type=float, default=0.3, help='Adjust vertical location of portrait in image.') 127 | args = parser.parse_args() 128 | util.create_dir(os.path.join(args.data_dir,'crop')) 129 | util.create_dir(os.path.join(args.data_dir, 'crop_region')) 130 | # crop_per_image(args.data_dir, dest_size=args.dest_size, crop_level=args.crop_level) 131 | crop_image(args.data_dir, dest_size=args.dest_size, crop_level=args.crop_level, vertical_adjust=args.vertical_adjust) 132 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/README.md: -------------------------------------------------------------------------------- 1 | The code for Face Detection in this folder has been taken from the wonderful [face_alignment](https://github.com/1adrianb/face-alignment) repository. This has been modified to take batches of faces at a time. 
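A minimal usage sketch of the batched interface this folder provides, mirroring how crop_portrait.py above calls it (frame_paths is a placeholder list of image paths, and all frames are assumed to share the same resolution):

    import numpy as np
    import face_recognition
    import face_detection

    # load the SFD-based detector once, then run it over a whole batch of frames
    fa = face_detection.FaceAlignment(face_detection.LandmarksType._2D, flip_input=False, device='cuda')
    frames = [face_recognition.load_image_file(p) for p in frame_paths]  # RGB frames of equal size
    preds = fa.get_detections_for_batch(np.asarray(frames))              # one (x1, y1, x2, y2) box or None per frame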
-------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __author__ = """Adrian Bulat""" 4 | __email__ = 'adrian.bulat@nottingham.ac.uk' 5 | __version__ = '1.0.1' 6 | 7 | from .api import FaceAlignment, LandmarksType, NetworkSize 8 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/face_detection/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/__pycache__/api.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/face_detection/__pycache__/api.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/__pycache__/models.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/face_detection/__pycache__/models.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/face_detection/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/api.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.model_zoo import load_url 5 | from enum import Enum 6 | import numpy as np 7 | import cv2 8 | try: 9 | import urllib.request as request_file 10 | except BaseException: 11 | import urllib as request_file 12 | 13 | from .models import FAN, ResNetDepth 14 | from .utils import * 15 | 16 | 17 | class LandmarksType(Enum): 18 | """Enum class defining the type of landmarks to detect. 
19 | 20 | ``_2D`` - the detected points ``(x,y)`` are detected in a 2D space and follow the visible contour of the face 21 | ``_2halfD`` - this points represent the projection of the 3D points into 3D 22 | ``_3D`` - detect the points ``(x,y,z)``` in a 3D space 23 | 24 | """ 25 | _2D = 1 26 | _2halfD = 2 27 | _3D = 3 28 | 29 | 30 | class NetworkSize(Enum): 31 | # TINY = 1 32 | # SMALL = 2 33 | # MEDIUM = 3 34 | LARGE = 4 35 | 36 | def __new__(cls, value): 37 | member = object.__new__(cls) 38 | member._value_ = value 39 | return member 40 | 41 | def __int__(self): 42 | return self.value 43 | 44 | ROOT = os.path.dirname(os.path.abspath(__file__)) 45 | 46 | class FaceAlignment: 47 | def __init__(self, landmarks_type, network_size=NetworkSize.LARGE, 48 | device='cuda', flip_input=False, face_detector='sfd', verbose=False): 49 | self.device = device 50 | self.flip_input = flip_input 51 | self.landmarks_type = landmarks_type 52 | self.verbose = verbose 53 | 54 | network_size = int(network_size) 55 | 56 | if 'cuda' in device: 57 | torch.backends.cudnn.benchmark = True 58 | 59 | # Get the face detector 60 | face_detector_module = __import__('face_detection.detection.' + face_detector, 61 | globals(), locals(), [face_detector], 0) 62 | self.face_detector = face_detector_module.FaceDetector(device=device, verbose=verbose) 63 | 64 | def get_detections_for_batch(self, images): 65 | images = images[..., ::-1] 66 | detected_faces = self.face_detector.detect_from_batch(images.copy()) 67 | results = [] 68 | 69 | for i, d in enumerate(detected_faces): 70 | if len(d) == 0: 71 | results.append(None) 72 | continue 73 | d = d[0] 74 | d = np.clip(d, 0, None) 75 | 76 | x1, y1, x2, y2 = map(int, d[:-1]) 77 | results.append((x1, y1, x2, y2)) 78 | 79 | return results -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/__init__.py: -------------------------------------------------------------------------------- 1 | from .core import FaceDetector -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/face_detection/detection/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/__pycache__/core.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/face_detection/detection/__pycache__/core.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/core.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import glob 3 | from tqdm import tqdm 4 | import numpy as np 5 | import torch 6 | import cv2 7 | 8 | 9 | class FaceDetector(object): 10 | """An abstract class representing a face detector. 11 | 12 | Any other face detection implementation must subclass it. All subclasses 13 | must implement ``detect_from_image``, that return a list of detected 14 | bounding boxes. 
Optionally, for speed considerations detect from path is 15 | recommended. 16 | """ 17 | 18 | def __init__(self, device, verbose): 19 | self.device = device 20 | self.verbose = verbose 21 | 22 | if verbose: 23 | if 'cpu' in device: 24 | logger = logging.getLogger(__name__) 25 | logger.warning("Detection running on CPU, this may be potentially slow.") 26 | 27 | if 'cpu' not in device and 'cuda' not in device: 28 | if verbose: 29 | logger.error("Expected values for device are: {cpu, cuda} but got: %s", device) 30 | raise ValueError 31 | 32 | def detect_from_image(self, tensor_or_path): 33 | """Detects faces in a given image. 34 | 35 | This function detects the faces present in a provided BGR(usually) 36 | image. The input can be either the image itself or the path to it. 37 | 38 | Arguments: 39 | tensor_or_path {numpy.ndarray, torch.tensor or string} -- the path 40 | to an image or the image itself. 41 | 42 | Example:: 43 | 44 | >>> path_to_image = 'data/image_01.jpg' 45 | ... detected_faces = detect_from_image(path_to_image) 46 | [A list of bounding boxes (x1, y1, x2, y2)] 47 | >>> image = cv2.imread(path_to_image) 48 | ... detected_faces = detect_from_image(image) 49 | [A list of bounding boxes (x1, y1, x2, y2)] 50 | 51 | """ 52 | raise NotImplementedError 53 | 54 | def detect_from_directory(self, path, extensions=['.jpg', '.png'], recursive=False, show_progress_bar=True): 55 | """Detects faces from all the images present in a given directory. 56 | 57 | Arguments: 58 | path {string} -- a string containing a path that points to the folder containing the images 59 | 60 | Keyword Arguments: 61 | extensions {list} -- list of string containing the extensions to be 62 | consider in the following format: ``.extension_name`` (default: 63 | {['.jpg', '.png']}) recursive {bool} -- option wherever to scan the 64 | folder recursively (default: {False}) show_progress_bar {bool} -- 65 | display a progressbar (default: {True}) 66 | 67 | Example: 68 | >>> directory = 'data' 69 | ... detected_faces = detect_from_directory(directory) 70 | {A dictionary of [lists containing bounding boxes(x1, y1, x2, y2)]} 71 | 72 | """ 73 | if self.verbose: 74 | logger = logging.getLogger(__name__) 75 | 76 | if len(extensions) == 0: 77 | if self.verbose: 78 | logger.error("Expected at list one extension, but none was received.") 79 | raise ValueError 80 | 81 | if self.verbose: 82 | logger.info("Constructing the list of images.") 83 | additional_pattern = '/**/*' if recursive else '/*' 84 | files = [] 85 | for extension in extensions: 86 | files.extend(glob.glob(path + additional_pattern + extension, recursive=recursive)) 87 | 88 | if self.verbose: 89 | logger.info("Finished searching for images. 
%s images found", len(files)) 90 | logger.info("Preparing to run the detection.") 91 | 92 | predictions = {} 93 | for image_path in tqdm(files, disable=not show_progress_bar): 94 | if self.verbose: 95 | logger.info("Running the face detector on image: %s", image_path) 96 | predictions[image_path] = self.detect_from_image(image_path) 97 | 98 | if self.verbose: 99 | logger.info("The detector was successfully run on all %s images", len(files)) 100 | 101 | return predictions 102 | 103 | @property 104 | def reference_scale(self): 105 | raise NotImplementedError 106 | 107 | @property 108 | def reference_x_shift(self): 109 | raise NotImplementedError 110 | 111 | @property 112 | def reference_y_shift(self): 113 | raise NotImplementedError 114 | 115 | @staticmethod 116 | def tensor_or_path_to_ndarray(tensor_or_path, rgb=True): 117 | """Convert path (represented as a string) or torch.tensor to a numpy.ndarray 118 | 119 | Arguments: 120 | tensor_or_path {numpy.ndarray, torch.tensor or string} -- path to the image, or the image itself 121 | """ 122 | if isinstance(tensor_or_path, str): 123 | return cv2.imread(tensor_or_path) if not rgb else cv2.imread(tensor_or_path)[..., ::-1] 124 | elif torch.is_tensor(tensor_or_path): 125 | # Call cpu in case its coming from cuda 126 | return tensor_or_path.cpu().numpy()[..., ::-1].copy() if not rgb else tensor_or_path.cpu().numpy() 127 | elif isinstance(tensor_or_path, np.ndarray): 128 | return tensor_or_path[..., ::-1].copy() if not rgb else tensor_or_path 129 | else: 130 | raise TypeError 131 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/sfd/__init__.py: -------------------------------------------------------------------------------- 1 | from .sfd_detector import SFDDetector as FaceDetector -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/sfd/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/face_detection/detection/sfd/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/sfd/__pycache__/bbox.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/face_detection/detection/sfd/__pycache__/bbox.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/sfd/__pycache__/detect.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/face_detection/detection/sfd/__pycache__/detect.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/sfd/__pycache__/net_s3fd.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/face_detection/detection/sfd/__pycache__/net_s3fd.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/sfd/__pycache__/sfd_detector.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/face_detection/detection/sfd/__pycache__/sfd_detector.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/sfd/bbox.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import sys 4 | import cv2 5 | import random 6 | import datetime 7 | import time 8 | import math 9 | import argparse 10 | import numpy as np 11 | import torch 12 | 13 | try: 14 | from iou import IOU 15 | except BaseException: 16 | # IOU cython speedup 10x 17 | def IOU(ax1, ay1, ax2, ay2, bx1, by1, bx2, by2): 18 | sa = abs((ax2 - ax1) * (ay2 - ay1)) 19 | sb = abs((bx2 - bx1) * (by2 - by1)) 20 | x1, y1 = max(ax1, bx1), max(ay1, by1) 21 | x2, y2 = min(ax2, bx2), min(ay2, by2) 22 | w = x2 - x1 23 | h = y2 - y1 24 | if w < 0 or h < 0: 25 | return 0.0 26 | else: 27 | return 1.0 * w * h / (sa + sb - w * h) 28 | 29 | 30 | def bboxlog(x1, y1, x2, y2, axc, ayc, aww, ahh): 31 | xc, yc, ww, hh = (x2 + x1) / 2, (y2 + y1) / 2, x2 - x1, y2 - y1 32 | dx, dy = (xc - axc) / aww, (yc - ayc) / ahh 33 | dw, dh = math.log(ww / aww), math.log(hh / ahh) 34 | return dx, dy, dw, dh 35 | 36 | 37 | def bboxloginv(dx, dy, dw, dh, axc, ayc, aww, ahh): 38 | xc, yc = dx * aww + axc, dy * ahh + ayc 39 | ww, hh = math.exp(dw) * aww, math.exp(dh) * ahh 40 | x1, x2, y1, y2 = xc - ww / 2, xc + ww / 2, yc - hh / 2, yc + hh / 2 41 | return x1, y1, x2, y2 42 | 43 | 44 | def nms(dets, thresh): 45 | if 0 == len(dets): 46 | return [] 47 | x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4] 48 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 49 | order = scores.argsort()[::-1] 50 | 51 | keep = [] 52 | while order.size > 0: 53 | i = order[0] 54 | keep.append(i) 55 | xx1, yy1 = np.maximum(x1[i], x1[order[1:]]), np.maximum(y1[i], y1[order[1:]]) 56 | xx2, yy2 = np.minimum(x2[i], x2[order[1:]]), np.minimum(y2[i], y2[order[1:]]) 57 | 58 | w, h = np.maximum(0.0, xx2 - xx1 + 1), np.maximum(0.0, yy2 - yy1 + 1) 59 | ovr = w * h / (areas[i] + areas[order[1:]] - w * h) 60 | 61 | inds = np.where(ovr <= thresh)[0] 62 | order = order[inds + 1] 63 | 64 | return keep 65 | 66 | 67 | def encode(matched, priors, variances): 68 | """Encode the variances from the priorbox layers into the ground truth boxes 69 | we have matched (based on jaccard overlap) with the prior boxes. 70 | Args: 71 | matched: (tensor) Coords of ground truth for each prior in point-form 72 | Shape: [num_priors, 4]. 73 | priors: (tensor) Prior boxes in center-offset form 74 | Shape: [num_priors,4]. 
75 | variances: (list[float]) Variances of priorboxes 76 | Return: 77 | encoded boxes (tensor), Shape: [num_priors, 4] 78 | """ 79 | 80 | # dist b/t match center and prior's center 81 | g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2] 82 | # encode variance 83 | g_cxcy /= (variances[0] * priors[:, 2:]) 84 | # match wh / prior wh 85 | g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:] 86 | g_wh = torch.log(g_wh) / variances[1] 87 | # return target for smooth_l1_loss 88 | return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4] 89 | 90 | 91 | def decode(loc, priors, variances): 92 | """Decode locations from predictions using priors to undo 93 | the encoding we did for offset regression at train time. 94 | Args: 95 | loc (tensor): location predictions for loc layers, 96 | Shape: [num_priors,4] 97 | priors (tensor): Prior boxes in center-offset form. 98 | Shape: [num_priors,4]. 99 | variances: (list[float]) Variances of priorboxes 100 | Return: 101 | decoded bounding box predictions 102 | """ 103 | 104 | boxes = torch.cat(( 105 | priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], 106 | priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) 107 | boxes[:, :2] -= boxes[:, 2:] / 2 108 | boxes[:, 2:] += boxes[:, :2] 109 | return boxes 110 | 111 | def batch_decode(loc, priors, variances): 112 | """Decode locations from predictions using priors to undo 113 | the encoding we did for offset regression at train time. 114 | Args: 115 | loc (tensor): location predictions for loc layers, 116 | Shape: [num_priors,4] 117 | priors (tensor): Prior boxes in center-offset form. 118 | Shape: [num_priors,4]. 119 | variances: (list[float]) Variances of priorboxes 120 | Return: 121 | decoded bounding box predictions 122 | """ 123 | 124 | boxes = torch.cat(( 125 | priors[:, :, :2] + loc[:, :, :2] * variances[0] * priors[:, :, 2:], 126 | priors[:, :, 2:] * torch.exp(loc[:, :, 2:] * variances[1])), 2) 127 | boxes[:, :, :2] -= boxes[:, :, 2:] / 2 128 | boxes[:, :, 2:] += boxes[:, :, :2] 129 | return boxes 130 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/sfd/detect.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | import os 5 | import sys 6 | import cv2 7 | import random 8 | import datetime 9 | import math 10 | import argparse 11 | import numpy as np 12 | 13 | import scipy.io as sio 14 | import zipfile 15 | from .net_s3fd import s3fd 16 | from .bbox import * 17 | 18 | 19 | def detect(net, img, device): 20 | img = img - np.array([104, 117, 123]) 21 | img = img.transpose(2, 0, 1) 22 | img = img.reshape((1,) + img.shape) 23 | 24 | if 'cuda' in device: 25 | torch.backends.cudnn.benchmark = True 26 | 27 | img = torch.from_numpy(img).float().to(device) 28 | BB, CC, HH, WW = img.size() 29 | with torch.no_grad(): 30 | olist = net(img) 31 | 32 | bboxlist = [] 33 | for i in range(len(olist) // 2): 34 | olist[i * 2] = F.softmax(olist[i * 2], dim=1) 35 | olist = [oelem.data.cpu() for oelem in olist] 36 | for i in range(len(olist) // 2): 37 | ocls, oreg = olist[i * 2], olist[i * 2 + 1] 38 | FB, FC, FH, FW = ocls.size() # feature map size 39 | stride = 2**(i + 2) # 4,8,16,32,64,128 40 | anchor = stride * 4 41 | poss = zip(*np.where(ocls[:, 1, :, :] > 0.05)) 42 | for Iindex, hindex, windex in poss: 43 | axc, ayc = stride / 2 + windex * stride, stride / 2 + hindex * stride 44 | score = ocls[0, 1, hindex, windex] 45 | loc = 
oreg[0, :, hindex, windex].contiguous().view(1, 4) 46 | priors = torch.Tensor([[axc / 1.0, ayc / 1.0, stride * 4 / 1.0, stride * 4 / 1.0]]) 47 | variances = [0.1, 0.2] 48 | box = decode(loc, priors, variances) 49 | x1, y1, x2, y2 = box[0] * 1.0 50 | # cv2.rectangle(imgshow,(int(x1),int(y1)),(int(x2),int(y2)),(0,0,255),1) 51 | bboxlist.append([x1, y1, x2, y2, score]) 52 | bboxlist = np.array(bboxlist) 53 | if 0 == len(bboxlist): 54 | bboxlist = np.zeros((1, 5)) 55 | 56 | return bboxlist 57 | 58 | def batch_detect(net, imgs, device): 59 | imgs = imgs - np.array([104, 117, 123]) 60 | imgs = imgs.transpose(0, 3, 1, 2) 61 | 62 | if 'cuda' in device: 63 | torch.backends.cudnn.benchmark = True 64 | 65 | imgs = torch.from_numpy(imgs).float().to(device) 66 | BB, CC, HH, WW = imgs.size() 67 | with torch.no_grad(): 68 | olist = net(imgs) 69 | 70 | bboxlist = [] 71 | for i in range(len(olist) // 2): 72 | olist[i * 2] = F.softmax(olist[i * 2], dim=1) 73 | olist = [oelem.data.cpu() for oelem in olist] 74 | for i in range(len(olist) // 2): 75 | ocls, oreg = olist[i * 2], olist[i * 2 + 1] 76 | FB, FC, FH, FW = ocls.size() # feature map size 77 | stride = 2**(i + 2) # 4,8,16,32,64,128 78 | anchor = stride * 4 79 | poss = zip(*np.where(ocls[:, 1, :, :] > 0.05)) 80 | for Iindex, hindex, windex in poss: 81 | axc, ayc = stride / 2 + windex * stride, stride / 2 + hindex * stride 82 | score = ocls[:, 1, hindex, windex] 83 | loc = oreg[:, :, hindex, windex].contiguous().view(BB, 1, 4) 84 | priors = torch.Tensor([[axc / 1.0, ayc / 1.0, stride * 4 / 1.0, stride * 4 / 1.0]]).view(1, 1, 4) 85 | variances = [0.1, 0.2] 86 | box = batch_decode(loc, priors, variances) 87 | box = box[:, 0] * 1.0 88 | # cv2.rectangle(imgshow,(int(x1),int(y1)),(int(x2),int(y2)),(0,0,255),1) 89 | bboxlist.append(torch.cat([box, score.unsqueeze(1)], 1).cpu().numpy()) 90 | bboxlist = np.array(bboxlist) 91 | if 0 == len(bboxlist): 92 | bboxlist = np.zeros((1, BB, 5)) 93 | 94 | return bboxlist 95 | 96 | def flip_detect(net, img, device): 97 | img = cv2.flip(img, 1) 98 | b = detect(net, img, device) 99 | 100 | bboxlist = np.zeros(b.shape) 101 | bboxlist[:, 0] = img.shape[1] - b[:, 2] 102 | bboxlist[:, 1] = b[:, 1] 103 | bboxlist[:, 2] = img.shape[1] - b[:, 0] 104 | bboxlist[:, 3] = b[:, 3] 105 | bboxlist[:, 4] = b[:, 4] 106 | return bboxlist 107 | 108 | 109 | def pts_to_bb(pts): 110 | min_x, min_y = np.min(pts, axis=0) 111 | max_x, max_y = np.max(pts, axis=0) 112 | return np.array([min_x, min_y, max_x, max_y]) 113 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/sfd/net_s3fd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class L2Norm(nn.Module): 7 | def __init__(self, n_channels, scale=1.0): 8 | super(L2Norm, self).__init__() 9 | self.n_channels = n_channels 10 | self.scale = scale 11 | self.eps = 1e-10 12 | self.weight = nn.Parameter(torch.Tensor(self.n_channels)) 13 | self.weight.data *= 0.0 14 | self.weight.data += self.scale 15 | 16 | def forward(self, x): 17 | norm = x.pow(2).sum(dim=1, keepdim=True).sqrt() + self.eps 18 | x = x / norm * self.weight.view(1, -1, 1, 1) 19 | return x 20 | 21 | 22 | class s3fd(nn.Module): 23 | def __init__(self): 24 | super(s3fd, self).__init__() 25 | self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1) 26 | self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1) 27 | 28 | 
self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1) 29 | self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1) 30 | 31 | self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1) 32 | self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 33 | self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 34 | 35 | self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1) 36 | self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1) 37 | self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1) 38 | 39 | self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1) 40 | self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1) 41 | self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1) 42 | 43 | self.fc6 = nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=3) 44 | self.fc7 = nn.Conv2d(1024, 1024, kernel_size=1, stride=1, padding=0) 45 | 46 | self.conv6_1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0) 47 | self.conv6_2 = nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1) 48 | 49 | self.conv7_1 = nn.Conv2d(512, 128, kernel_size=1, stride=1, padding=0) 50 | self.conv7_2 = nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1) 51 | 52 | self.conv3_3_norm = L2Norm(256, scale=10) 53 | self.conv4_3_norm = L2Norm(512, scale=8) 54 | self.conv5_3_norm = L2Norm(512, scale=5) 55 | 56 | self.conv3_3_norm_mbox_conf = nn.Conv2d(256, 4, kernel_size=3, stride=1, padding=1) 57 | self.conv3_3_norm_mbox_loc = nn.Conv2d(256, 4, kernel_size=3, stride=1, padding=1) 58 | self.conv4_3_norm_mbox_conf = nn.Conv2d(512, 2, kernel_size=3, stride=1, padding=1) 59 | self.conv4_3_norm_mbox_loc = nn.Conv2d(512, 4, kernel_size=3, stride=1, padding=1) 60 | self.conv5_3_norm_mbox_conf = nn.Conv2d(512, 2, kernel_size=3, stride=1, padding=1) 61 | self.conv5_3_norm_mbox_loc = nn.Conv2d(512, 4, kernel_size=3, stride=1, padding=1) 62 | 63 | self.fc7_mbox_conf = nn.Conv2d(1024, 2, kernel_size=3, stride=1, padding=1) 64 | self.fc7_mbox_loc = nn.Conv2d(1024, 4, kernel_size=3, stride=1, padding=1) 65 | self.conv6_2_mbox_conf = nn.Conv2d(512, 2, kernel_size=3, stride=1, padding=1) 66 | self.conv6_2_mbox_loc = nn.Conv2d(512, 4, kernel_size=3, stride=1, padding=1) 67 | self.conv7_2_mbox_conf = nn.Conv2d(256, 2, kernel_size=3, stride=1, padding=1) 68 | self.conv7_2_mbox_loc = nn.Conv2d(256, 4, kernel_size=3, stride=1, padding=1) 69 | 70 | def forward(self, x): 71 | h = F.relu(self.conv1_1(x)) 72 | h = F.relu(self.conv1_2(h)) 73 | h = F.max_pool2d(h, 2, 2) 74 | 75 | h = F.relu(self.conv2_1(h)) 76 | h = F.relu(self.conv2_2(h)) 77 | h = F.max_pool2d(h, 2, 2) 78 | 79 | h = F.relu(self.conv3_1(h)) 80 | h = F.relu(self.conv3_2(h)) 81 | h = F.relu(self.conv3_3(h)) 82 | f3_3 = h 83 | h = F.max_pool2d(h, 2, 2) 84 | 85 | h = F.relu(self.conv4_1(h)) 86 | h = F.relu(self.conv4_2(h)) 87 | h = F.relu(self.conv4_3(h)) 88 | f4_3 = h 89 | h = F.max_pool2d(h, 2, 2) 90 | 91 | h = F.relu(self.conv5_1(h)) 92 | h = F.relu(self.conv5_2(h)) 93 | h = F.relu(self.conv5_3(h)) 94 | f5_3 = h 95 | h = F.max_pool2d(h, 2, 2) 96 | 97 | h = F.relu(self.fc6(h)) 98 | h = F.relu(self.fc7(h)) 99 | ffc7 = h 100 | h = F.relu(self.conv6_1(h)) 101 | h = F.relu(self.conv6_2(h)) 102 | f6_2 = h 103 | h = F.relu(self.conv7_1(h)) 104 | h = F.relu(self.conv7_2(h)) 105 | f7_2 = h 106 | 107 | f3_3 = self.conv3_3_norm(f3_3) 108 | f4_3 = self.conv4_3_norm(f4_3) 109 | f5_3 = self.conv5_3_norm(f5_3) 110 | 111 | cls1 = 
self.conv3_3_norm_mbox_conf(f3_3) 112 | reg1 = self.conv3_3_norm_mbox_loc(f3_3) 113 | cls2 = self.conv4_3_norm_mbox_conf(f4_3) 114 | reg2 = self.conv4_3_norm_mbox_loc(f4_3) 115 | cls3 = self.conv5_3_norm_mbox_conf(f5_3) 116 | reg3 = self.conv5_3_norm_mbox_loc(f5_3) 117 | cls4 = self.fc7_mbox_conf(ffc7) 118 | reg4 = self.fc7_mbox_loc(ffc7) 119 | cls5 = self.conv6_2_mbox_conf(f6_2) 120 | reg5 = self.conv6_2_mbox_loc(f6_2) 121 | cls6 = self.conv7_2_mbox_conf(f7_2) 122 | reg6 = self.conv7_2_mbox_loc(f7_2) 123 | 124 | # max-out background label 125 | chunk = torch.chunk(cls1, 4, 1) 126 | bmax = torch.max(torch.max(chunk[0], chunk[1]), chunk[2]) 127 | cls1 = torch.cat([bmax, chunk[3]], dim=1) 128 | 129 | return [cls1, reg1, cls2, reg2, cls3, reg3, cls4, reg4, cls5, reg5, cls6, reg6] 130 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/sfd/sfd_detector.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | from torch.utils.model_zoo import load_url 4 | 5 | from ..core import FaceDetector 6 | 7 | from .net_s3fd import s3fd 8 | from .bbox import * 9 | from .detect import * 10 | 11 | models_urls = { 12 | 's3fd': 'https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth', 13 | } 14 | 15 | 16 | class SFDDetector(FaceDetector): 17 | def __init__(self, device, path_to_detector=os.path.join(os.path.dirname(os.path.abspath(__file__)), 's3fd.pth'), verbose=False): 18 | super(SFDDetector, self).__init__(device, verbose) 19 | 20 | # Initialise the face detector 21 | if not os.path.isfile(path_to_detector): 22 | model_weights = load_url(models_urls['s3fd']) 23 | else: 24 | model_weights = torch.load(path_to_detector) 25 | 26 | self.face_detector = s3fd() 27 | self.face_detector.load_state_dict(model_weights) 28 | self.face_detector.to(device) 29 | self.face_detector.eval() 30 | 31 | def detect_from_image(self, tensor_or_path): 32 | image = self.tensor_or_path_to_ndarray(tensor_or_path) 33 | 34 | bboxlist = detect(self.face_detector, image, device=self.device) 35 | keep = nms(bboxlist, 0.3) 36 | bboxlist = bboxlist[keep, :] 37 | bboxlist = [x for x in bboxlist if x[-1] > 0.5] 38 | 39 | return bboxlist 40 | 41 | def detect_from_batch(self, images): 42 | bboxlists = batch_detect(self.face_detector, images, device=self.device) 43 | keeps = [nms(bboxlists[:, i, :], 0.3) for i in range(bboxlists.shape[1])] 44 | bboxlists = [bboxlists[keep, i, :] for i, keep in enumerate(keeps)] 45 | bboxlists = [[x for x in bboxlist if x[-1] > 0.5] for bboxlist in bboxlists] 46 | 47 | return bboxlists 48 | 49 | @property 50 | def reference_scale(self): 51 | return 195 52 | 53 | @property 54 | def reference_x_shift(self): 55 | return 0 56 | 57 | @property 58 | def reference_y_shift(self): 59 | return 0 60 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import math 5 | 6 | 7 | def conv3x3(in_planes, out_planes, strd=1, padding=1, bias=False): 8 | "3x3 convolution with padding" 9 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, 10 | stride=strd, padding=padding, bias=bias) 11 | 12 | 13 | class ConvBlock(nn.Module): 14 | def __init__(self, in_planes, out_planes): 15 | super(ConvBlock, self).__init__() 16 | self.bn1 = 
nn.BatchNorm2d(in_planes) 17 | self.conv1 = conv3x3(in_planes, int(out_planes / 2)) 18 | self.bn2 = nn.BatchNorm2d(int(out_planes / 2)) 19 | self.conv2 = conv3x3(int(out_planes / 2), int(out_planes / 4)) 20 | self.bn3 = nn.BatchNorm2d(int(out_planes / 4)) 21 | self.conv3 = conv3x3(int(out_planes / 4), int(out_planes / 4)) 22 | 23 | if in_planes != out_planes: 24 | self.downsample = nn.Sequential( 25 | nn.BatchNorm2d(in_planes), 26 | nn.ReLU(True), 27 | nn.Conv2d(in_planes, out_planes, 28 | kernel_size=1, stride=1, bias=False), 29 | ) 30 | else: 31 | self.downsample = None 32 | 33 | def forward(self, x): 34 | residual = x 35 | 36 | out1 = self.bn1(x) 37 | out1 = F.relu(out1, True) 38 | out1 = self.conv1(out1) 39 | 40 | out2 = self.bn2(out1) 41 | out2 = F.relu(out2, True) 42 | out2 = self.conv2(out2) 43 | 44 | out3 = self.bn3(out2) 45 | out3 = F.relu(out3, True) 46 | out3 = self.conv3(out3) 47 | 48 | out3 = torch.cat((out1, out2, out3), 1) 49 | 50 | if self.downsample is not None: 51 | residual = self.downsample(residual) 52 | 53 | out3 += residual 54 | 55 | return out3 56 | 57 | 58 | class Bottleneck(nn.Module): 59 | 60 | expansion = 4 61 | 62 | def __init__(self, inplanes, planes, stride=1, downsample=None): 63 | super(Bottleneck, self).__init__() 64 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 65 | self.bn1 = nn.BatchNorm2d(planes) 66 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 67 | padding=1, bias=False) 68 | self.bn2 = nn.BatchNorm2d(planes) 69 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 70 | self.bn3 = nn.BatchNorm2d(planes * 4) 71 | self.relu = nn.ReLU(inplace=True) 72 | self.downsample = downsample 73 | self.stride = stride 74 | 75 | def forward(self, x): 76 | residual = x 77 | 78 | out = self.conv1(x) 79 | out = self.bn1(out) 80 | out = self.relu(out) 81 | 82 | out = self.conv2(out) 83 | out = self.bn2(out) 84 | out = self.relu(out) 85 | 86 | out = self.conv3(out) 87 | out = self.bn3(out) 88 | 89 | if self.downsample is not None: 90 | residual = self.downsample(x) 91 | 92 | out += residual 93 | out = self.relu(out) 94 | 95 | return out 96 | 97 | 98 | class HourGlass(nn.Module): 99 | def __init__(self, num_modules, depth, num_features): 100 | super(HourGlass, self).__init__() 101 | self.num_modules = num_modules 102 | self.depth = depth 103 | self.features = num_features 104 | 105 | self._generate_network(self.depth) 106 | 107 | def _generate_network(self, level): 108 | self.add_module('b1_' + str(level), ConvBlock(self.features, self.features)) 109 | 110 | self.add_module('b2_' + str(level), ConvBlock(self.features, self.features)) 111 | 112 | if level > 1: 113 | self._generate_network(level - 1) 114 | else: 115 | self.add_module('b2_plus_' + str(level), ConvBlock(self.features, self.features)) 116 | 117 | self.add_module('b3_' + str(level), ConvBlock(self.features, self.features)) 118 | 119 | def _forward(self, level, inp): 120 | # Upper branch 121 | up1 = inp 122 | up1 = self._modules['b1_' + str(level)](up1) 123 | 124 | # Lower branch 125 | low1 = F.avg_pool2d(inp, 2, stride=2) 126 | low1 = self._modules['b2_' + str(level)](low1) 127 | 128 | if level > 1: 129 | low2 = self._forward(level - 1, low1) 130 | else: 131 | low2 = low1 132 | low2 = self._modules['b2_plus_' + str(level)](low2) 133 | 134 | low3 = low2 135 | low3 = self._modules['b3_' + str(level)](low3) 136 | 137 | up2 = F.interpolate(low3, scale_factor=2, mode='nearest') 138 | 139 | return up1 + up2 140 | 141 | def forward(self, x): 142 | 
return self._forward(self.depth, x) 143 | 144 | 145 | class FAN(nn.Module): 146 | 147 | def __init__(self, num_modules=1): 148 | super(FAN, self).__init__() 149 | self.num_modules = num_modules 150 | 151 | # Base part 152 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3) 153 | self.bn1 = nn.BatchNorm2d(64) 154 | self.conv2 = ConvBlock(64, 128) 155 | self.conv3 = ConvBlock(128, 128) 156 | self.conv4 = ConvBlock(128, 256) 157 | 158 | # Stacking part 159 | for hg_module in range(self.num_modules): 160 | self.add_module('m' + str(hg_module), HourGlass(1, 4, 256)) 161 | self.add_module('top_m_' + str(hg_module), ConvBlock(256, 256)) 162 | self.add_module('conv_last' + str(hg_module), 163 | nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0)) 164 | self.add_module('bn_end' + str(hg_module), nn.BatchNorm2d(256)) 165 | self.add_module('l' + str(hg_module), nn.Conv2d(256, 166 | 68, kernel_size=1, stride=1, padding=0)) 167 | 168 | if hg_module < self.num_modules - 1: 169 | self.add_module( 170 | 'bl' + str(hg_module), nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0)) 171 | self.add_module('al' + str(hg_module), nn.Conv2d(68, 172 | 256, kernel_size=1, stride=1, padding=0)) 173 | 174 | def forward(self, x): 175 | x = F.relu(self.bn1(self.conv1(x)), True) 176 | x = F.avg_pool2d(self.conv2(x), 2, stride=2) 177 | x = self.conv3(x) 178 | x = self.conv4(x) 179 | 180 | previous = x 181 | 182 | outputs = [] 183 | for i in range(self.num_modules): 184 | hg = self._modules['m' + str(i)](previous) 185 | 186 | ll = hg 187 | ll = self._modules['top_m_' + str(i)](ll) 188 | 189 | ll = F.relu(self._modules['bn_end' + str(i)] 190 | (self._modules['conv_last' + str(i)](ll)), True) 191 | 192 | # Predict heatmaps 193 | tmp_out = self._modules['l' + str(i)](ll) 194 | outputs.append(tmp_out) 195 | 196 | if i < self.num_modules - 1: 197 | ll = self._modules['bl' + str(i)](ll) 198 | tmp_out_ = self._modules['al' + str(i)](tmp_out) 199 | previous = previous + ll + tmp_out_ 200 | 201 | return outputs 202 | 203 | 204 | class ResNetDepth(nn.Module): 205 | 206 | def __init__(self, block=Bottleneck, layers=[3, 8, 36, 3], num_classes=68): 207 | self.inplanes = 64 208 | super(ResNetDepth, self).__init__() 209 | self.conv1 = nn.Conv2d(3 + 68, 64, kernel_size=7, stride=2, padding=3, 210 | bias=False) 211 | self.bn1 = nn.BatchNorm2d(64) 212 | self.relu = nn.ReLU(inplace=True) 213 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 214 | self.layer1 = self._make_layer(block, 64, layers[0]) 215 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 216 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 217 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 218 | self.avgpool = nn.AvgPool2d(7) 219 | self.fc = nn.Linear(512 * block.expansion, num_classes) 220 | 221 | for m in self.modules(): 222 | if isinstance(m, nn.Conv2d): 223 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 224 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 225 | elif isinstance(m, nn.BatchNorm2d): 226 | m.weight.data.fill_(1) 227 | m.bias.data.zero_() 228 | 229 | def _make_layer(self, block, planes, blocks, stride=1): 230 | downsample = None 231 | if stride != 1 or self.inplanes != planes * block.expansion: 232 | downsample = nn.Sequential( 233 | nn.Conv2d(self.inplanes, planes * block.expansion, 234 | kernel_size=1, stride=stride, bias=False), 235 | nn.BatchNorm2d(planes * block.expansion), 236 | ) 237 | 238 | layers = [] 239 | layers.append(block(self.inplanes, planes, stride, downsample)) 240 | self.inplanes = planes * block.expansion 241 | for i in range(1, blocks): 242 | layers.append(block(self.inplanes, planes)) 243 | 244 | return nn.Sequential(*layers) 245 | 246 | def forward(self, x): 247 | x = self.conv1(x) 248 | x = self.bn1(x) 249 | x = self.relu(x) 250 | x = self.maxpool(x) 251 | 252 | x = self.layer1(x) 253 | x = self.layer2(x) 254 | x = self.layer3(x) 255 | x = self.layer4(x) 256 | 257 | x = self.avgpool(x) 258 | x = x.view(x.size(0), -1) 259 | x = self.fc(x) 260 | 261 | return x 262 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import sys 4 | import time 5 | import torch 6 | import math 7 | import numpy as np 8 | import cv2 9 | 10 | 11 | def _gaussian( 12 | size=3, sigma=0.25, amplitude=1, normalize=False, width=None, 13 | height=None, sigma_horz=None, sigma_vert=None, mean_horz=0.5, 14 | mean_vert=0.5): 15 | # handle some defaults 16 | if width is None: 17 | width = size 18 | if height is None: 19 | height = size 20 | if sigma_horz is None: 21 | sigma_horz = sigma 22 | if sigma_vert is None: 23 | sigma_vert = sigma 24 | center_x = mean_horz * width + 0.5 25 | center_y = mean_vert * height + 0.5 26 | gauss = np.empty((height, width), dtype=np.float32) 27 | # generate kernel 28 | for i in range(height): 29 | for j in range(width): 30 | gauss[i][j] = amplitude * math.exp(-(math.pow((j + 1 - center_x) / ( 31 | sigma_horz * width), 2) / 2.0 + math.pow((i + 1 - center_y) / (sigma_vert * height), 2) / 2.0)) 32 | if normalize: 33 | gauss = gauss / np.sum(gauss) 34 | return gauss 35 | 36 | 37 | def draw_gaussian(image, point, sigma): 38 | # Check if the gaussian is inside 39 | ul = [math.floor(point[0] - 3 * sigma), math.floor(point[1] - 3 * sigma)] 40 | br = [math.floor(point[0] + 3 * sigma), math.floor(point[1] + 3 * sigma)] 41 | if (ul[0] > image.shape[1] or ul[1] > image.shape[0] or br[0] < 1 or br[1] < 1): 42 | return image 43 | size = 6 * sigma + 1 44 | g = _gaussian(size) 45 | g_x = [int(max(1, -ul[0])), int(min(br[0], image.shape[1])) - int(max(1, ul[0])) + int(max(1, -ul[0]))] 46 | g_y = [int(max(1, -ul[1])), int(min(br[1], image.shape[0])) - int(max(1, ul[1])) + int(max(1, -ul[1]))] 47 | img_x = [int(max(1, ul[0])), int(min(br[0], image.shape[1]))] 48 | img_y = [int(max(1, ul[1])), int(min(br[1], image.shape[0]))] 49 | assert (g_x[0] > 0 and g_y[1] > 0) 50 | image[img_y[0] - 1:img_y[1], img_x[0] - 1:img_x[1] 51 | ] = image[img_y[0] - 1:img_y[1], img_x[0] - 1:img_x[1]] + g[g_y[0] - 1:g_y[1], g_x[0] - 1:g_x[1]] 52 | image[image > 1] = 1 53 | return image 54 | 55 | 56 | def transform(point, center, scale, resolution, invert=False): 57 | """Generate and affine transformation matrix. 
58 | 59 | Given a set of points, a center, a scale and a targer resolution, the 60 | function generates and affine transformation matrix. If invert is ``True`` 61 | it will produce the inverse transformation. 62 | 63 | Arguments: 64 | point {torch.tensor} -- the input 2D point 65 | center {torch.tensor or numpy.array} -- the center around which to perform the transformations 66 | scale {float} -- the scale of the face/object 67 | resolution {float} -- the output resolution 68 | 69 | Keyword Arguments: 70 | invert {bool} -- define wherever the function should produce the direct or the 71 | inverse transformation matrix (default: {False}) 72 | """ 73 | _pt = torch.ones(3) 74 | _pt[0] = point[0] 75 | _pt[1] = point[1] 76 | 77 | h = 200.0 * scale 78 | t = torch.eye(3) 79 | t[0, 0] = resolution / h 80 | t[1, 1] = resolution / h 81 | t[0, 2] = resolution * (-center[0] / h + 0.5) 82 | t[1, 2] = resolution * (-center[1] / h + 0.5) 83 | 84 | if invert: 85 | t = torch.inverse(t) 86 | 87 | new_point = (torch.matmul(t, _pt))[0:2] 88 | 89 | return new_point.int() 90 | 91 | 92 | def crop(image, center, scale, resolution=256.0): 93 | """Center crops an image or set of heatmaps 94 | 95 | Arguments: 96 | image {numpy.array} -- an rgb image 97 | center {numpy.array} -- the center of the object, usually the same as of the bounding box 98 | scale {float} -- scale of the face 99 | 100 | Keyword Arguments: 101 | resolution {float} -- the size of the output cropped image (default: {256.0}) 102 | 103 | Returns: 104 | [type] -- [description] 105 | """ # Crop around the center point 106 | """ Crops the image around the center. Input is expected to be an np.ndarray """ 107 | ul = transform([1, 1], center, scale, resolution, True) 108 | br = transform([resolution, resolution], center, scale, resolution, True) 109 | # pad = math.ceil(torch.norm((ul - br).float()) / 2.0 - (br[0] - ul[0]) / 2.0) 110 | if image.ndim > 2: 111 | newDim = np.array([br[1] - ul[1], br[0] - ul[0], 112 | image.shape[2]], dtype=np.int32) 113 | newImg = np.zeros(newDim, dtype=np.uint8) 114 | else: 115 | newDim = np.array([br[1] - ul[1], br[0] - ul[0]], dtype=np.int) 116 | newImg = np.zeros(newDim, dtype=np.uint8) 117 | ht = image.shape[0] 118 | wd = image.shape[1] 119 | newX = np.array( 120 | [max(1, -ul[0] + 1), min(br[0], wd) - ul[0]], dtype=np.int32) 121 | newY = np.array( 122 | [max(1, -ul[1] + 1), min(br[1], ht) - ul[1]], dtype=np.int32) 123 | oldX = np.array([max(1, ul[0] + 1), min(br[0], wd)], dtype=np.int32) 124 | oldY = np.array([max(1, ul[1] + 1), min(br[1], ht)], dtype=np.int32) 125 | newImg[newY[0] - 1:newY[1], newX[0] - 1:newX[1] 126 | ] = image[oldY[0] - 1:oldY[1], oldX[0] - 1:oldX[1], :] 127 | newImg = cv2.resize(newImg, dsize=(int(resolution), int(resolution)), 128 | interpolation=cv2.INTER_LINEAR) 129 | return newImg 130 | 131 | 132 | def get_preds_fromhm(hm, center=None, scale=None): 133 | """Obtain (x,y) coordinates given a set of N heatmaps. If the center 134 | and the scale is provided the function will return the points also in 135 | the original coordinate frame. 
136 | 137 | Arguments: 138 | hm {torch.tensor} -- the predicted heatmaps, of shape [B, N, W, H] 139 | 140 | Keyword Arguments: 141 | center {torch.tensor} -- the center of the bounding box (default: {None}) 142 | scale {float} -- face scale (default: {None}) 143 | """ 144 | max, idx = torch.max( 145 | hm.view(hm.size(0), hm.size(1), hm.size(2) * hm.size(3)), 2) 146 | idx += 1 147 | preds = idx.view(idx.size(0), idx.size(1), 1).repeat(1, 1, 2).float() 148 | preds[..., 0].apply_(lambda x: (x - 1) % hm.size(3) + 1) 149 | preds[..., 1].add_(-1).div_(hm.size(2)).floor_().add_(1) 150 | 151 | for i in range(preds.size(0)): 152 | for j in range(preds.size(1)): 153 | hm_ = hm[i, j, :] 154 | pX, pY = int(preds[i, j, 0]) - 1, int(preds[i, j, 1]) - 1 155 | if pX > 0 and pX < 63 and pY > 0 and pY < 63: 156 | diff = torch.FloatTensor( 157 | [hm_[pY, pX + 1] - hm_[pY, pX - 1], 158 | hm_[pY + 1, pX] - hm_[pY - 1, pX]]) 159 | preds[i, j].add_(diff.sign_().mul_(.25)) 160 | 161 | preds.add_(-.5) 162 | 163 | preds_orig = torch.zeros(preds.size()) 164 | if center is not None and scale is not None: 165 | for i in range(hm.size(0)): 166 | for j in range(hm.size(1)): 167 | preds_orig[i, j] = transform( 168 | preds[i, j], center, scale, hm.size(2), True) 169 | 170 | return preds, preds_orig 171 | 172 | def get_preds_fromhm_batch(hm, centers=None, scales=None): 173 | """Obtain (x,y) coordinates given a set of N heatmaps. If the centers 174 | and the scales is provided the function will return the points also in 175 | the original coordinate frame. 176 | 177 | Arguments: 178 | hm {torch.tensor} -- the predicted heatmaps, of shape [B, N, W, H] 179 | 180 | Keyword Arguments: 181 | centers {torch.tensor} -- the centers of the bounding box (default: {None}) 182 | scales {float} -- face scales (default: {None}) 183 | """ 184 | max, idx = torch.max( 185 | hm.view(hm.size(0), hm.size(1), hm.size(2) * hm.size(3)), 2) 186 | idx += 1 187 | preds = idx.view(idx.size(0), idx.size(1), 1).repeat(1, 1, 2).float() 188 | preds[..., 0].apply_(lambda x: (x - 1) % hm.size(3) + 1) 189 | preds[..., 1].add_(-1).div_(hm.size(2)).floor_().add_(1) 190 | 191 | for i in range(preds.size(0)): 192 | for j in range(preds.size(1)): 193 | hm_ = hm[i, j, :] 194 | pX, pY = int(preds[i, j, 0]) - 1, int(preds[i, j, 1]) - 1 195 | if pX > 0 and pX < 63 and pY > 0 and pY < 63: 196 | diff = torch.FloatTensor( 197 | [hm_[pY, pX + 1] - hm_[pY, pX - 1], 198 | hm_[pY + 1, pX] - hm_[pY - 1, pX]]) 199 | preds[i, j].add_(diff.sign_().mul_(.25)) 200 | 201 | preds.add_(-.5) 202 | 203 | preds_orig = torch.zeros(preds.size()) 204 | if centers is not None and scales is not None: 205 | for i in range(hm.size(0)): 206 | for j in range(hm.size(1)): 207 | preds_orig[i, j] = transform( 208 | preds[i, j], centers[i], scales[i], hm.size(2), True) 209 | 210 | return preds, preds_orig 211 | 212 | def shuffle_lr(parts, pairs=None): 213 | """Shuffle the points left-right according to the axis of symmetry 214 | of the object. 215 | 216 | Arguments: 217 | parts {torch.tensor} -- a 3D or 4D object containing the 218 | heatmaps. 
219 | 220 | Keyword Arguments: 221 | pairs {list of integers} -- [order of the flipped points] (default: {None}) 222 | """ 223 | if pairs is None: 224 | pairs = [16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 225 | 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 27, 28, 29, 30, 35, 226 | 34, 33, 32, 31, 45, 44, 43, 42, 47, 46, 39, 38, 37, 36, 41, 227 | 40, 54, 53, 52, 51, 50, 49, 48, 59, 58, 57, 56, 55, 64, 63, 228 | 62, 61, 60, 67, 66, 65] 229 | if parts.ndimension() == 3: 230 | parts = parts[pairs, ...] 231 | else: 232 | parts = parts[:, pairs, ...] 233 | 234 | return parts 235 | 236 | 237 | def flip(tensor, is_label=False): 238 | """Flip an image or a set of heatmaps left-right 239 | 240 | Arguments: 241 | tensor {numpy.array or torch.tensor} -- [the input image or heatmaps] 242 | 243 | Keyword Arguments: 244 | is_label {bool} -- [denote wherever the input is an image or a set of heatmaps ] (default: {False}) 245 | """ 246 | if not torch.is_tensor(tensor): 247 | tensor = torch.from_numpy(tensor) 248 | 249 | if is_label: 250 | tensor = shuffle_lr(tensor).flip(tensor.ndimension() - 1) 251 | else: 252 | tensor = tensor.flip(tensor.ndimension() - 1) 253 | 254 | return tensor 255 | 256 | # From pyzolib/paths.py (https://bitbucket.org/pyzo/pyzolib/src/tip/paths.py) 257 | 258 | 259 | def appdata_dir(appname=None, roaming=False): 260 | """ appdata_dir(appname=None, roaming=False) 261 | 262 | Get the path to the application directory, where applications are allowed 263 | to write user specific files (e.g. configurations). For non-user specific 264 | data, consider using common_appdata_dir(). 265 | If appname is given, a subdir is appended (and created if necessary). 266 | If roaming is True, will prefer a roaming directory (Windows Vista/7). 267 | """ 268 | 269 | # Define default user directory 270 | userDir = os.getenv('FACEALIGNMENT_USERDIR', None) 271 | if userDir is None: 272 | userDir = os.path.expanduser('~') 273 | if not os.path.isdir(userDir): # pragma: no cover 274 | userDir = '/var/tmp' # issue #54 275 | 276 | # Get system app data dir 277 | path = None 278 | if sys.platform.startswith('win'): 279 | path1, path2 = os.getenv('LOCALAPPDATA'), os.getenv('APPDATA') 280 | path = (path2 or path1) if roaming else (path1 or path2) 281 | elif sys.platform.startswith('darwin'): 282 | path = os.path.join(userDir, 'Library', 'Application Support') 283 | # On Linux and as fallback 284 | if not (path and os.path.isdir(path)): 285 | path = userDir 286 | 287 | # Maybe we should store things local to the executable (in case of a 288 | # portable distro or a frozen application that wants to be portable) 289 | prefix = sys.prefix 290 | if getattr(sys, 'frozen', None): 291 | prefix = os.path.abspath(os.path.dirname(sys.executable)) 292 | for reldir in ('settings', '../settings'): 293 | localpath = os.path.abspath(os.path.join(prefix, reldir)) 294 | if os.path.isdir(localpath): # pragma: no cover 295 | try: 296 | open(os.path.join(localpath, 'test.write'), 'wb').close() 297 | os.remove(os.path.join(localpath, 'test.write')) 298 | except IOError: 299 | pass # We cannot write in this directory 300 | else: 301 | path = localpath 302 | break 303 | 304 | # Get path specific for this app 305 | if appname: 306 | if path == userDir: 307 | appname = '.' 
+ appname.lstrip('.') # Make it a hidden directory 308 | path = os.path.join(path, appname) 309 | if not os.path.isdir(path): # pragma: no cover 310 | os.mkdir(path) 311 | 312 | # Done 313 | return path 314 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/hparams.py: -------------------------------------------------------------------------------- 1 | from glob import glob 2 | import os 3 | 4 | def get_image_list(data_root, split): 5 | filelist = [] 6 | 7 | with open('filelists/{}.txt'.format(split)) as f: 8 | for line in f: 9 | line = line.strip() 10 | if ' ' in line: line = line.split()[0] 11 | filelist.append(os.path.join(data_root, line)) 12 | 13 | return filelist 14 | 15 | class HParams: 16 | def __init__(self, **kwargs): 17 | self.data = {} 18 | 19 | for key, value in kwargs.items(): 20 | self.data[key] = value 21 | 22 | def __getattr__(self, key): 23 | if key not in self.data: 24 | raise AttributeError("'HParams' object has no attribute %s" % key) 25 | return self.data[key] 26 | 27 | def set_hparam(self, key, value): 28 | self.data[key] = value 29 | 30 | 31 | # Default hyperparameters 32 | hparams = HParams( 33 | num_mels=80, # Number of mel-spectrogram channels and local conditioning dimensionality 34 | # network 35 | rescale=True, # Whether to rescale audio prior to preprocessing 36 | rescaling_max=0.9, # Rescaling value 37 | 38 | # Use LWS (https://github.com/Jonathan-LeRoux/lws) for STFT and phase reconstruction 39 | # It"s preferred to set True to use with https://github.com/r9y9/wavenet_vocoder 40 | # Does not work if n_ffit is not multiple of hop_size!! 41 | use_lws=False, 42 | 43 | n_fft=800, # Extra window size is filled with 0 paddings to match this parameter 44 | hop_size=200, # For 16000Hz, 200 = 12.5 ms (0.0125 * sample_rate) 45 | win_size=800, # For 16000Hz, 800 = 50 ms (If None, win_size = n_fft) (0.05 * sample_rate) 46 | sample_rate=16000, # 16000Hz (corresponding to librispeech) (sox --i ) 47 | 48 | frame_shift_ms=None, # Can replace hop_size parameter. (Recommended: 12.5) 49 | 50 | # Mel and Linear spectrograms normalization/scaling and clipping 51 | signal_normalization=True, 52 | # Whether to normalize mel spectrograms to some predefined range (following below parameters) 53 | allow_clipping_in_normalization=True, # Only relevant if mel_normalization = True 54 | symmetric_mels=True, 55 | # Whether to scale the data to be symmetric around 0. (Also multiplies the output range by 2, 56 | # faster and cleaner convergence) 57 | max_abs_value=4., 58 | # max absolute value of data. If symmetric, data will be [-max, max] else [0, max] (Must not 59 | # be too big to avoid gradient explosion, 60 | # not too small for fast convergence) 61 | # Contribution by @begeekmyfriend 62 | # Spectrogram Pre-Emphasis (Lfilter: Reduce spectrogram noise and helps model certitude 63 | # levels. Also allows for better G&L phase reconstruction) 64 | preemphasize=True, # whether to apply filter 65 | preemphasis=0.97, # filter coefficient. 66 | 67 | # Limits 68 | min_level_db=-100, 69 | ref_level_db=20, 70 | fmin=55, 71 | # Set this to 55 if your speaker is male! if female, 95 should help taking off noise. (To 72 | # test depending on dataset. Pitch info: male~[65, 260], female~[100, 525]) 73 | fmax=7600, # To be increased/reduced depending on data. 
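    # Note: with sample_rate=16000 and hop_size=200, spectrogram frames produced from these
    # settings advance every 12.5 ms (80 per second), i.e. 3.2 frames per video frame at the
    # fps=25 set below. (The Lipsync3D mesh dataset itself uses a separate librosa STFT with
    # hop_length=160; see lipsync3d/dataset.py.)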
74 | 75 | ###################### Our training parameters ################################# 76 | img_size=96, 77 | fps=25, 78 | 79 | batch_size=8, 80 | initial_learning_rate=1e-4, 81 | nepochs=200000000000000000, ### ctrl + c, stop whenever eval loss is consistently greater than train loss for ~10 epochs 82 | num_workers=16, 83 | checkpoint_interval=10000, 84 | #eval_interval= 10 85 | save_img_interval = 10, 86 | save_optimizer_state=True, 87 | 88 | syncnet_wt=0.00, # is initially zero, will be set automatically to 0.03 later. Leads to faster convergence. 89 | syncnet_batch_size=64, 90 | syncnet_lr=1e-4, 91 | syncnet_eval_interval=10000, 92 | syncnet_checkpoint_interval=10000, 93 | 94 | disc_wt=0.07, 95 | disc_initial_learning_rate= 1e-5, #1e-4, 96 | 97 | LC_wt = 0.01 98 | ) 99 | 100 | 101 | def hparams_debug_string(): 102 | values = hparams.values() 103 | hp = [" %s: %s" % (name, values[name]) for name in sorted(values) if name != "sentences"] 104 | return "Hyperparameters:\n" + "\n".join(hp) 105 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/rescale_image.py: -------------------------------------------------------------------------------- 1 | """ 2 | Crop upper boddy in every video frame, square bounding box is averaged among all frames and fixed. 3 | """ 4 | 5 | import os 6 | import cv2 7 | import argparse 8 | from torch import full 9 | from tqdm import tqdm 10 | import numpy as np 11 | from .util import load_coef, create_dir, get_file_list 12 | 13 | # Resacle overlay and render images. Paste it on full image. 14 | def rescale_and_paste(crop_region, full_image, target_image): 15 | top, bottom, left, right = crop_region 16 | height = bottom - top 17 | width = right - left 18 | 19 | pasted_image = full_image.copy() 20 | rescaled_target = cv2.resize(target_image, (width, height), interpolation=cv2.INTER_AREA) 21 | pasted_image[top:bottom, left:right] = rescaled_target 22 | 23 | return pasted_image 24 | 25 | 26 | if __name__ == '__main__': 27 | parser = argparse.ArgumentParser(description='Process some integers.') 28 | parser.add_argument('--data_dir', type=str, default=None) 29 | args = parser.parse_args() 30 | 31 | crop_region_list = load_coef(os.path.join(args.data_dir, 'crop_region')) 32 | full_image_list = get_file_list(os.path.join(args.data_dir, 'full')) 33 | overlay_image_list = get_file_list(os.path.join(args.data_dir, 'overlay')) 34 | render_image_list = get_file_list(os.path.join(args.data_dir, 'render')) 35 | 36 | create_dir(os.path.join(args.data_dir, 'rescaled_overlay')) 37 | create_dir(os.path.join(args.data_dir, 'rescaled_render')) 38 | 39 | for i in tqdm(range(len(full_image_list))): 40 | full_image = cv2.imread(full_image_list[i]) 41 | overlay_image = cv2.imread(overlay_image_list[i]) 42 | render_image = cv2.imread(render_image_list[i]) 43 | crop_region = crop_region_list[i] 44 | 45 | H, W, _ = full_image.shape 46 | empty_image = np.zeros((H, W, 3), np.uint8) 47 | 48 | pasted_overlay = rescale_and_paste(crop_region, full_image, overlay_image) 49 | pasted_render = rescale_and_paste(crop_region, empty_image, render_image) 50 | 51 | cv2.imwrite(os.path.join(args.data_dir, 'rescaled_overlay', os.path.basename(full_image_list[i])), pasted_overlay) 52 | cv2.imwrite(os.path.join(args.data_dir, 'rescaled_render', os.path.basename(full_image_list[i])), pasted_render) -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/util-checkpoint.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | from tqdm import tqdm 4 | import cv2 5 | from skimage import io 6 | import torch 7 | import face_alignment 8 | 9 | 10 | def create_dir(dir_name): 11 | if not os.path.exists(dir_name): 12 | os.makedirs(dir_name) 13 | 14 | 15 | def get_file_list(data_dir, suffix=""): 16 | file_list = [] 17 | 18 | for dirpath, _, filenames in os.walk(data_dir): 19 | for filename in filenames: 20 | if suffix in filename: 21 | file_list.append(os.path.join(dirpath, filename)) 22 | 23 | file_list = sorted(file_list) 24 | 25 | return file_list 26 | 27 | 28 | def load_state_dict(model, fname): 29 | """ 30 | Set parameters converted from Caffe models authors of VGGFace2 provide. 31 | See https://www.robots.ox.ac.uk/~vgg/data/vgg_face2/. 32 | 33 | Arguments: 34 | model: model 35 | fname: file name of parameters converted from a Caffe model, assuming the file format is Pickle. 36 | """ 37 | with open(fname, 'rb') as f: 38 | weights = pickle.load(f, encoding='latin1') 39 | 40 | own_state = model.state_dict() 41 | 42 | for name, param in weights.items(): 43 | if name in own_state: 44 | try: 45 | own_state[name].copy_(torch.from_numpy(param)) 46 | except Exception: 47 | raise RuntimeError('While copying the parameter named {}, whose dimensions in the model are {} and whose ' 48 | 'dimensions in the checkpoint are {}.'.format(name, own_state[name].size(), param.size)) 49 | else: 50 | # raise KeyError('unexpected key "{}" in state_dict'.format(name)) 51 | pass 52 | 53 | 54 | def load_coef(data_dir, load_num=float('inf')): 55 | coef_list = [] 56 | count = 0 57 | 58 | for filename in tqdm(get_file_list(data_dir)): 59 | coef = torch.load(filename) 60 | coef_list.append(coef) 61 | count += 1 62 | if count >= load_num: 63 | break 64 | 65 | return coef_list 66 | 67 | 68 | def landmark_detection(image_list, save_path): 69 | fa_3d = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D, flip_input=False, device='cuda') 70 | 71 | landmark_dict = {} 72 | 73 | for i in tqdm(range(len(image_list))): 74 | image_name = image_list[i] 75 | image = io.imread(image_name) 76 | preds = fa_3d.get_landmarks(image) 77 | 78 | assert preds is not None 79 | 80 | landmark_dict[image_name] = preds[0][:, :2] 81 | 82 | with open(save_path, 'wb') as f: 83 | pickle.dump(landmark_dict, f) 84 | 85 | 86 | def plot_landmark(data_dir): 87 | create_dir(os.path.join(data_dir, 'landmark')) 88 | 89 | with open(os.path.join(data_dir, 'landmark.pkl'), 'rb') as f: 90 | landmark_dict = pickle.load(f) 91 | 92 | image_list = get_file_list(os.path.join(data_dir, 'crop')) 93 | 94 | for image_name in tqdm(image_list): 95 | image = cv2.imread(image_name) 96 | landmark = landmark_dict[image_name] 97 | 98 | for point in landmark: 99 | image = cv2.circle(image, (point[0], point[1]), radius=0, color=(255, 0, 0), thickness=-1) 100 | 101 | cv2.imwrite(os.path.join(data_dir, 'landmark', os.path.basename(image_name)), image) 102 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | from tqdm import tqdm 4 | import cv2 5 | from skimage import io 6 | import torch 7 | import numpy as np 8 | import face_alignment 9 | from facenet_pytorch import InceptionResnetV1 10 | from PIL import Image 11 | from natsort import natsorted 12 | 13 | def create_dir(dir_name): 14 | if not 
os.path.exists(dir_name): 15 | os.makedirs(dir_name) 16 | 17 | 18 | def get_file_list(data_dir, suffix=""): 19 | file_list = [] 20 | 21 | for dirpath, _, filenames in os.walk(data_dir): 22 | for filename in filenames: 23 | if suffix in filename: 24 | file_list.append(os.path.join(dirpath, filename)) 25 | 26 | file_list = natsorted(file_list) 27 | 28 | return file_list 29 | 30 | 31 | def load_state_dict(model, fname): 32 | """ 33 | Set parameters converted from Caffe models authors of VGGFace2 provide. 34 | See https://www.robots.ox.ac.uk/~vgg/data/vgg_face2/. 35 | 36 | Arguments: 37 | model: model 38 | fname: file name of parameters converted from a Caffe model, assuming the file format is Pickle. 39 | """ 40 | with open(fname, 'rb') as f: 41 | weights = pickle.load(f, encoding='latin1') 42 | 43 | own_state = model.state_dict() 44 | 45 | for name, param in weights.items(): 46 | if name in own_state: 47 | try: 48 | own_state[name].copy_(torch.from_numpy(param)) 49 | except Exception: 50 | raise RuntimeError('While copying the parameter named {}, whose dimensions in the model are {} and whose ' 51 | 'dimensions in the checkpoint are {}.'.format(name, own_state[name].size(), param.size)) 52 | else: 53 | # raise KeyError('unexpected key "{}" in state_dict'.format(name)) 54 | pass 55 | 56 | 57 | def load_coef(data_dir, load_num=float('inf')): 58 | coef_list = [] 59 | count = 0 60 | 61 | for filename in tqdm(get_file_list(data_dir)): 62 | coef = torch.load(filename) 63 | coef_list.append(coef) 64 | count += 1 65 | if count >= load_num: 66 | break 67 | 68 | return coef_list 69 | 70 | 71 | def landmark_detection(image_list, save_path): 72 | fa_3d = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D, flip_input=False, device='cuda') 73 | 74 | landmark_dict = {} 75 | 76 | for i in tqdm(range(len(image_list))): 77 | image_name = image_list[i] 78 | image = io.imread(image_name) 79 | preds = fa_3d.get_landmarks(image) 80 | 81 | assert preds is not None 82 | 83 | landmark_dict[image_name] = preds[0][:, :2] 84 | 85 | with open(save_path, 'wb') as f: 86 | pickle.dump(landmark_dict, f) 87 | 88 | 89 | def plot_landmark(data_dir): 90 | create_dir(os.path.join(data_dir, 'landmark')) 91 | 92 | with open(os.path.join(data_dir, 'landmark.pkl'), 'rb') as f: 93 | landmark_dict = pickle.load(f) 94 | 95 | image_list = get_file_list(os.path.join(data_dir, 'crop')) 96 | 97 | for image_name in tqdm(image_list): 98 | image = cv2.imread(image_name) 99 | landmark = landmark_dict[image_name] 100 | 101 | for point in landmark: 102 | image = cv2.circle(image, (point[0], point[1]), radius=0, color=(255, 0, 0), thickness=-1) 103 | 104 | cv2.imwrite(os.path.join(data_dir, 'landmark', os.path.basename(image_name)), image) 105 | 106 | 107 | def extract_face_emb(image_list, save_path, transforms_input): 108 | facenet = InceptionResnetV1(pretrained='vggface2').eval().to('cuda') 109 | 110 | face_emb_dict = {} 111 | 112 | for i in tqdm(range(len(image_list))): 113 | image_name = image_list[i] 114 | image = Image.open(image_name).convert('RGB') 115 | 116 | input = transforms_input(image).to('cuda') 117 | input = input.reshape(1, 3, 224, 224) 118 | face_emb = facenet(input) 119 | 120 | face_emb_dict[image_name] = face_emb.squeeze().detach().to('cpu') 121 | 122 | with open(save_path, 'wb') as f: 123 | pickle.dump(face_emb_dict, f) 124 | 125 | 126 | def load_face_emb(data_dir): 127 | face_emb_dir = os.path.join(data_dir, 'face_emb.pkl') 128 | 129 | with open(face_emb_dir, 'rb') as f: 130 | face_emb_dict = pickle.load(f) 131 | 
face_emb_list = list(face_emb_dict.values()) 132 | return face_emb_list 133 | 134 | def get_max_crop_region(crop_region_list): 135 | top, bottom, left, right = np.inf, 0, np.inf, 0 136 | 137 | for t, b, l, r in crop_region_list: 138 | if top > t: 139 | top = t 140 | 141 | if bottom < b: 142 | bottom = b 143 | 144 | if left > l: 145 | left = l 146 | 147 | if right < r: 148 | right = r 149 | 150 | return top, bottom, left, right -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/visualizer.py: -------------------------------------------------------------------------------- 1 | import torchvision 2 | from torch.utils.tensorboard import SummaryWriter 3 | 4 | 5 | class Visualizer: 6 | def __init__(self, opt): 7 | 8 | self.opt = opt # cache the option 9 | self.port = opt.display_port 10 | self.writer = SummaryWriter() 11 | 12 | def display_current_results(self, visuals, steps): 13 | for label, image in visuals.items(): 14 | self.writer.add_image(label, torchvision.utils.make_grid(image), steps) 15 | 16 | def plot_current_losses(self, total_iters, losses): 17 | """display the current losses on tensorboard display: dictionary of error labels and values 18 | Parameters: 19 | total_iters(int) -- total_iters 20 | losses (OrderedDict) -- training losses stored in the format of (name, float) pairs 21 | """ 22 | for label, loss in losses.items(): 23 | self.writer.add_scalar(label, loss, total_iters) 24 | 25 | def plot_current_texture(self, total_iters, predicted_mouths, gt_mouths): 26 | self.writer.add_image('Training/Predicted Texture', torchvision.utils.make_grid(predicted_mouths), total_iters) 27 | self.writer.add_image('Training/Ground Truth Texture', torchvision.utils.make_grid(gt_mouths), total_iters) 28 | 29 | def print_current_losses(self, epoch, iters, losses, t_comp, t_data): 30 | """print current losses on console; also save the losses to the disk 31 | Parameters: 32 | epoch (int) -- current epoch 33 | iters (int) -- current training iteration during this epoch (reset to 0 at the end of every epoch) 34 | losses (OrderedDict) -- training losses stored in the format of (name, float) pairs 35 | t_comp (float) -- computational time per data point (normalized by batch_size) 36 | t_data (float) -- data loading time per data point (not normalized by batch_size) 37 | """ 38 | message = '(epoch: %d, iters: %d, data: %.3f, comp: %.3f) ' % (epoch, iters, t_data, t_comp) 39 | for k, v in losses.items(): 40 | message += '%s: %.9f ' % (k, v) 41 | 42 | print(message) # print the message 43 | -------------------------------------------------------------------------------- /lipsync3d/combine-audioDVP.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import cv2\n", 11 | "import numpy as np\n", 12 | "from natsort import natsorted\n", 13 | "import torch\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "from PIL import Image\n", 16 | "from multiprocessing import Pool\n", 17 | "from sklearn.preprocessing import MinMaxScaler\n", 18 | "from skimage import exposure\n", 19 | "import math\n", 20 | "import shutil" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 6, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "def toTexture(image, gt_image, count):\n", 30 | " \n", 31 | " background = cv2.imread(gt_image)\n", 32 | " groundTruth = 
background.copy()\n", 33 | " texture_img = cv2.imread(image)\n", 34 | " \n", 35 | " texture_img_gray = cv2.cvtColor(texture_img, cv2.COLOR_BGR2GRAY)\n", 36 | " th, texture_th = cv2.threshold(texture_img_gray, 30, 50, cv2.THRESH_BINARY_INV)\n", 37 | " \n", 38 | " texture_th_floodfill = texture_th.copy()\n", 39 | " \n", 40 | " mask = np.zeros((image_height + 2, image_width + 2), np.uint8)\n", 41 | " \n", 42 | " cv2.floodFill(texture_th_floodfill, mask, (0,0), 255)\n", 43 | " \n", 44 | " texture_th_floodfill_inv = cv2.bitwise_not(texture_th_floodfill)\n", 45 | " \n", 46 | " im_out = texture_th | texture_th_floodfill_inv\n", 47 | " \n", 48 | " index_texture = np.array(list(zip(*np.where(im_out == 255))))\n", 49 | " \n", 50 | " background[index_texture[:,0], index_texture[:,1]] = texture_img[index_texture[:,0], index_texture[:,1]]\n", 51 | " \n", 52 | " texture_result = cv2.seamlessClone(background, groundTruth, im_out, (image_height//2, image_width // 2), cv2.NORMAL_CLONE)\n", 53 | " \n", 54 | " cv2.imwrite(os.path.join(src_directory, 'merged_texture', '{}.jpg'.format(count)), texture_result)\n", 55 | " " 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 11, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "image_height = 256\n", 65 | "image_width = 256\n", 66 | "texture_height = 280\n", 67 | "texture_width = 280\n", 68 | "src_directory = '../audioDVP_files/'" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 13, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "texture_image_files = natsorted([os.path.join(src_directory,'predicted_texture', x) for x in os.listdir(os.path.join(src_directory, 'predicted_texture'))])\n", 78 | "gt_frames = natsorted([os.path.join(src_directory, 'crop', x) for x in os.listdir(os.path.join(src_directory, 'crop'))])\n", 79 | "# reference_mouth = Image.fromarray(reference_mouth)\n", 80 | "count = [i for i in range(len(texture_image_files))]" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 17, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "if os.path.exists(os.path.join(src_directory, 'merged_texture')):\n", 90 | " shutil.rmtree(os.path.join(src_directory, 'merged_texture'))\n", 91 | "os.makedirs(os.path.join(src_directory, 'merged_texture'))\n", 92 | "pool = Pool(processes=40)\n", 93 | "pool.starmap(toTexture, zip(texture_image_files, gt_frames, count))\n", 94 | "pool.terminate()\n", 95 | "pool.join()" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 24, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "0" 107 | ] 108 | }, 109 | "execution_count": 24, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "# os.system('ffmpeg -y -i {}/%d.jpg -i {} -c:v libopenh264 -r 25 {}'.format(os.path.join(src_directory, 'predicted_face'), os.path.join(src_directory, 'audio','audio.wav'), os.path.join(src_directory, 'results', 'predicted_face.mp4')))\n", 116 | "os.system('ffmpeg -y -i {}/%d.jpg -c:v libx264 -crf 1 -r 25 {}'.format(os.path.join(src_directory, 'merged_texture'), os.path.join(src_directory, 'results', 'final_outcome.mp4')))\n", 117 | "os.system('ffmpeg -y -i {} -i {} -c:v copy -c:a copy {}'.format(os.path.join(src_directory, 'results', 'final_outcome.mp4'), os.path.join(src_directory,'reenact_audio2bfm_1e_2.mp4'), os.path.join(src_directory, 'results', 'with_audio.mp4')))" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | 
"execution_count": 66, 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "data": { 127 | "text/plain": [ 128 | "0" 129 | ] 130 | }, 131 | "execution_count": 66, 132 | "metadata": {}, 133 | "output_type": "execute_result" 134 | } 135 | ], 136 | "source": [ 137 | "os.system('ffmpeg -y -i {}/%d.jpg -i {} -c:v libx264 -crf 1 -r 25 {}'.format(os.path.join(src_directory, 'merged_texture'), os.path.join(src_directory, 'audio','audio.wav'), os.path.join(src_directory, 'results', 'final_texture.mp4')))" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 47, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "0" 149 | ] 150 | }, 151 | "execution_count": 47, 152 | "metadata": {}, 153 | "output_type": "execute_result" 154 | } 155 | ], 156 | "source": [ 157 | "out = cv2.VideoWriter(os.path.join(src_directory, 'temp_mesh.mp4'), cv2.VideoWriter_fourcc(*'mp4v'), 25, (image_width, image_height))\n", 158 | "\n", 159 | "imageFiles = natsorted([os.path.join(src_directory, 'reenact_mesh_image', x) for x in os.listdir(os.path.join(src_directory, 'reenact_mesh_image'))])\n", 160 | "\n", 161 | "for im in imageFiles:\n", 162 | " image = cv2.imread(im)\n", 163 | " out.write(image)\n", 164 | "\n", 165 | "out.release()\n", 166 | "os.system('ffmpeg -y -i {} -i {} -c:v copy -c:a aac {}'.format(os.path.join(src_directory, 'temp_mesh.mp4')\n", 167 | ", os.path.join(src_directory, 'audio','audio.wav'), os.path.join(src_directory, 'results', 'predicted_mesh.mp4')\n", 168 | "))" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 17, 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "data": { 178 | "text/plain": [ 179 | "0" 180 | ] 181 | }, 182 | "execution_count": 17, 183 | "metadata": {}, 184 | "output_type": "execute_result" 185 | } 186 | ], 187 | "source": [ 188 | "os.system('ffmpeg -y -i {}/results/predicted_face.mp4 -i {}/results/predicted_mesh.mp4 -filter_complex hstack -c:v libopenh264 {}/results/face_mesh_comparison.mp4'.format(src_directory,src_directory,src_directory))" 189 | ] 190 | } 191 | ], 192 | "metadata": { 193 | "interpreter": { 194 | "hash": "9968410507dc6acd82900f38c76c24d3f252bf51bc1b0c3680d51a23b0e86376" 195 | }, 196 | "kernelspec": { 197 | "display_name": "Python 3", 198 | "language": "python", 199 | "name": "python3" 200 | }, 201 | "language_info": { 202 | "codemirror_mode": { 203 | "name": "ipython", 204 | "version": 3 205 | }, 206 | "file_extension": ".py", 207 | "mimetype": "text/x-python", 208 | "name": "python", 209 | "nbconvert_exporter": "python", 210 | "pygments_lexer": "ipython3", 211 | "version": "3.8.11" 212 | } 213 | }, 214 | "nbformat": 4, 215 | "nbformat_minor": 2 216 | } 217 | -------------------------------------------------------------------------------- /lipsync3d/dataset.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from hparams import hparams 4 | 5 | sys.path.append('/home/server01/jyeongho_workspace/3d_face_gcns/') 6 | 7 | import os 8 | import torch 9 | import numpy as np 10 | import librosa 11 | from utils import landmarkdict_to_normalized_mesh_tensor, landmarkdict_to_mesh_tensor 12 | from audiodvp_utils import util 13 | from torch.utils.data import Dataset 14 | from natsort import natsorted 15 | import torchvision.transforms as transforms 16 | import cv2 17 | from PIL import Image 18 | import audio as audioLibrary 19 | import random 20 | 21 | class Lipsync3DMeshDataset(Dataset): 22 | def __init__(self, opt): 23 | 
super().__init__() 24 | self.opt = opt 25 | self.src_dir = opt.src_dir 26 | self.tgt_dir = opt.tgt_dir 27 | 28 | self.stabilized_mesh = [os.path.join(self.tgt_dir, 'stabilized_norm_mesh', x) for x in natsorted(os.listdir(os.path.join(self.tgt_dir, 'stabilized_norm_mesh')))] 29 | 30 | 31 | stft_path = os.path.join(self.src_dir, 'audio/audio_stft.pt') 32 | if not os.path.exists(stft_path): 33 | audio = librosa.load(os.path.join(self.src_dir, 'audio/audio.wav'),16000)[0] 34 | audio_stft = librosa.stft(audio, n_fft=510, hop_length=160, win_length=480) 35 | self.audio_stft = torch.from_numpy(np.stack((audio_stft.real, audio_stft.imag))) 36 | torch.save(self.audio_stft, os.path.join(self.src_dir, 'audio/audio_stft.pt')) 37 | else: 38 | self.audio_stft = torch.load(os.path.join(self.src_dir, 'audio/audio_stft.pt')) 39 | 40 | self.mesh_dict_list = util.load_coef(os.path.join(self.tgt_dir, 'mesh_dict')) 41 | self.filenames = util.get_file_list(os.path.join(self.tgt_dir, 'mesh_dict')) 42 | reference_mesh_dict = torch.load(os.path.join(self.tgt_dir, 'reference_mesh.pt')) 43 | 44 | self.reference_mesh = landmarkdict_to_normalized_mesh_tensor(reference_mesh_dict) 45 | 46 | if opt.isTrain: 47 | minlen = min(len(self.mesh_dict_list), self.audio_stft.shape[2] // 4) 48 | train_idx = int(minlen * self.opt.train_rate) 49 | self.mesh_dict_list = self.mesh_dict_list[:train_idx] 50 | self.filenames = self.filenames[:train_idx] 51 | 52 | print('Training set size: ', len(self.filenames)) 53 | 54 | def __len__(self): 55 | return min(self.audio_stft.shape[2] // 4, len(self.filenames)) 56 | 57 | def __getitem__(self, index): 58 | 59 | audio_idx = index * 4 60 | 61 | audio_feature_list = [] 62 | for i in range(audio_idx - 12, audio_idx + 12): 63 | if i < 0: 64 | audio_feature_list.append(self.audio_stft[:, :, 0]) 65 | elif i >= self.audio_stft.shape[2]: 66 | audio_feature_list.append(self.audio_stft[:, :, -1]) 67 | else: 68 | audio_feature_list.append(self.audio_stft[:, :, i]) 69 | 70 | audio_feature = torch.stack(audio_feature_list, 2) 71 | 72 | filename = os.path.basename(self.filenames[index]) 73 | 74 | if not self.opt.isTrain: 75 | landmark_dict = self.mesh_dict_list[index] 76 | normalized_mesh = landmarkdict_to_normalized_mesh_tensor(landmark_dict) 77 | # stabilized_mesh = torch.tensor(torch.load(self.stabilized_mesh[index])) 78 | 79 | R = torch.from_numpy(landmark_dict['R']).float() 80 | t = torch.from_numpy(landmark_dict['t']).float() 81 | c = float(landmark_dict['c']) 82 | 83 | return {'audio_feature': audio_feature, 'filename': filename, 84 | 'reference_mesh': self.reference_mesh, 'normalized_mesh': normalized_mesh, 85 | 'R': R, 't': t, 'c': c} 86 | 87 | else: 88 | landmark_dict = self.mesh_dict_list[index] 89 | normalized_mesh = landmarkdict_to_normalized_mesh_tensor(landmark_dict) 90 | # stabilized_mesh = torch.tensor(torch.load(self.stabilized_mesh[index])) 91 | return { 92 | 'audio_feature': audio_feature, 'filename': filename, 93 | 'reference_mesh' : self.reference_mesh, 'normalized_mesh': normalized_mesh 94 | } -------------------------------------------------------------------------------- /lipsync3d/demo.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | 3 | # set data path 4 | # target_dir : directory for training data 5 | # source_dir : directory for inference data, put test audio in source_dir/audio directory 6 | # video_dir : path for training video 7 | 8 | target_dir="data/kkj/kkj04_lipsync3d" 9 | source_dir="data/kkj/kkj04_lipsync3d" 10 | 
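# Note: in this demo source_dir and target_dir point to the same clip, so test.py will also
# report a held-out test loss (it only does so when src_dir == tgt_dir).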
video_dir="data/kkj/kkj04_lipsync3d/KKJ_slow_04_stand.mp4" 11 | 12 | 13 | # set video clip duration 14 | start_time="00:00:00" 15 | end_time="240" 16 | 17 | # mkdir -p $target_dir/full 18 | # mkdir -p $target_dir/crop 19 | # mkdir -p $target_dir/audio 20 | # mkdir -p $target_dir/results 21 | # mkdir -p $source_dir/audio 22 | # mkdir -p $source_dir/results 23 | 24 | # 1. Take all frames and audio of training data 25 | # warning! the number of extracted frames should be dividable by 5. 26 | # If the number of frames of training video is not dividable by 5, delete some frames manually to make the number of frames dividable by 5 27 | 28 | # ffmpeg -hide_banner -y -i $video_dir -r 25 $target_dir/full/%05d.png 29 | # ffmpeg -hide_banner -y -i $video_dir -ar 16000 $target_dir/audio/audio.wav 30 | 31 | # # crop and resize video frames 32 | # python audiodvp_utils/crop_portrait.py \ 33 | # --data_dir $target_dir \ 34 | # --crop_level 1.5 \ 35 | # --vertical_adjust 0.2 36 | 37 | # pose normalization 38 | # python lipsync3d/pose_normalization.py --data_dir $target_dir --gpu_ids 0 39 | 40 | # train lipsync3d net 41 | # python lipsync3d/train.py --src_dir $target_dir --tgt_dir $target_dir 42 | 43 | # test lipsync3d net 44 | python lipsync3d/test.py \ 45 | --batch_size 1 \ 46 | --serial_batches False \ 47 | --isTrain False \ 48 | --gpu_ids 0 \ 49 | --src_dir $source_dir \ 50 | --tgt_dir $target_dir 51 | 52 | # ffmpeg -y -loglevel warning \ 53 | # -thread_queue_size 8192 -i $target_dir/mesh_image/%05d.png \ 54 | # -thread_queue_size 8192 -i $source_dir/reenact_mesh_image/%05d.png \ 55 | # -i $source_dir/audio/audio.wav \ 56 | # -filter_complex hstack=inputs=2 -shortest -vcodec libx264 -preset slower -profile:v high -crf 18 -pix_fmt yuv420p $source_dir/results/tgt_kkj04_src_kkj00_mesh_reenact.mp4 57 | -------------------------------------------------------------------------------- /lipsync3d/hparams.py: -------------------------------------------------------------------------------- 1 | from glob import glob 2 | import os 3 | 4 | def get_image_list(data_root, split): 5 | filelist = [] 6 | 7 | with open('filelists/{}.txt'.format(split)) as f: 8 | for line in f: 9 | line = line.strip() 10 | if ' ' in line: line = line.split()[0] 11 | filelist.append(os.path.join(data_root, line)) 12 | 13 | return filelist 14 | 15 | class HParams: 16 | def __init__(self, **kwargs): 17 | self.data = {} 18 | 19 | for key, value in kwargs.items(): 20 | self.data[key] = value 21 | 22 | def __getattr__(self, key): 23 | if key not in self.data: 24 | raise AttributeError("'HParams' object has no attribute %s" % key) 25 | return self.data[key] 26 | 27 | def set_hparam(self, key, value): 28 | self.data[key] = value 29 | 30 | 31 | # Default hyperparameters 32 | hparams = HParams( 33 | num_mels=80, # Number of mel-spectrogram channels and local conditioning dimensionality 34 | # network 35 | rescale=True, # Whether to rescale audio prior to preprocessing 36 | rescaling_max=0.9, # Rescaling value 37 | 38 | # Use LWS (https://github.com/Jonathan-LeRoux/lws) for STFT and phase reconstruction 39 | # It"s preferred to set True to use with https://github.com/r9y9/wavenet_vocoder 40 | # Does not work if n_ffit is not multiple of hop_size!! 
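    # Note: this file mirrors audiodvp_utils/hparams.py; the audio settings are identical and
    # only the training-parameter section further below differs (batch size, learning rates,
    # number of workers, syncnet/disc weights and intervals).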
41 | use_lws=False, 42 | 43 | n_fft=800, # Extra window size is filled with 0 paddings to match this parameter 44 | hop_size=200, # For 16000Hz, 200 = 12.5 ms (0.0125 * sample_rate) 45 | win_size=800, # For 16000Hz, 800 = 50 ms (If None, win_size = n_fft) (0.05 * sample_rate) 46 | sample_rate=16000, # 16000Hz (corresponding to librispeech) (sox --i ) 47 | 48 | frame_shift_ms=None, # Can replace hop_size parameter. (Recommended: 12.5) 49 | 50 | # Mel and Linear spectrograms normalization/scaling and clipping 51 | signal_normalization=True, 52 | # Whether to normalize mel spectrograms to some predefined range (following below parameters) 53 | allow_clipping_in_normalization=True, # Only relevant if mel_normalization = True 54 | symmetric_mels=True, 55 | # Whether to scale the data to be symmetric around 0. (Also multiplies the output range by 2, 56 | # faster and cleaner convergence) 57 | max_abs_value=4., 58 | # max absolute value of data. If symmetric, data will be [-max, max] else [0, max] (Must not 59 | # be too big to avoid gradient explosion, 60 | # not too small for fast convergence) 61 | # Contribution by @begeekmyfriend 62 | # Spectrogram Pre-Emphasis (Lfilter: Reduce spectrogram noise and helps model certitude 63 | # levels. Also allows for better G&L phase reconstruction) 64 | preemphasize=True, # whether to apply filter 65 | preemphasis=0.97, # filter coefficient. 66 | 67 | # Limits 68 | min_level_db=-100, 69 | ref_level_db=20, 70 | fmin=55, 71 | # Set this to 55 if your speaker is male! if female, 95 should help taking off noise. (To 72 | # test depending on dataset. Pitch info: male~[65, 260], female~[100, 525]) 73 | fmax=7600, # To be increased/reduced depending on data. 74 | 75 | ###################### Our training parameters ################################# 76 | img_size=96, 77 | fps=25, 78 | 79 | batch_size=24, 80 | initial_learning_rate=5e-3, 81 | nepochs=200000000000000000, ### ctrl + c, stop whenever eval loss is consistently greater than train loss for ~10 epochs 82 | num_workers=40, 83 | checkpoint_interval=10000, 84 | eval_interval=10000, 85 | save_optimizer_state=True, 86 | 87 | syncnet_wt=0.01, # is initially zero, will be set automatically to 0.03 later. Leads to faster convergence. 
88 | syncnet_batch_size=64, 89 | syncnet_lr=1e-2, 90 | syncnet_eval_interval=4000, 91 | syncnet_checkpoint_interval=4000, 92 | 93 | disc_wt=0.05, 94 | disc_initial_learning_rate=1e-4, 95 | ) 96 | 97 | 98 | def hparams_debug_string(): 99 | values = hparams.values() 100 | hp = [" %s: %s" % (name, values[name]) for name in sorted(values) if name != "sentences"] 101 | return "Hyperparameters:\n" + "\n".join(hp) 102 | -------------------------------------------------------------------------------- /lipsync3d/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class L2Loss(nn.Module): 5 | def __init__(self): 6 | super(L2Loss, self).__init__() 7 | 8 | def forward(self, input, target): 9 | l2_loss = (target - input) ** 2 10 | l2_loss = torch.mean(l2_loss) 11 | 12 | return l2_loss 13 | 14 | class L1Loss(nn.Module): 15 | def __init__(self): 16 | super(L1Loss, self).__init__() 17 | 18 | def forward(self, input, target): 19 | 20 | return (torch.abs(input - target)).mean() -------------------------------------------------------------------------------- /lipsync3d/model.py: -------------------------------------------------------------------------------- 1 | from cv2 import getOptimalNewCameraMatrix 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import functional as F 5 | 6 | class View(nn.Module): 7 | def __init__(self, shape): 8 | super(View, self).__init__() 9 | self.shape = shape 10 | 11 | def forward(self, x): 12 | return x.view(*self.shape) 13 | 14 | class Lipsync3DMesh(nn.Module): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | #TODO 19 | self.AudioEncoder = nn.Sequential( 20 | # Define Network Architecture (Hint: Architecture mentioned in the paper, Change in latent space dimensions are as follows) 21 | # 2 x 256 x 24 -> 72 x 128 x 24 22 | # 72 x 128 x 24 -> 108 x 64 x 24 23 | # 108 x 64 x 24 -> 162 x 32 x 24 24 | # 162 x 32 x 24 -> 243 x 16 x 24 25 | # 243 x 16 x 24 -> 256 x 8 x 24 26 | # 256 x 8 x 24 -> 256 x 4 x 24 27 | # 256 x 4 x 24 -> 128 x 4 x 13 28 | # 128 x 4 x 13 -> 64 x 4 x 8 29 | # 64 x 4 x 8 -> 32 x 4 x 5 30 | # 32 x 4 x 5 -> 16 x 4 x 4 31 | # 16 x 4 x 4 -> 8 x 4 x 3 32 | # 8 x 4 x 3 -> 4 x 4 x 2 33 | View([-1, 32]), 34 | ) 35 | 36 | self.GeometryDecoder = nn.Sequential( 37 | nn.Linear(32, 150), 38 | nn.Dropout(0.5), 39 | nn.Linear(150, 1434) 40 | ) 41 | 42 | def forward(self, spec, latentMode=False): 43 | # spec : B x 2 x 256 x 24 44 | # texture : B x 3 x 128 x 128 45 | 46 | latent = self.AudioEncoder(spec) 47 | if latentMode: 48 | return latent 49 | geometry_diff = self.GeometryDecoder(latent) 50 | 51 | return geometry_diff -------------------------------------------------------------------------------- /lipsync3d/options.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | 4 | 5 | class Options: 6 | def __init__(self): 7 | self.parser = argparse.ArgumentParser() 8 | self.parser.add_argument('--data_dir', type=str, default=None) 9 | self.parser.add_argument('--src_dir', type=str, default=None) 10 | self.parser.add_argument('--tgt_dir', type=str, default=None) 11 | 12 | self.parser.add_argument('--train_rate', type=float, default=0.8) 13 | self.parser.add_argument('--num_epoch', type=int, default=250) 14 | self.parser.add_argument('--batch_size', type=int, default=128) 15 | self.parser.add_argument('--serial_batches', type=self.str2bool, default=False) 16 | self.parser.add_argument('--num_workers', 
type=int, default=4) 17 | self.parser.add_argument('--isTrain', type=self.str2bool, default=True) 18 | self.parser.add_argument('--lr', type=float, default=2e-5, help='initial learning rate for adam') 19 | self.parser.add_argument('--lambda_geo', type=float, default=0.3) 20 | self.parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU') 21 | 22 | self.parser.add_argument('--display_port', type=int, default=11111, help='tensorboard port of the web display') 23 | self.parser.add_argument('--display_freq', type=int, default=2000, help='frequency of showing training results on screen') 24 | self.parser.add_argument('--print_freq', type=int, default=200, help='frequency of showing training results on console') 25 | self.parser.add_argument('--freeze_mesh', type=bool, default=False, help='Choose if you want to freeze mesh training pipeline or not') 26 | self.parser.add_argument('--load_model', type=bool, default=False, help='Load model from the checkpoint') 27 | self.parser.add_argument('--model_name', type=str, default=None, help='Name of the checkpoint file') 28 | self.parser.add_argument('--mesh_model_path', type=str, default='', help='Path of the mesh model checkpoint file') 29 | self.parser.add_argument('--checkpoint_interval', type=int, default=10, help='Checkpoint interval') 30 | 31 | 32 | def parse_args(self): 33 | self.args = self.parser.parse_args() 34 | self.args.device = torch.device('cuda:{}'.format(self.args.gpu_ids[0])) if self.args.gpu_ids else torch.device('cpu') 35 | return self.args 36 | 37 | def str2bool(self, v): 38 | if isinstance(v, bool): 39 | return v 40 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 41 | return True 42 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 43 | return False 44 | else: 45 | raise argparse.ArgumentTypeError('Boolean value expected.') 46 | -------------------------------------------------------------------------------- /lipsync3d/pose_normalization.py: -------------------------------------------------------------------------------- 1 | """ 2 | Crop upper boddy in every video frame, square bounding box is averaged among all frames and fixed. 
3 | """ 4 | import sys 5 | import os 6 | import cv2 7 | import argparse 8 | import math 9 | from tqdm import tqdm 10 | import torch 11 | import utils 12 | from utils import landmark_to_dict 13 | import numpy as np 14 | import cv2 15 | import mediapipe as mp 16 | import matplotlib.pyplot as plt 17 | from audiodvp_utils import util 18 | import mediapipe.python.solutions.face_mesh as mp_face_mesh 19 | import mediapipe.python.solutions.drawing_utils as mp_drawing 20 | import mediapipe.python.solutions.drawing_styles as mp_drawing_styles 21 | from multiprocessing import Pool 22 | 23 | 24 | def get_reference_dict(data_dir): 25 | image = cv2.imread(os.path.join(data_dir, 'reference_frame.png')) 26 | image_rows, image_cols, _ = image.shape 27 | 28 | with mp_face_mesh.FaceMesh( 29 | static_image_mode=True, 30 | max_num_faces=1, 31 | refine_landmarks=True, 32 | min_detection_confidence=0.5) as face_mesh: 33 | 34 | results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) 35 | reference_dict = landmark_to_dict(results.multi_face_landmarks[0].landmark) 36 | reference_dict = normalized_to_pixel_coordinates(reference_dict, image_cols, image_rows) 37 | return reference_dict 38 | 39 | def draw_landmark(results, image, save_path): 40 | drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1) 41 | 42 | mp_drawing.draw_landmarks( 43 | image=image, 44 | landmark_list=results.multi_face_landmarks[0], 45 | connections=mp_face_mesh.FACEMESH_TESSELATION, 46 | landmark_drawing_spec=None, 47 | connection_drawing_spec=mp_drawing_styles 48 | .get_default_face_mesh_tesselation_style()) 49 | 50 | cv2.imwrite(save_path, image) 51 | 52 | 53 | def normalized_to_pixel_coordinates(landmark_dict, image_width, image_height): 54 | def is_valid_normalized_value(value): 55 | return (value > 0 or math.isclose(0, value)) and (value < 1 or math.isclose(1, value)) 56 | 57 | landmark_pixel_coord_dict = {} 58 | 59 | for idx, coord in landmark_dict.items(): 60 | if (idx == 'R') or (idx == 't') or (idx == 'c'): 61 | continue 62 | 63 | if not (is_valid_normalized_value(coord[0]) and 64 | is_valid_normalized_value(coord[1])): 65 | # TODO: Draw coordinates even if it's outside of the image bounds. 
66 | return None 67 | x_px = coord[0] * image_width 68 | y_px = coord[1] * image_height 69 | z_px = coord[2] * image_width 70 | landmark_pixel_coord_dict[idx] = [x_px, y_px, z_px] 71 | return landmark_pixel_coord_dict 72 | 73 | 74 | def draw_pose_normalized_mesh(target_dict, image, save_path): 75 | connections = mp_face_mesh.FACEMESH_TESSELATION 76 | drawing_spec = mp_drawing.DrawingSpec(color= mp_drawing.BLACK_COLOR, thickness=1, circle_radius=1) 77 | 78 | image_rows, image_cols, _ = image.shape 79 | R = target_dict['R'] 80 | t = target_dict['t'] 81 | c = target_dict['c'] 82 | 83 | idx_to_coordinates = {} 84 | for idx, coord in target_dict.items(): 85 | if (idx == 'R') or (idx == 't') or (idx == 'c'): 86 | continue 87 | tgt = np.array(coord).reshape(3, 1) 88 | norm_tgt = (c * np.matmul(R, tgt) + t).squeeze() 89 | x_px = min(math.floor(norm_tgt[0]), image_cols - 1) 90 | y_px = min(math.floor(norm_tgt[1]), image_rows - 1) 91 | landmark_px = (x_px, y_px) 92 | if landmark_px: 93 | idx_to_coordinates[idx] = landmark_px 94 | 95 | white_image = np.zeros([image_rows, image_cols, 3], dtype=np.uint8) 96 | white_image[:] = 255 97 | for connection in connections: 98 | start_idx = connection[0] 99 | end_idx = connection[1] 100 | 101 | if start_idx in idx_to_coordinates and end_idx in idx_to_coordinates: 102 | cv2.line(white_image, 103 | idx_to_coordinates[start_idx], 104 | idx_to_coordinates[end_idx], 105 | drawing_spec.color, 106 | drawing_spec.thickness 107 | ) 108 | cv2.imwrite(save_path, white_image) 109 | 110 | 111 | def draw_3d_mesh(target_dict, save_path, elevation=10, azimuth=10): 112 | connections = mp_face_mesh.FACEMESH_TESSELATION 113 | drawing_spec = mp_drawing.DrawingSpec(color= mp_drawing.BLACK_COLOR, thickness=1, circle_radius=1) 114 | 115 | plt.figure(figsize=(10, 10)) 116 | ax = plt.axes(projection='3d') 117 | ax.view_init(elev=elevation, azim=azimuth) 118 | plotted_landmarks = {} 119 | 120 | for idx, coord in target_dict.items(): 121 | if (idx == 'R') or (idx == 't') or (idx == 'c'): 122 | continue 123 | plotted_landmarks[idx] = (-coord[2], coord[0], -coord[1]) 124 | 125 | for connection in connections: 126 | start_idx = connection[0] 127 | end_idx = connection[1] 128 | 129 | if start_idx in plotted_landmarks and end_idx in plotted_landmarks: 130 | landmark_pair = [plotted_landmarks[start_idx], plotted_landmarks[end_idx]] 131 | ax.plot3D( 132 | xs=[landmark_pair[0][0], landmark_pair[1][0]], 133 | ys=[landmark_pair[0][1], landmark_pair[1][1]], 134 | zs=[landmark_pair[0][2], landmark_pair[1][2]], 135 | color=(0., 0., 1.), 136 | linewidth=1) 137 | plt.savefig(save_path) 138 | 139 | def multiProcess(im, data_dir, reference_dict): 140 | with mp_face_mesh.FaceMesh( 141 | max_num_faces=1, 142 | refine_landmarks=True, 143 | min_detection_confidence=0.5, 144 | min_tracking_confidence=0.5) as face_mesh: 145 | image = cv2.imread(im) 146 | annotated_image = image.copy() 147 | image_rows, image_cols, _ = image.shape 148 | results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) 149 | target_dict = landmark_to_dict(results.multi_face_landmarks[0].landmark) 150 | target_dict = normalized_to_pixel_coordinates(target_dict, image_cols, image_rows) 151 | R, t, c = utils.Umeyama_algorithm(reference_dict, target_dict) 152 | target_dict['R'] = R 153 | target_dict['t'] = t 154 | target_dict['c'] = c 155 | torch.save(target_dict, os.path.join(data_dir, 'mesh_dict', os.path.basename(im))[:-4]+'.pt') 156 | 157 | if args.draw_mesh: 158 | img_save_path = os.path.join(data_dir, 'mesh_image', 
os.path.basename(im)[:-4] + '.png') 159 | draw_landmark(results, annotated_image, img_save_path) 160 | 161 | if args.draw_norm_mesh: 162 | img_save_path = os.path.join(data_dir, 'mesh_norm_image', os.path.basename(im)[:-4] + '.png') 163 | draw_pose_normalized_mesh(target_dict, annotated_image, img_save_path) 164 | 165 | if args.draw_norm_3d_mesh: 166 | img_save_path = os.path.join(data_dir, 'mesh_norm_3d_image', os.path.basename(im)[:-4] + '.png') 167 | draw_3d_mesh(target_dict, img_save_path, elevation=10, azimuth=10) 168 | 169 | def pose_normalization(args): 170 | data_dir = args.data_dir 171 | image_list = util.get_file_list(os.path.join(data_dir, 'crop')) 172 | reference_dict = get_reference_dict(data_dir) 173 | torch.save(reference_dict, os.path.join(data_dir, 'reference_mesh.pt')) 174 | 175 | data_dirs = [] 176 | reference_dicts = [] 177 | 178 | for i in range(len(image_list)): 179 | data_dirs.append(data_dir) 180 | reference_dicts.append(reference_dict) 181 | 182 | pool = Pool(processes=40) 183 | pool.starmap(multiProcess, zip(image_list, data_dirs, reference_dicts)) 184 | 185 | # with mp_face_mesh.FaceMesh( 186 | # max_num_faces=1, 187 | # refine_landmarks=True, 188 | # min_detection_confidence=0.5, 189 | # min_tracking_confidence=0.5) as face_mesh: 190 | # for i in tqdm(range(len(image_list))): 191 | # image = cv2.imread(image_list[i]) 192 | # annotated_image = image.copy() 193 | # image_rows, image_cols, _ = image.shape 194 | # results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) 195 | # target_dict = landmark_to_dict(results.multi_face_landmarks[0].landmark) 196 | # target_dict = normalized_to_pixel_coordinates(target_dict, image_cols, image_rows) 197 | # R, t, c = utils.Umeyama_algorithm(reference_dict, target_dict) 198 | # target_dict['R'] = R 199 | # target_dict['t'] = t 200 | # target_dict['c'] = c 201 | # torch.save(target_dict, os.path.join(data_dir, 'mesh_dict', os.path.basename(image_list[i]))[:-4]+'.pt') 202 | 203 | # if args.draw_mesh: 204 | # img_save_path = os.path.join(data_dir, 'mesh_image', os.path.basename(image_list[i])[:-4] + '.png') 205 | # draw_landmark(results, annotated_image, img_save_path) 206 | 207 | # if args.draw_norm_mesh: 208 | # img_save_path = os.path.join(data_dir, 'mesh_norm_image', os.path.basename(image_list[i])[:-4] + '.png') 209 | # draw_pose_normalized_mesh(target_dict, annotated_image, img_save_path) 210 | 211 | # if args.draw_norm_3d_mesh: 212 | # img_save_path = os.path.join(data_dir, 'mesh_norm_3d_image', os.path.basename(image_list[i])[:-4] + '.png') 213 | # draw_3d_mesh(target_dict, img_save_path, elevation=10, azimuth=10) 214 | 215 | 216 | def create_dirs(opt): 217 | os.makedirs(os.path.join(args.data_dir, 'mesh_dict'), exist_ok=True) 218 | if opt.draw_mesh: 219 | os.makedirs(os.path.join(args.data_dir, 'mesh_image'), exist_ok=True) 220 | 221 | if opt.draw_norm_mesh: 222 | os.makedirs(os.path.join(args.data_dir, 'mesh_norm_image'), exist_ok=True) 223 | 224 | if opt.draw_norm_3d_mesh: 225 | os.makedirs(os.path.join(args.data_dir, 'mesh_norm_3d_image'), exist_ok=True) 226 | 227 | if __name__ == '__main__': 228 | parser = argparse.ArgumentParser(description='Process some integers.') 229 | parser.add_argument('--data_dir', type=str, default=None) 230 | parser.add_argument('--draw_mesh', type=bool, default=False) 231 | parser.add_argument('--draw_norm_mesh', type=bool, default=False) 232 | parser.add_argument('--draw_norm_3d_mesh', type=bool, default=False) 233 | args = parser.parse_args() 234 | 235 | create_dirs(args) 236 | 
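    # pose_normalization() runs MediaPipe FaceMesh over every image in <data_dir>/crop
    # (in a 40-process Pool) and saves, per frame, a mesh_dict .pt holding 478 pixel-space
    # landmarks plus the similarity transform (R, t, c) from utils.Umeyama_algorithm that
    # aligns the frame to the reference frame (normalized landmark = c * R @ landmark + t),
    # as consumed later by lipsync3d/dataset.py and test.py.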
pose_normalization(args) 237 | -------------------------------------------------------------------------------- /lipsync3d/test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('/home/server01/jyeongho_workspace/3d_face_gcns/') 3 | 4 | import torch 5 | from torch.utils.data import DataLoader 6 | from options import Options 7 | from dataset import Lipsync3DMeshDataset 8 | from model import Lipsync3DMesh 9 | from loss import L2Loss 10 | import time 11 | from utils import mesh_tensor_to_landmarkdict, draw_mesh_images 12 | import os 13 | from tqdm import tqdm 14 | import cv2 15 | import shutil 16 | 17 | 18 | if __name__ == '__main__': 19 | opt = Options().parse_args() 20 | device = opt.device 21 | calculate_test_loss = (opt.src_dir == opt.tgt_dir) 22 | dataset = Lipsync3DMeshDataset(opt) 23 | test_dataloader = torch.utils.data.DataLoader( 24 | dataset, 25 | batch_size=opt.batch_size, 26 | shuffle=False, # default not shuffle 27 | num_workers=opt.num_workers, 28 | drop_last=True # the batch size cannot change during the training so the last uncomplete batch need to be dropped 29 | ) 30 | 31 | model = Lipsync3DMesh().to(device) 32 | criterionGeo = L2Loss() 33 | 34 | if opt.model_name is not None: 35 | state_dict = torch.load(os.path.join(opt.tgt_dir, opt.model_name)) 36 | audioEncoder_state = {} 37 | geometryDecoder_state = {} 38 | 39 | for key, value in state_dict.items(): 40 | if 'AudioEncoder' in key: 41 | audioEncoder_state[key.replace('AudioEncoder.', '')] = value 42 | if 'GeometryDecoder' in key: 43 | geometryDecoder_state[key.replace('GeometryDecoder.', '')] = value 44 | model.AudioEncoder.load_state_dict(audioEncoder_state) 45 | model.GeometryDecoder.load_state_dict(geometryDecoder_state) 46 | else: 47 | raise ValueError('No checkpoint specified') 48 | 49 | def emptyFolder(path): 50 | if os.path.exists(path): 51 | shutil.rmtree(path) 52 | os.makedirs(path, exist_ok=True) 53 | 54 | ckpt = torch.load(os.path.join(opt.tgt_dir, opt.model_name), map_location=device) 55 | model.load_state_dict(ckpt) 56 | 57 | emptyFolder(os.path.join(opt.src_dir, 'reenact_mesh')) 58 | emptyFolder(os.path.join(opt.src_dir, 'reenact_mesh_image')) 59 | emptyFolder(os.path.join(opt.src_dir, 'reenact_texture')) 60 | emptyFolder(os.path.join(opt.src_dir, 'predicted_normalised_mesh')) 61 | 62 | avg_loss = 0 63 | 64 | # previous_texture = torch.zeros((1, 3, 140, 280)).to(device) 65 | with torch.no_grad(): 66 | model.eval() 67 | for i, data in enumerate(tqdm(test_dataloader)): 68 | audio_feature = data['audio_feature'].to(device) 69 | reference_mesh = data['reference_mesh'].to(device) 70 | normalized_mesh = data['normalized_mesh'].to(device) 71 | filename = data['filename'][0] 72 | R = data['R'][0].to(device) 73 | RT = R.transpose(0, 1) 74 | t = data['t'][0].to(device) 75 | c = data['c'][0].to(device) 76 | 77 | geometry_diff = model(audio_feature) 78 | geometry_diff = geometry_diff.reshape(-1, 478, 3) 79 | geometry = reference_mesh + geometry_diff 80 | 81 | if calculate_test_loss and (i > int(len(test_dataloader) * opt.train_rate)): 82 | geoLoss = criterionGeo(geometry, normalized_mesh) 83 | avg_loss += geoLoss.detach() / int(len(test_dataloader) * (1 - opt.train_rate)) 84 | 85 | geometry = geometry[0].transpose(0, 1) 86 | normlaised_geometry = geometry.clone().detach() 87 | normalised_landmark_dict = mesh_tensor_to_landmarkdict(normlaised_geometry) 88 | 89 | geometry = (torch.matmul(RT, (geometry - t)) / c).transpose(0, 1).cpu().detach() 90 | 
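            # The line above undoes the pose normalization: the network predicts the mesh in
            # the reference-aligned frame (x_norm = c * R @ x + t), so R^T @ (x_norm - t) / c
            # maps it back into the original frame's pixel coordinates before it is saved to
            # reenact_mesh and drawn into reenact_mesh_image.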
landmark_dict = mesh_tensor_to_landmarkdict(geometry) 91 | 92 | # save_image(predicted_mouth[0], os.path.join(opt.src_dir, 'reenact_texture',filename.split('.')[0]+'.jpg')) 93 | torch.save(normalised_landmark_dict, os.path.join(opt.src_dir,'predicted_normalised_mesh',filename)) 94 | torch.save(landmark_dict, os.path.join(opt.src_dir, 'reenact_mesh', filename)) 95 | 96 | if calculate_test_loss: 97 | print('Average Test loss : ', avg_loss) 98 | 99 | print('Start drawing reenact mesh') 100 | image = cv2.imread(os.path.join(opt.tgt_dir, 'reference_frame.png')) 101 | image_rows, image_cols, _ = image.shape 102 | draw_mesh_images(os.path.join(opt.src_dir, 'reenact_mesh'), os.path.join(opt.src_dir, 'reenact_mesh_image'), image_rows, image_cols) 103 | 104 | 105 | -------------------------------------------------------------------------------- /lipsync3d/train.py: -------------------------------------------------------------------------------- 1 | from torch import optim 2 | from torch.optim import optimizer 3 | 4 | import torch 5 | from torch.utils.data import DataLoader 6 | from options import Options 7 | from dataset import Lipsync3DMeshDataset 8 | from model import Lipsync3DMesh 9 | from loss import L2Loss 10 | from audiodvp_utils.visualizer import Visualizer 11 | import time 12 | import os 13 | 14 | import torch.nn as nn 15 | 16 | if __name__ == '__main__': 17 | opt = Options().parse_args() 18 | device = opt.device 19 | 20 | dataset = Lipsync3DMeshDataset(opt) 21 | train_dataloader = DataLoader( 22 | dataset, 23 | batch_size = opt.batch_size, 24 | shuffle = not opt.serial_batches, # default not shuffle 25 | num_workers = opt.num_workers, 26 | drop_last = True 27 | ) 28 | 29 | visualizer = Visualizer(opt) 30 | model = Lipsync3DMesh().to(device) 31 | 32 | #TODO : Define Loss function------ 33 | criterionGeo = None 34 | #--------------------------------- 35 | 36 | if opt.load_model: 37 | if os.path.exists(os.path.join(opt.tgt_dir, opt.model_name)): 38 | state_dict = torch.load(os.path.join(opt.tgt_dir, opt.model_name)) 39 | audioEncoder_state = {} 40 | geometryDecoder_state = {} 41 | 42 | for key, value in state_dict.items(): 43 | if 'AudioEncoder' in key: 44 | audioEncoder_state[key.replace('AudioEncoder.', '')] = value 45 | if 'GeometryDecoder' in key: 46 | geometryDecoder_state[key.replace('GeometryDecoder.', '')] = value 47 | 48 | model.AudioEncoder.load_state_dict(audioEncoder_state) 49 | model.GeometryDecoder.load_state_dict(geometryDecoder_state) 50 | 51 | optimizer = optim.Adam(model.parameters(), lr=opt.lr) 52 | 53 | os.makedirs(os.path.join(opt.tgt_dir, 'mesh_checkpoint'), exist_ok=True) 54 | 55 | # model = nn.DataParallel(model) 56 | 57 | total_iters = 0 58 | 59 | for epoch in range(opt.num_epoch): 60 | epoch_start_time = time.time() 61 | epoch_iter = 0 62 | 63 | for i, data in enumerate(train_dataloader): 64 | total_iters += opt.batch_size 65 | epoch_iter += opt.batch_size 66 | 67 | # TODO : Implement training process ------- 68 | geoLoss = None 69 | # ----------------------------------------- 70 | 71 | if total_iters % opt.print_freq == 0: 72 | losses = {'geoLoss' : geoLoss} 73 | 74 | visualizer.print_current_losses(epoch, epoch_iter, losses, 0, 0) 75 | visualizer.plot_current_losses(total_iters, losses) 76 | 77 | 78 | print('End of epoch %d / %d \t Time Taken: %d sec' % (epoch, opt.num_epoch, time.time() - epoch_start_time)) 79 | 80 | if epoch % opt.checkpoint_interval == 0 and epoch != 0: 81 | torch.save(model.state_dict(), os.path.join(opt.tgt_dir, 'mesh_checkpoint', 
'checkpoint_{}.pth'.format(epoch))) 82 | print("Checkpoint saved") 83 | 84 | torch.save(model.state_dict(), os.path.join(opt.tgt_dir, 'mesh.pth')) 85 | -------------------------------------------------------------------------------- /lipsync3d/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('/home/server01/jyeongho_workspace/3d_face_gcns/') 3 | 4 | from audiodvp_utils import util 5 | import numpy as np 6 | import math 7 | import torch 8 | import os 9 | import mediapipe.python.solutions.face_mesh as mp_face_mesh 10 | import mediapipe.python.solutions.drawing_utils as mp_drawing 11 | import mediapipe.python.solutions.drawing_styles as mp_drawing_styles 12 | from tqdm import tqdm 13 | import cv2 14 | 15 | # Input : 16 | # reference(dictionary from vertex idx to normalized landmark, dict[idx] = [x, y, z]) : landmark of reference frame. 17 | # target(dictionary from vertex idx to normalized landmark, dict[idx] = [x, y, z]) : landmark of target frame. 18 | # Output : 19 | # R : 3x3 Rotation matrix(np.array) 20 | # c : scale value(float) 21 | # t : 3x1 translation matrix(np.array) 22 | 23 | def Umeyama_algorithm(reference, target): 24 | # idx 2 -> nose, 130 -> left eye, 359 -> right eye 25 | idx_list = [2, 94, 19, 1, 4, 5, 195, 197, 6, 168, 8, 9, 151, 10, 109, 108, 67, 69, 103, 104, 54, 68, 338, 337, 297, 299, 332, 333, 284, 298, 130, 243, 244, 359, 362, 463, 26 | 21, 71, 162, 139, 156, 70, 63, 105, 66, 107, 336, 296, 334, 293, 300, 301, 251, 55, 285, 193, 417, 122, 351, 196, 419, 3, 248, 51, 281, 27 | 45, 275, 44, 274, 220, 440, 134, 363, 236, 456] 28 | # idx_list = [19, 243, 463] 29 | ref_points = [] 30 | tgt_points = [] 31 | 32 | for idx in idx_list: 33 | ref_points.append(reference[idx]) 34 | tgt_points.append(target[idx]) 35 | 36 | ref_points = np.array(ref_points) 37 | tgt_points = np.array(tgt_points) 38 | 39 | ref_mu = ref_points.mean(axis=0) 40 | tgt_mu = tgt_points.mean(axis=0) 41 | ref_var = ref_points.var(axis=0).sum() 42 | tgt_var = tgt_points.var(axis=0).sum() 43 | n, m = ref_points.shape 44 | covar = np.matmul((ref_points - ref_mu).T, tgt_points - tgt_mu) / n 45 | det_covar = np.linalg.det(covar) 46 | u, d, vh = np.linalg.svd(covar) 47 | detuv = np.linalg.det(u) * np.linalg.det(vh.T) 48 | cov_rank = np.linalg.matrix_rank(covar) 49 | S = np.identity(m) 50 | 51 | if cov_rank > m - 1: 52 | if det_covar < 0: 53 | S[m - 1, m - 1] = -1 54 | else: 55 | if detuv < 0: 56 | S[m - 1, m - 1] = -1 57 | 58 | R = np.matmul(np.matmul(u, S), vh) 59 | c = (1 / tgt_var) * np.trace(np.matmul(np.diag(d), S)) 60 | t = ref_mu.reshape(3, 1) - c * np.matmul(R, tgt_mu.reshape(3, 1)) 61 | 62 | return R, t, c 63 | 64 | 65 | def landmark_to_dict(landmark_list): 66 | landmark_dict = {} 67 | for idx, landmark in enumerate(landmark_list): 68 | landmark_dict[idx] = [landmark.x, landmark.y, landmark.z] 69 | 70 | return landmark_dict 71 | 72 | def landmarkdict_to_normalized_mesh_tensor(landmark_dict): 73 | vertex_list = [] 74 | for idx, coord in landmark_dict.items(): 75 | if (idx == 'R') or (idx == 't') or (idx == 'c'): 76 | continue 77 | vertex_list.append(coord) 78 | 79 | if not ('R' in landmark_dict): 80 | return torch.tensor(vertex_list) 81 | 82 | R = torch.from_numpy(landmark_dict['R']).float() 83 | t = torch.from_numpy(landmark_dict['t']).float() 84 | c = float(landmark_dict['c']) 85 | vertices = torch.tensor(vertex_list).transpose(0, 1) 86 | norm_vertices = (c * torch.matmul(R, vertices) + t).transpose(0, 1) 87 | return 
norm_vertices 88 | 89 | 90 | def landmarkdict_to_mesh_tensor(landmark_dict): 91 | vertex_list = [] 92 | for idx, coord in landmark_dict.items(): 93 | if (idx == 'R') or (idx == 't') or (idx == 'c'): 94 | continue 95 | vertex_list.append(coord) 96 | 97 | vertices = torch.tensor(vertex_list) 98 | return vertices 99 | 100 | def mesh_tensor_to_landmarkdict(mesh_tensor): 101 | landmark_dict = {} 102 | for i in range(mesh_tensor.shape[0]): 103 | landmark_dict[i] = mesh_tensor[i].tolist() 104 | 105 | return landmark_dict 106 | 107 | 108 | def draw_mesh_image(mesh_dict, save_path, image_rows, image_cols): 109 | connections = mp_face_mesh.FACEMESH_TESSELATION 110 | drawing_spec = mp_drawing.DrawingSpec(color= mp_drawing.BLACK_COLOR, thickness=1, circle_radius=1) 111 | 112 | idx_to_coordinates = {} 113 | for idx, coord in mesh_dict.items(): 114 | if (idx == 'R') or (idx == 't') or (idx == 'c'): 115 | continue 116 | x_px = min(math.floor(coord[0]), image_cols - 1) 117 | y_px = min(math.floor(coord[1]), image_rows - 1) 118 | landmark_px = (x_px, y_px) 119 | if landmark_px: 120 | idx_to_coordinates[idx] = landmark_px 121 | 122 | white_image = np.zeros([image_rows, image_cols, 3], dtype=np.uint8) 123 | white_image[:] = 255 124 | for connection in connections: 125 | start_idx = connection[0] 126 | end_idx = connection[1] 127 | 128 | if start_idx in idx_to_coordinates and end_idx in idx_to_coordinates: 129 | cv2.line(white_image, 130 | idx_to_coordinates[start_idx], 131 | idx_to_coordinates[end_idx], 132 | drawing_spec.color, 133 | drawing_spec.thickness 134 | ) 135 | cv2.imwrite(save_path, white_image) 136 | 137 | 138 | def draw_mesh_images(mesh_dir, save_dir, image_rows, image_cols): 139 | mesh_filename_list = util.get_file_list(mesh_dir) 140 | 141 | for mesh_filename in tqdm(mesh_filename_list): 142 | mesh_dict = torch.load(mesh_filename) 143 | save_path = os.path.join(save_dir, os.path.basename(mesh_filename)[:-3] + '.png') 144 | draw_mesh_image(mesh_dict, save_path, image_rows, image_cols) 145 | 146 | return 147 | -------------------------------------------------------------------------------- /make_video.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | from natsort import natsorted 4 | import argparse 5 | 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument('--src_directory', type=str, required=True) 8 | 9 | args = parser.parse_args() 10 | 11 | if __name__ == '__main__': 12 | norm_images = natsorted([os.path.join(args.src_directory, 'reenact_mesh_image', x) for x in os.listdir(os.path.join(args.src_directory, 'reenact_mesh_image'))]) 13 | out = cv2.VideoWriter('{}/temp_original.mp4'.format(args.src_directory), cv2.VideoWriter_fourcc(*'mp4v'), 25, (256, 256)) 14 | 15 | for im in norm_images: 16 | image = cv2.imread(im) 17 | out.write(image) 18 | 19 | out.release() 20 | 21 | os.system('ffmpeg -y -i {}/temp_original.mp4 -i {}/audio/audio.wav -c:v copy -c:a aac {}/predicted_mesh.mp4'.format(args.src_directory, args.src_directory, args.src_directory)) --------------------------------------------------------------------------------
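lipsync3d/model.py leaves the AudioEncoder body as a TODO and only lists the intended latent-space shapes. The sketch below (not part of the repository; the file name and the conv_block / build_audio_encoder helpers are made up for illustration) is one possible fill-in whose sole goal is to reproduce those shapes, 2 x 256 x 24 -> ... -> 4 x 4 x 2, flattened to a 32-d latent. The kernel sizes, strides, paddings and the LeakyReLU activation are assumptions, not the authors' implementation.

# sketch_audio_encoder.py -- illustrative only, not part of the original repository.
import torch
import torch.nn as nn


class View(nn.Module):
    """Same helper as in lipsync3d/model.py: reshape inside an nn.Sequential."""
    def __init__(self, shape):
        super().__init__()
        self.shape = shape

    def forward(self, x):
        return x.view(*self.shape)


def conv_block(in_ch, out_ch, kernel, stride, padding):
    # Conv + LeakyReLU; the activation choice is an assumption.
    return nn.Sequential(
        nn.Conv2d(in_ch, out_ch, kernel_size=kernel, stride=stride, padding=padding),
        nn.LeakyReLU(0.2, inplace=True),
    )


def build_audio_encoder():
    return nn.Sequential(
        # Stage 1: convolve along the frequency axis, time axis (24) untouched.
        conv_block(2,   72,  (3, 1), (2, 1), (1, 0)),   # 2 x 256 x 24 -> 72 x 128 x 24
        conv_block(72,  108, (3, 1), (2, 1), (1, 0)),   # -> 108 x 64 x 24
        conv_block(108, 162, (3, 1), (2, 1), (1, 0)),   # -> 162 x 32 x 24
        conv_block(162, 243, (3, 1), (2, 1), (1, 0)),   # -> 243 x 16 x 24
        conv_block(243, 256, (3, 1), (2, 1), (1, 0)),   # -> 256 x 8 x 24
        conv_block(256, 256, (3, 1), (2, 1), (1, 0)),   # -> 256 x 4 x 24
        # Stage 2: convolve along the time axis, frequency axis (4) untouched.
        conv_block(256, 128, (1, 4), (1, 2), (0, 2)),   # -> 128 x 4 x 13
        conv_block(128, 64,  (1, 3), (1, 2), (0, 2)),   # -> 64 x 4 x 8
        conv_block(64,  32,  (1, 4), (1, 2), (0, 2)),   # -> 32 x 4 x 5
        conv_block(32,  16,  (1, 2), (1, 1), (0, 0)),   # -> 16 x 4 x 4
        conv_block(16,  8,   (1, 2), (1, 1), (0, 0)),   # -> 8 x 4 x 3
        conv_block(8,   4,   (1, 2), (1, 1), (0, 0)),   # -> 4 x 4 x 2
        View([-1, 32]),                                  # flatten to the 32-d latent
    )


if __name__ == '__main__':
    enc = build_audio_encoder()
    spec = torch.randn(5, 2, 256, 24)   # a batch of 5 audio windows
    print(enc(spec).shape)              # expected: torch.Size([5, 32])

For the remaining TODOs in lipsync3d/train.py, test.py already hints at the missing pieces: it instantiates criterionGeo = L2Loss() and compares reference_mesh + model(audio_feature).reshape(-1, 478, 3) against normalized_mesh, so the training step presumably mirrors that forward pass followed by the usual optimizer.zero_grad() / geoLoss.backward() / optimizer.step().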