├── dataset ├── extract_full.py ├── mesh_stabilization.py ├── one_euro_filter.py └── points.py ├── lipsync3d ├── .ipynb_checkpoints │ ├── combine-audioDVP-checkpoint.ipynb │ ├── combine-checkpoint.ipynb │ └── combine-half_texture-checkpoint.ipynb ├── __init__.py ├── __pycache__ │ ├── audio.cpython-38.pyc │ ├── dataset.cpython-38.pyc │ ├── hparams.cpython-38.pyc │ ├── loss.cpython-38.pyc │ ├── model.cpython-38.pyc │ ├── options.cpython-38.pyc │ ├── utils.cpython-36.pyc │ └── utils.cpython-38.pyc ├── audio.py ├── audiodvp_utils │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── util.cpython-36.pyc │ │ ├── util.cpython-38.pyc │ │ └── visualizer.cpython-38.pyc │ ├── audio.py │ ├── build_nfr_dataset.py │ ├── crop_portrait-checkpoint.py │ ├── crop_portrait.py │ ├── face_detection │ │ ├── README.md │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── api.cpython-38.pyc │ │ │ ├── models.cpython-38.pyc │ │ │ └── utils.cpython-38.pyc │ │ ├── api.py │ │ ├── detection │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ └── core.cpython-38.pyc │ │ │ ├── core.py │ │ │ └── sfd │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── bbox.cpython-38.pyc │ │ │ │ ├── detect.cpython-38.pyc │ │ │ │ ├── net_s3fd.cpython-38.pyc │ │ │ │ └── sfd_detector.cpython-38.pyc │ │ │ │ ├── bbox.py │ │ │ │ ├── detect.py │ │ │ │ ├── net_s3fd.py │ │ │ │ └── sfd_detector.py │ │ ├── models.py │ │ └── utils.py │ ├── hparams.py │ ├── rescale_image.py │ ├── util-checkpoint.py │ ├── util.py │ └── visualizer.py ├── combine-audioDVP.ipynb ├── combine-half_texture.ipynb ├── combine.ipynb ├── dataset.py ├── demo.sh ├── face_mesh.ipynb ├── hparams.py ├── loss.py ├── model.py ├── options.py ├── pose_normalization.py ├── test.py ├── train.py └── utils.py └── make_video.py /dataset/extract_full.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import argparse 3 | import os 4 | 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument('--target_video', type=str, required=True) 7 | args = parser.parse_args() 8 | 9 | if __name__ == "__main__": 10 | os.makedirs('full', exist_ok=True) 11 | cap = cv2.VideoCapture(args.target_video) 12 | count = 0 13 | 14 | while(cap.isOpened()): 15 | ret, frame = cap.read() 16 | if ret: 17 | cv2.imwrite(os.path.join('full', '{}.png'.format(count)), frame) 18 | count += 1 19 | else: 20 | break 21 | 22 | cap.release() 23 | -------------------------------------------------------------------------------- /dataset/mesh_stabilization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from one_euro_filter import OneEuroFilter 3 | import os 4 | from natsort import natsorted 5 | import math 6 | import cv2 7 | import torch 8 | from points import topology, mouthPoints, chins, rest 9 | 10 | def applyFilter(points, t, min_cutoff, beta, skipPoints = []): 11 | filtered = np.empty_like(points) 12 | filtered[0] = points[0] 13 | one_euro_filter = OneEuroFilter(t[0], points[0], min_cutoff, beta) 14 | 15 | for i in range(1, points.shape[0]): 16 | filtered[i] = one_euro_filter(t[i], points[i]) 17 | 18 | for i in range(1, points.shape[0]): 19 | for skipPoint in skipPoints: 20 | filtered[i, skipPoint] = points[i, skipPoint] 21 | 22 | return filtered 23 | 24 | def draw_image(count, point): 25 | white_image = np.ones((256, 256, 3), np.uint8) * 255 26 | 27 | 
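# Draw every edge listed in points.topology (by all appearances the MediaPipe face-mesh
# tessellation over the 468 landmarks) as a 1-px black line, writing a wireframe preview to test/.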
for start, end in topology: 28 | start_point = point[start,:2] 29 | end_point = point[end,:2] 30 | cv2.line(white_image, start_point.astype(int), end_point.astype(int), (0,0,0), 1) 31 | 32 | cv2.imwrite('test/{}.jpg'.format(count), white_image) 33 | 34 | if __name__ == '__main__': 35 | image_height = 256 36 | image_width = 256 37 | 38 | normalised_mesh_files = natsorted([os.path.join('mesh_dict', x) for x in os.listdir(os.path.join('mesh_dict'))]) 39 | landmarks = [] 40 | for file in normalised_mesh_files: 41 | landmark = torch.load(file) 42 | R = landmark['R'] 43 | t = landmark['t'] 44 | c = landmark['c'] 45 | keys = natsorted([x for x in landmark.keys() if type(x) is int]) 46 | vertices = [] 47 | for key in keys: 48 | vertice = np.array(landmark[key]).reshape(3,1) 49 | norm_vertice = (c * np.matmul(R, vertice) + t).squeeze() 50 | x_px = min(math.floor(norm_vertice[0]), image_width - 1) 51 | y_px = min(math.floor(norm_vertice[1]), image_height - 1) 52 | z_px = min(math.floor(norm_vertice[2]), image_width - 1) 53 | vertices.append([x_px, y_px, z_px]) 54 | landmarks.append(vertices) 55 | 56 | landmarks = np.array(landmarks) 57 | 58 | shape_1, shape_2, shape_3 = landmarks.shape 59 | 60 | xs = landmarks[:,:,0].reshape((shape_1, shape_2)) 61 | ys = landmarks[:,:,1].reshape((shape_1, shape_2)) 62 | zs = landmarks[:,:,2].reshape((shape_1, shape_2)) 63 | 64 | fps = 25 65 | t = np.linspace(0, xs.shape[0]/fps, xs.shape[0]) 66 | 67 | xs_hat = applyFilter(xs, t, 0.005, 0.7) 68 | ys_hat = applyFilter(ys, t, 0.005, 0.7, mouthPoints + chins) 69 | ys_hat = applyFilter(ys_hat, t, 0.000001, 1.5, rest) 70 | zs_hat = applyFilter(zs, t, 0.005, 0.7) 71 | combine = np.stack(((xs_hat, ys_hat, zs_hat)), axis=2) 72 | 73 | count = [i for i in range(combine.shape[0])] 74 | 75 | os.makedirs(os.path.join('stabilized_norm_mesh'),exist_ok=True) 76 | for i in range(combine.shape[0]): 77 | torch.save(combine[i], os.path.join('stabilized_norm_mesh', '{}.pt'.format(count[i]))) 78 | -------------------------------------------------------------------------------- /dataset/one_euro_filter.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 4 | def smoothing_factor(t_e, cutoff): 5 | r = 2 * math.pi * cutoff * t_e 6 | return r / (r + 1) 7 | 8 | 9 | def exponential_smoothing(a, x, x_prev): 10 | return a * x + (1 - a) * x_prev 11 | 12 | 13 | class OneEuroFilter: 14 | def __init__(self, t0, x0, dx0=0.0, min_cutoff=1.0, beta=0.0, 15 | d_cutoff=1.0): 16 | """Initialize the one euro filter.""" 17 | # The parameters. 18 | self.min_cutoff = float(min_cutoff) 19 | self.beta = float(beta) 20 | self.d_cutoff = float(d_cutoff) 21 | # Previous values. 22 | self.x_prev = x0 23 | self.dx_prev = dx0 24 | self.t_prev = float(t0) 25 | 26 | def __call__(self, t, x): 27 | """Compute the filtered signal.""" 28 | t_e = t - self.t_prev 29 | 30 | # The filtered derivative of the signal. 31 | a_d = smoothing_factor(t_e, self.d_cutoff) 32 | dx = (x - self.x_prev) / t_e 33 | dx_hat = exponential_smoothing(a_d, dx, self.dx_prev) 34 | 35 | # The filtered signal. 36 | cutoff = self.min_cutoff + self.beta * abs(dx_hat) 37 | a = smoothing_factor(t_e, cutoff) 38 | x_hat = exponential_smoothing(a, x, self.x_prev) 39 | 40 | # Memorize the previous values. 
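# Note: the filter is stateful -- each call must receive a strictly increasing timestamp t,
# otherwise t_e becomes zero or negative and the derivative estimate above divides by zero.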
41 | self.x_prev = x_hat 42 | self.dx_prev = dx_hat 43 | self.t_prev = t 44 | 45 | return x_hat 46 | -------------------------------------------------------------------------------- /dataset/points.py: -------------------------------------------------------------------------------- 1 | topology = [ 2 | (127, 34), (34, 139), (139, 127), (11, 0), (0, 37), (37, 11), 3 | (232, 231), (231, 120), (120, 232), (72, 37), (37, 39), (39, 72), 4 | (128, 121), (121, 47), (47, 128), (232, 121), (121, 128), (128, 232), 5 | (104, 69), (69, 67), (67, 104), (175, 171), (171, 148), (148, 175), 6 | (118, 50), (50, 101), (101, 118), (73, 39), (39, 40), (40, 73), 7 | (9, 151), (151, 108), (108, 9), (48, 115), (115, 131), (131, 48), 8 | (194, 204), (204, 211), (211, 194), (74, 40), (40, 185), (185, 74), 9 | (80, 42), (42, 183), (183, 80), (40, 92), (92, 186), (186, 40), 10 | (230, 229), (229, 118), (118, 230), (202, 212), (212, 214), (214, 202), 11 | (83, 18), (18, 17), (17, 83), (76, 61), (61, 146), (146, 76), 12 | (160, 29), (29, 30), (30, 160), (56, 157), (157, 173), (173, 56), 13 | (106, 204), (204, 194), (194, 106), (135, 214), (214, 192), (192, 135), 14 | (203, 165), (165, 98), (98, 203), (21, 71), (71, 68), (68, 21), 15 | (51, 45), (45, 4), (4, 51), (144, 24), (24, 23), (23, 144), 16 | (77, 146), (146, 91), (91, 77), (205, 50), (50, 187), (187, 205), 17 | (201, 200), (200, 18), (18, 201), (91, 106), (106, 182), (182, 91), 18 | (90, 91), (91, 181), (181, 90), (85, 84), (84, 17), (17, 85), 19 | (206, 203), (203, 36), (36, 206), (148, 171), (171, 140), (140, 148), 20 | (92, 40), (40, 39), (39, 92), (193, 189), (189, 244), (244, 193), 21 | (159, 158), (158, 28), (28, 159), (247, 246), (246, 161), (161, 247), 22 | (236, 3), (3, 196), (196, 236), (54, 68), (68, 104), (104, 54), 23 | (193, 168), (168, 8), (8, 193), (117, 228), (228, 31), (31, 117), 24 | (189, 193), (193, 55), (55, 189), (98, 97), (97, 99), (99, 98), 25 | (126, 47), (47, 100), (100, 126), (166, 79), (79, 218), (218, 166), 26 | (155, 154), (154, 26), (26, 155), (209, 49), (49, 131), (131, 209), 27 | (135, 136), (136, 150), (150, 135), (47, 126), (126, 217), (217, 47), 28 | (223, 52), (52, 53), (53, 223), (45, 51), (51, 134), (134, 45), 29 | (211, 170), (170, 140), (140, 211), (67, 69), (69, 108), (108, 67), 30 | (43, 106), (106, 91), (91, 43), (230, 119), (119, 120), (120, 230), 31 | (226, 130), (130, 247), (247, 226), (63, 53), (53, 52), (52, 63), 32 | (238, 20), (20, 242), (242, 238), (46, 70), (70, 156), (156, 46), 33 | (78, 62), (62, 96), (96, 78), (46, 53), (53, 63), (63, 46), 34 | (143, 34), (34, 227), (227, 143), (123, 117), (117, 111), (111, 123), 35 | (44, 125), (125, 19), (19, 44), (236, 134), (134, 51), (51, 236), 36 | (216, 206), (206, 205), (205, 216), (154, 153), (153, 22), (22, 154), 37 | (39, 37), (37, 167), (167, 39), (200, 201), (201, 208), (208, 200), 38 | (36, 142), (142, 100), (100, 36), (57, 212), (212, 202), (202, 57), 39 | (20, 60), (60, 99), (99, 20), (28, 158), (158, 157), (157, 28), 40 | (35, 226), (226, 113), (113, 35), (160, 159), (159, 27), (27, 160), 41 | (204, 202), (202, 210), (210, 204), (113, 225), (225, 46), (46, 113), 42 | (43, 202), (202, 204), (204, 43), (62, 76), (76, 77), (77, 62), 43 | (137, 123), (123, 116), (116, 137), (41, 38), (38, 72), (72, 41), 44 | (203, 129), (129, 142), (142, 203), (64, 98), (98, 240), (240, 64), 45 | (49, 102), (102, 64), (64, 49), (41, 73), (73, 74), (74, 41), 46 | (212, 216), (216, 207), (207, 212), (42, 74), (74, 184), (184, 42), 47 | (169, 170), (170, 211), (211, 169), (170, 
149), (149, 176), (176, 170), 48 | (105, 66), (66, 69), (69, 105), (122, 6), (6, 168), (168, 122), 49 | (123, 147), (147, 187), (187, 123), (96, 77), (77, 90), (90, 96), 50 | (65, 55), (55, 107), (107, 65), (89, 90), (90, 180), (180, 89), 51 | (101, 100), (100, 120), (120, 101), (63, 105), (105, 104), (104, 63), 52 | (93, 137), (137, 227), (227, 93), (15, 86), (86, 85), (85, 15), 53 | (129, 102), (102, 49), (49, 129), (14, 87), (87, 86), (86, 14), 54 | (55, 8), (8, 9), (9, 55), (100, 47), (47, 121), (121, 100), 55 | (145, 23), (23, 22), (22, 145), (88, 89), (89, 179), (179, 88), 56 | (6, 122), (122, 196), (196, 6), (88, 95), (95, 96), (96, 88), 57 | (138, 172), (172, 136), (136, 138), (215, 58), (58, 172), (172, 215), 58 | (115, 48), (48, 219), (219, 115), (42, 80), (80, 81), (81, 42), 59 | (195, 3), (3, 51), (51, 195), (43, 146), (146, 61), (61, 43), 60 | (171, 175), (175, 199), (199, 171), (81, 82), (82, 38), (38, 81), 61 | (53, 46), (46, 225), (225, 53), (144, 163), (163, 110), (110, 144), 62 | (52, 65), (65, 66), (66, 52), (229, 228), (228, 117), (117, 229), 63 | (34, 127), (127, 234), (234, 34), (107, 108), (108, 69), (69, 107), 64 | (109, 108), (108, 151), (151, 109), (48, 64), (64, 235), (235, 48), 65 | (62, 78), (78, 191), (191, 62), (129, 209), (209, 126), (126, 129), 66 | (111, 35), (35, 143), (143, 111), (117, 123), (123, 50), (50, 117), 67 | (222, 65), (65, 52), (52, 222), (19, 125), (125, 141), (141, 19), 68 | (221, 55), (55, 65), (65, 221), (3, 195), (195, 197), (197, 3), 69 | (25, 7), (7, 33), (33, 25), (220, 237), (237, 44), (44, 220), 70 | (70, 71), (71, 139), (139, 70), (122, 193), (193, 245), (245, 122), 71 | (247, 130), (130, 33), (33, 247), (71, 21), (21, 162), (162, 71), 72 | (170, 169), (169, 150), (150, 170), (188, 174), (174, 196), (196, 188), 73 | (216, 186), (186, 92), (92, 216), (2, 97), (97, 167), (167, 2), 74 | (141, 125), (125, 241), (241, 141), (164, 167), (167, 37), (37, 164), 75 | (72, 38), (38, 12), (12, 72), (38, 82), (82, 13), (13, 38), 76 | (63, 68), (68, 71), (71, 63), (226, 35), (35, 111), (111, 226), 77 | (101, 50), (50, 205), (205, 101), (206, 92), (92, 165), (165, 206), 78 | (209, 198), (198, 217), (217, 209), (165, 167), (167, 97), (97, 165), 79 | (220, 115), (115, 218), (218, 220), (133, 112), (112, 243), (243, 133), 80 | (239, 238), (238, 241), (241, 239), (214, 135), (135, 169), (169, 214), 81 | (190, 173), (173, 133), (133, 190), (171, 208), (208, 32), (32, 171), 82 | (125, 44), (44, 237), (237, 125), (86, 87), (87, 178), (178, 86), 83 | (85, 86), (86, 179), (179, 85), (84, 85), (85, 180), (180, 84), 84 | (83, 84), (84, 181), (181, 83), (201, 83), (83, 182), (182, 201), 85 | (137, 93), (93, 132), (132, 137), (76, 62), (62, 183), (183, 76), 86 | (61, 76), (76, 184), (184, 61), (57, 61), (61, 185), (185, 57), 87 | (212, 57), (57, 186), (186, 212), (214, 207), (207, 187), (187, 214), 88 | (34, 143), (143, 156), (156, 34), (79, 239), (239, 237), (237, 79), 89 | (123, 137), (137, 177), (177, 123), (44, 1), (1, 4), (4, 44), 90 | (201, 194), (194, 32), (32, 201), (64, 102), (102, 129), (129, 64), 91 | (213, 215), (215, 138), (138, 213), (59, 166), (166, 219), (219, 59), 92 | (242, 99), (99, 97), (97, 242), (2, 94), (94, 141), (141, 2), 93 | (75, 59), (59, 235), (235, 75), (24, 110), (110, 228), (228, 24), 94 | (25, 130), (130, 226), (226, 25), (23, 24), (24, 229), (229, 23), 95 | (22, 23), (23, 230), (230, 22), (26, 22), (22, 231), (231, 26), 96 | (112, 26), (26, 232), (232, 112), (189, 190), (190, 243), (243, 189), 97 | (221, 56), (56, 190), (190, 
221), (28, 56), (56, 221), (221, 28), 98 | (27, 28), (28, 222), (222, 27), (29, 27), (27, 223), (223, 29), 99 | (30, 29), (29, 224), (224, 30), (247, 30), (30, 225), (225, 247), 100 | (238, 79), (79, 20), (20, 238), (166, 59), (59, 75), (75, 166), 101 | (60, 75), (75, 240), (240, 60), (147, 177), (177, 215), (215, 147), 102 | (20, 79), (79, 166), (166, 20), (187, 147), (147, 213), (213, 187), 103 | (112, 233), (233, 244), (244, 112), (233, 128), (128, 245), (245, 233), 104 | (128, 114), (114, 188), (188, 128), (114, 217), (217, 174), (174, 114), 105 | (131, 115), (115, 220), (220, 131), (217, 198), (198, 236), (236, 217), 106 | (198, 131), (131, 134), (134, 198), (177, 132), (132, 58), (58, 177), 107 | (143, 35), (35, 124), (124, 143), (110, 163), (163, 7), (7, 110), 108 | (228, 110), (110, 25), (25, 228), (356, 389), (389, 368), (368, 356), 109 | (11, 302), (302, 267), (267, 11), (452, 350), (350, 349), (349, 452), 110 | (302, 303), (303, 269), (269, 302), (357, 343), (343, 277), (277, 357), 111 | (452, 453), (453, 357), (357, 452), (333, 332), (332, 297), (297, 333), 112 | (175, 152), (152, 377), (377, 175), (347, 348), (348, 330), (330, 347), 113 | (303, 304), (304, 270), (270, 303), (9, 336), (336, 337), (337, 9), 114 | (278, 279), (279, 360), (360, 278), (418, 262), (262, 431), (431, 418), 115 | (304, 408), (408, 409), (409, 304), (310, 415), (415, 407), (407, 310), 116 | (270, 409), (409, 410), (410, 270), (450, 348), (348, 347), (347, 450), 117 | (422, 430), (430, 434), (434, 422), (313, 314), (314, 17), (17, 313), 118 | (306, 307), (307, 375), (375, 306), (387, 388), (388, 260), (260, 387), 119 | (286, 414), (414, 398), (398, 286), (335, 406), (406, 418), (418, 335), 120 | (364, 367), (367, 416), (416, 364), (423, 358), (358, 327), (327, 423), 121 | (251, 284), (284, 298), (298, 251), (281, 5), (5, 4), (4, 281), 122 | (373, 374), (374, 253), (253, 373), (307, 320), (320, 321), (321, 307), 123 | (425, 427), (427, 411), (411, 425), (421, 313), (313, 18), (18, 421), 124 | (321, 405), (405, 406), (406, 321), (320, 404), (404, 405), (405, 320), 125 | (315, 16), (16, 17), (17, 315), (426, 425), (425, 266), (266, 426), 126 | (377, 400), (400, 369), (369, 377), (322, 391), (391, 269), (269, 322), 127 | (417, 465), (465, 464), (464, 417), (386, 257), (257, 258), (258, 386), 128 | (466, 260), (260, 388), (388, 466), (456, 399), (399, 419), (419, 456), 129 | (284, 332), (332, 333), (333, 284), (417, 285), (285, 8), (8, 417), 130 | (346, 340), (340, 261), (261, 346), (413, 441), (441, 285), (285, 413), 131 | (327, 460), (460, 328), (328, 327), (355, 371), (371, 329), (329, 355), 132 | (392, 439), (439, 438), (438, 392), (382, 341), (341, 256), (256, 382), 133 | (429, 420), (420, 360), (360, 429), (364, 394), (394, 379), (379, 364), 134 | (277, 343), (343, 437), (437, 277), (443, 444), (444, 283), (283, 443), 135 | (275, 440), (440, 363), (363, 275), (431, 262), (262, 369), (369, 431), 136 | (297, 338), (338, 337), (337, 297), (273, 375), (375, 321), (321, 273), 137 | (450, 451), (451, 349), (349, 450), (446, 342), (342, 467), (467, 446), 138 | (293, 334), (334, 282), (282, 293), (458, 461), (461, 462), (462, 458), 139 | (276, 353), (353, 383), (383, 276), (308, 324), (324, 325), (325, 308), 140 | (276, 300), (300, 293), (293, 276), (372, 345), (345, 447), (447, 372), 141 | (352, 345), (345, 340), (340, 352), (274, 1), (1, 19), (19, 274), 142 | (456, 248), (248, 281), (281, 456), (436, 427), (427, 425), (425, 436), 143 | (381, 256), (256, 252), (252, 381), (269, 391), (391, 393), (393, 269), 
144 | (200, 199), (199, 428), (428, 200), (266, 330), (330, 329), (329, 266), 145 | (287, 273), (273, 422), (422, 287), (250, 462), (462, 328), (328, 250), 146 | (258, 286), (286, 384), (384, 258), (265, 353), (353, 342), (342, 265), 147 | (387, 259), (259, 257), (257, 387), (424, 431), (431, 430), (430, 424), 148 | (342, 353), (353, 276), (276, 342), (273, 335), (335, 424), (424, 273), 149 | (292, 325), (325, 307), (307, 292), (366, 447), (447, 345), (345, 366), 150 | (271, 303), (303, 302), (302, 271), (423, 266), (266, 371), (371, 423), 151 | (294, 455), (455, 460), (460, 294), (279, 278), (278, 294), (294, 279), 152 | (271, 272), (272, 304), (304, 271), (432, 434), (434, 427), (427, 432), 153 | (272, 407), (407, 408), (408, 272), (394, 430), (430, 431), (431, 394), 154 | (395, 369), (369, 400), (400, 395), (334, 333), (333, 299), (299, 334), 155 | (351, 417), (417, 168), (168, 351), (352, 280), (280, 411), (411, 352), 156 | (325, 319), (319, 320), (320, 325), (295, 296), (296, 336), (336, 295), 157 | (319, 403), (403, 404), (404, 319), (330, 348), (348, 349), (349, 330), 158 | (293, 298), (298, 333), (333, 293), (323, 454), (454, 447), (447, 323), 159 | (15, 16), (16, 315), (315, 15), (358, 429), (429, 279), (279, 358), 160 | (14, 15), (15, 316), (316, 14), (285, 336), (336, 9), (9, 285), 161 | (329, 349), (349, 350), (350, 329), (374, 380), (380, 252), (252, 374), 162 | (318, 402), (402, 403), (403, 318), (6, 197), (197, 419), (419, 6), 163 | (318, 319), (319, 325), (325, 318), (367, 364), (364, 365), (365, 367), 164 | (435, 367), (367, 397), (397, 435), (344, 438), (438, 439), (439, 344), 165 | (272, 271), (271, 311), (311, 272), (195, 5), (5, 281), (281, 195), 166 | (273, 287), (287, 291), (291, 273), (396, 428), (428, 199), (199, 396), 167 | (311, 271), (271, 268), (268, 311), (283, 444), (444, 445), (445, 283), 168 | (373, 254), (254, 339), (339, 373), (282, 334), (334, 296), (296, 282), 169 | (449, 347), (347, 346), (346, 449), (264, 447), (447, 454), (454, 264), 170 | (336, 296), (296, 299), (299, 336), (338, 10), (10, 151), (151, 338), 171 | (278, 439), (439, 455), (455, 278), (292, 407), (407, 415), (415, 292), 172 | (358, 371), (371, 355), (355, 358), (340, 345), (345, 372), (372, 340), 173 | (346, 347), (347, 280), (280, 346), (442, 443), (443, 282), (282, 442), 174 | (19, 94), (94, 370), (370, 19), (441, 442), (442, 295), (295, 441), 175 | (248, 419), (419, 197), (197, 248), (263, 255), (255, 359), (359, 263), 176 | (440, 275), (275, 274), (274, 440), (300, 383), (383, 368), (368, 300), 177 | (351, 412), (412, 465), (465, 351), (263, 467), (467, 466), (466, 263), 178 | (301, 368), (368, 389), (389, 301), (395, 378), (378, 379), (379, 395), 179 | (412, 351), (351, 419), (419, 412), (436, 426), (426, 322), (322, 436), 180 | (2, 164), (164, 393), (393, 2), (370, 462), (462, 461), (461, 370), 181 | (164, 0), (0, 267), (267, 164), (302, 11), (11, 12), (12, 302), 182 | (268, 12), (12, 13), (13, 268), (293, 300), (300, 301), (301, 293), 183 | (446, 261), (261, 340), (340, 446), (330, 266), (266, 425), (425, 330), 184 | (426, 423), (423, 391), (391, 426), (429, 355), (355, 437), (437, 429), 185 | (391, 327), (327, 326), (326, 391), (440, 457), (457, 438), (438, 440), 186 | (341, 382), (382, 362), (362, 341), (459, 457), (457, 461), (461, 459), 187 | (434, 430), (430, 394), (394, 434), (414, 463), (463, 362), (362, 414), 188 | (396, 369), (369, 262), (262, 396), (354, 461), (461, 457), (457, 354), 189 | (316, 403), (403, 402), (402, 316), (315, 404), (404, 403), (403, 315), 190 | 
(314, 405), (405, 404), (404, 314), (313, 406), (406, 405), (405, 313), 191 | (421, 418), (418, 406), (406, 421), (366, 401), (401, 361), (361, 366), 192 | (306, 408), (408, 407), (407, 306), (291, 409), (409, 408), (408, 291), 193 | (287, 410), (410, 409), (409, 287), (432, 436), (436, 410), (410, 432), 194 | (434, 416), (416, 411), (411, 434), (264, 368), (368, 383), (383, 264), 195 | (309, 438), (438, 457), (457, 309), (352, 376), (376, 401), (401, 352), 196 | (274, 275), (275, 4), (4, 274), (421, 428), (428, 262), (262, 421), 197 | (294, 327), (327, 358), (358, 294), (433, 416), (416, 367), (367, 433), 198 | (289, 455), (455, 439), (439, 289), (462, 370), (370, 326), (326, 462), 199 | (2, 326), (326, 370), (370, 2), (305, 460), (460, 455), (455, 305), 200 | (254, 449), (449, 448), (448, 254), (255, 261), (261, 446), (446, 255), 201 | (253, 450), (450, 449), (449, 253), (252, 451), (451, 450), (450, 252), 202 | (256, 452), (452, 451), (451, 256), (341, 453), (453, 452), (452, 341), 203 | (413, 464), (464, 463), (463, 413), (441, 413), (413, 414), (414, 441), 204 | (258, 442), (442, 441), (441, 258), (257, 443), (443, 442), (442, 257), 205 | (259, 444), (444, 443), (443, 259), (260, 445), (445, 444), (444, 260), 206 | (467, 342), (342, 445), (445, 467), (459, 458), (458, 250), (250, 459), 207 | (289, 392), (392, 290), (290, 289), (290, 328), (328, 460), (460, 290), 208 | (376, 433), (433, 435), (435, 376), (250, 290), (290, 392), (392, 250), 209 | (411, 416), (416, 433), (433, 411), (341, 463), (463, 464), (464, 341), 210 | (453, 464), (464, 465), (465, 453), (357, 465), (465, 412), (412, 357), 211 | (343, 412), (412, 399), (399, 343), (360, 363), (363, 440), (440, 360), 212 | (437, 399), (399, 456), (456, 437), (420, 456), (456, 363), (363, 420), 213 | (401, 435), (435, 288), (288, 401), (372, 383), (383, 353), (353, 372), 214 | (339, 255), (255, 249), (249, 339), (448, 261), (261, 255), (255, 448), 215 | (133, 243), (243, 190), (190, 133), (133, 155), (155, 112), (112, 133), 216 | (33, 246), (246, 247), (247, 33), (33, 130), (130, 25), (25, 33), 217 | (398, 384), (384, 286), (286, 398), (362, 398), (398, 414), (414, 362), 218 | (362, 463), (463, 341), (341, 362), (263, 359), (359, 467), (467, 263), 219 | (263, 249), (249, 255), (255, 263), (466, 467), (467, 260), (260, 466), 220 | (75, 60), (60, 166), (166, 75), (238, 239), (239, 79), (79, 238), 221 | (162, 127), (127, 139), (139, 162), (72, 11), (11, 37), (37, 72), 222 | (121, 232), (232, 120), (120, 121), (73, 72), (72, 39), (39, 73), 223 | (114, 128), (128, 47), (47, 114), (233, 232), (232, 128), (128, 233), 224 | (103, 104), (104, 67), (67, 103), (152, 175), (175, 148), (148, 152), 225 | (119, 118), (118, 101), (101, 119), (74, 73), (73, 40), (40, 74), 226 | (107, 9), (9, 108), (108, 107), (49, 48), (48, 131), (131, 49), 227 | (32, 194), (194, 211), (211, 32), (184, 74), (74, 185), (185, 184), 228 | (191, 80), (80, 183), (183, 191), (185, 40), (40, 186), (186, 185), 229 | (119, 230), (230, 118), (118, 119), (210, 202), (202, 214), (214, 210), 230 | (84, 83), (83, 17), (17, 84), (77, 76), (76, 146), (146, 77), 231 | (161, 160), (160, 30), (30, 161), (190, 56), (56, 173), (173, 190), 232 | (182, 106), (106, 194), (194, 182), (138, 135), (135, 192), (192, 138), 233 | (129, 203), (203, 98), (98, 129), (54, 21), (21, 68), (68, 54), 234 | (5, 51), (51, 4), (4, 5), (145, 144), (144, 23), (23, 145), 235 | (90, 77), (77, 91), (91, 90), (207, 205), (205, 187), (187, 207), 236 | (83, 201), (201, 18), (18, 83), (181, 91), (91, 182), (182, 
181), 237 | (180, 90), (90, 181), (181, 180), (16, 85), (85, 17), (17, 16), 238 | (205, 206), (206, 36), (36, 205), (176, 148), (148, 140), (140, 176), 239 | (165, 92), (92, 39), (39, 165), (245, 193), (193, 244), (244, 245), 240 | (27, 159), (159, 28), (28, 27), (30, 247), (247, 161), (161, 30), 241 | (174, 236), (236, 196), (196, 174), (103, 54), (54, 104), (104, 103), 242 | (55, 193), (193, 8), (8, 55), (111, 117), (117, 31), (31, 111), 243 | (221, 189), (189, 55), (55, 221), (240, 98), (98, 99), (99, 240), 244 | (142, 126), (126, 100), (100, 142), (219, 166), (166, 218), (218, 219), 245 | (112, 155), (155, 26), (26, 112), (198, 209), (209, 131), (131, 198), 246 | (169, 135), (135, 150), (150, 169), (114, 47), (47, 217), (217, 114), 247 | (224, 223), (223, 53), (53, 224), (220, 45), (45, 134), (134, 220), 248 | (32, 211), (211, 140), (140, 32), (109, 67), (67, 108), (108, 109), 249 | (146, 43), (43, 91), (91, 146), (231, 230), (230, 120), (120, 231), 250 | (113, 226), (226, 247), (247, 113), (105, 63), (63, 52), (52, 105), 251 | (241, 238), (238, 242), (242, 241), (124, 46), (46, 156), (156, 124), 252 | (95, 78), (78, 96), (96, 95), (70, 46), (46, 63), (63, 70), 253 | (116, 143), (143, 227), (227, 116), (116, 123), (123, 111), (111, 116), 254 | (1, 44), (44, 19), (19, 1), (3, 236), (236, 51), (51, 3), 255 | (207, 216), (216, 205), (205, 207), (26, 154), (154, 22), (22, 26), 256 | (165, 39), (39, 167), (167, 165), (199, 200), (200, 208), (208, 199), 257 | (101, 36), (36, 100), (100, 101), (43, 57), (57, 202), (202, 43), 258 | (242, 20), (20, 99), (99, 242), (56, 28), (28, 157), (157, 56), 259 | (124, 35), (35, 113), (113, 124), (29, 160), (160, 27), (27, 29), 260 | (211, 204), (204, 210), (210, 211), (124, 113), (113, 46), (46, 124), 261 | (106, 43), (43, 204), (204, 106), (96, 62), (62, 77), (77, 96), 262 | (227, 137), (137, 116), (116, 227), (73, 41), (41, 72), (72, 73), 263 | (36, 203), (203, 142), (142, 36), (235, 64), (64, 240), (240, 235), 264 | (48, 49), (49, 64), (64, 48), (42, 41), (41, 74), (74, 42), 265 | (214, 212), (212, 207), (207, 214), (183, 42), (42, 184), (184, 183), 266 | (210, 169), (169, 211), (211, 210), (140, 170), (170, 176), (176, 140), 267 | (104, 105), (105, 69), (69, 104), (193, 122), (122, 168), (168, 193), 268 | (50, 123), (123, 187), (187, 50), (89, 96), (96, 90), (90, 89), 269 | (66, 65), (65, 107), (107, 66), (179, 89), (89, 180), (180, 179), 270 | (119, 101), (101, 120), (120, 119), (68, 63), (63, 104), (104, 68), 271 | (234, 93), (93, 227), (227, 234), (16, 15), (15, 85), (85, 16), 272 | (209, 129), (129, 49), (49, 209), (15, 14), (14, 86), (86, 15), 273 | (107, 55), (55, 9), (9, 107), (120, 100), (100, 121), (121, 120), 274 | (153, 145), (145, 22), (22, 153), (178, 88), (88, 179), (179, 178), 275 | (197, 6), (6, 196), (196, 197), (89, 88), (88, 96), (96, 89), 276 | (135, 138), (138, 136), (136, 135), (138, 215), (215, 172), (172, 138), 277 | (218, 115), (115, 219), (219, 218), (41, 42), (42, 81), (81, 41), 278 | (5, 195), (195, 51), (51, 5), (57, 43), (43, 61), (61, 57), 279 | (208, 171), (171, 199), (199, 208), (41, 81), (81, 38), (38, 41), 280 | (224, 53), (53, 225), (225, 224), (24, 144), (144, 110), (110, 24), 281 | (105, 52), (52, 66), (66, 105), (118, 229), (229, 117), (117, 118), 282 | (227, 34), (34, 234), (234, 227), (66, 107), (107, 69), (69, 66), 283 | (10, 109), (109, 151), (151, 10), (219, 48), (48, 235), (235, 219), 284 | (183, 62), (62, 191), (191, 183), (142, 129), (129, 126), (126, 142), 285 | (116, 111), (111, 143), (143, 116), (118, 
117), (117, 50), (50, 118), 286 | (223, 222), (222, 52), (52, 223), (94, 19), (19, 141), (141, 94), 287 | (222, 221), (221, 65), (65, 222), (196, 3), (3, 197), (197, 196), 288 | (45, 220), (220, 44), (44, 45), (156, 70), (70, 139), (139, 156), 289 | (188, 122), (122, 245), (245, 188), (139, 71), (71, 162), (162, 139), 290 | (149, 170), (170, 150), (150, 149), (122, 188), (188, 196), (196, 122), 291 | (206, 216), (216, 92), (92, 206), (164, 2), (2, 167), (167, 164), 292 | (242, 141), (141, 241), (241, 242), (0, 164), (164, 37), (37, 0), 293 | (11, 72), (72, 12), (12, 11), (12, 38), (38, 13), (13, 12), 294 | (70, 63), (63, 71), (71, 70), (31, 226), (226, 111), (111, 31), 295 | (36, 101), (101, 205), (205, 36), (203, 206), (206, 165), (165, 203), 296 | (126, 209), (209, 217), (217, 126), (98, 165), (165, 97), (97, 98), 297 | (237, 220), (220, 218), (218, 237), (237, 239), (239, 241), (241, 237), 298 | (210, 214), (214, 169), (169, 210), (140, 171), (171, 32), (32, 140), 299 | (241, 125), (125, 237), (237, 241), (179, 86), (86, 178), (178, 179), 300 | (180, 85), (85, 179), (179, 180), (181, 84), (84, 180), (180, 181), 301 | (182, 83), (83, 181), (181, 182), (194, 201), (201, 182), (182, 194), 302 | (177, 137), (137, 132), (132, 177), (184, 76), (76, 183), (183, 184), 303 | (185, 61), (61, 184), (184, 185), (186, 57), (57, 185), (185, 186), 304 | (216, 212), (212, 186), (186, 216), (192, 214), (214, 187), (187, 192), 305 | (139, 34), (34, 156), (156, 139), (218, 79), (79, 237), (237, 218), 306 | (147, 123), (123, 177), (177, 147), (45, 44), (44, 4), (4, 45), 307 | (208, 201), (201, 32), (32, 208), (98, 64), (64, 129), (129, 98), 308 | (192, 213), (213, 138), (138, 192), (235, 59), (59, 219), (219, 235), 309 | (141, 242), (242, 97), (97, 141), (97, 2), (2, 141), (141, 97), 310 | (240, 75), (75, 235), (235, 240), (229, 24), (24, 228), (228, 229), 311 | (31, 25), (25, 226), (226, 31), (230, 23), (23, 229), (229, 230), 312 | (231, 22), (22, 230), (230, 231), (232, 26), (26, 231), (231, 232), 313 | (233, 112), (112, 232), (232, 233), (244, 189), (189, 243), (243, 244), 314 | (189, 221), (221, 190), (190, 189), (222, 28), (28, 221), (221, 222), 315 | (223, 27), (27, 222), (222, 223), (224, 29), (29, 223), (223, 224), 316 | (225, 30), (30, 224), (224, 225), (113, 247), (247, 225), (225, 113), 317 | (99, 60), (60, 240), (240, 99), (213, 147), (147, 215), (215, 213), 318 | (60, 20), (20, 166), (166, 60), (192, 187), (187, 213), (213, 192), 319 | (243, 112), (112, 244), (244, 243), (244, 233), (233, 245), (245, 244), 320 | (245, 128), (128, 188), (188, 245), (188, 114), (114, 174), (174, 188), 321 | (134, 131), (131, 220), (220, 134), (174, 217), (217, 236), (236, 174), 322 | (236, 198), (198, 134), (134, 236), (215, 177), (177, 58), (58, 215), 323 | (156, 143), (143, 124), (124, 156), (25, 110), (110, 7), (7, 25), 324 | (31, 228), (228, 25), (25, 31), (264, 356), (356, 368), (368, 264), 325 | (0, 11), (11, 267), (267, 0), (451, 452), (452, 349), (349, 451), 326 | (267, 302), (302, 269), (269, 267), (350, 357), (357, 277), (277, 350), 327 | (350, 452), (452, 357), (357, 350), (299, 333), (333, 297), (297, 299), 328 | (396, 175), (175, 377), (377, 396), (280, 347), (347, 330), (330, 280), 329 | (269, 303), (303, 270), (270, 269), (151, 9), (9, 337), (337, 151), 330 | (344, 278), (278, 360), (360, 344), (424, 418), (418, 431), (431, 424), 331 | (270, 304), (304, 409), (409, 270), (272, 310), (310, 407), (407, 272), 332 | (322, 270), (270, 410), (410, 322), (449, 450), (450, 347), (347, 449), 333 | (432, 
422), (422, 434), (434, 432), (18, 313), (313, 17), (17, 18), 334 | (291, 306), (306, 375), (375, 291), (259, 387), (387, 260), (260, 259), 335 | (424, 335), (335, 418), (418, 424), (434, 364), (364, 416), (416, 434), 336 | (391, 423), (423, 327), (327, 391), (301, 251), (251, 298), (298, 301), 337 | (275, 281), (281, 4), (4, 275), (254, 373), (373, 253), (253, 254), 338 | (375, 307), (307, 321), (321, 375), (280, 425), (425, 411), (411, 280), 339 | (200, 421), (421, 18), (18, 200), (335, 321), (321, 406), (406, 335), 340 | (321, 320), (320, 405), (405, 321), (314, 315), (315, 17), (17, 314), 341 | (423, 426), (426, 266), (266, 423), (396, 377), (377, 369), (369, 396), 342 | (270, 322), (322, 269), (269, 270), (413, 417), (417, 464), (464, 413), 343 | (385, 386), (386, 258), (258, 385), (248, 456), (456, 419), (419, 248), 344 | (298, 284), (284, 333), (333, 298), (168, 417), (417, 8), (8, 168), 345 | (448, 346), (346, 261), (261, 448), (417, 413), (413, 285), (285, 417), 346 | (326, 327), (327, 328), (328, 326), (277, 355), (355, 329), (329, 277), 347 | (309, 392), (392, 438), (438, 309), (381, 382), (382, 256), (256, 381), 348 | (279, 429), (429, 360), (360, 279), (365, 364), (364, 379), (379, 365), 349 | (355, 277), (277, 437), (437, 355), (282, 443), (443, 283), (283, 282), 350 | (281, 275), (275, 363), (363, 281), (395, 431), (431, 369), (369, 395), 351 | (299, 297), (297, 337), (337, 299), (335, 273), (273, 321), (321, 335), 352 | (348, 450), (450, 349), (349, 348), (359, 446), (446, 467), (467, 359), 353 | (283, 293), (293, 282), (282, 283), (250, 458), (458, 462), (462, 250), 354 | (300, 276), (276, 383), (383, 300), (292, 308), (308, 325), (325, 292), 355 | (283, 276), (276, 293), (293, 283), (264, 372), (372, 447), (447, 264), 356 | (346, 352), (352, 340), (340, 346), (354, 274), (274, 19), (19, 354), 357 | (363, 456), (456, 281), (281, 363), (426, 436), (436, 425), (425, 426), 358 | (380, 381), (381, 252), (252, 380), (267, 269), (269, 393), (393, 267), 359 | (421, 200), (200, 428), (428, 421), (371, 266), (266, 329), (329, 371), 360 | (432, 287), (287, 422), (422, 432), (290, 250), (250, 328), (328, 290), 361 | (385, 258), (258, 384), (384, 385), (446, 265), (265, 342), (342, 446), 362 | (386, 387), (387, 257), (257, 386), (422, 424), (424, 430), (430, 422), 363 | (445, 342), (342, 276), (276, 445), (422, 273), (273, 424), (424, 422), 364 | (306, 292), (292, 307), (307, 306), (352, 366), (366, 345), (345, 352), 365 | (268, 271), (271, 302), (302, 268), (358, 423), (423, 371), (371, 358), 366 | (327, 294), (294, 460), (460, 327), (331, 279), (279, 294), (294, 331), 367 | (303, 271), (271, 304), (304, 303), (436, 432), (432, 427), (427, 436), 368 | (304, 272), (272, 408), (408, 304), (395, 394), (394, 431), (431, 395), 369 | (378, 395), (395, 400), (400, 378), (296, 334), (334, 299), (299, 296), 370 | (6, 351), (351, 168), (168, 6), (376, 352), (352, 411), (411, 376), 371 | (307, 325), (325, 320), (320, 307), (285, 295), (295, 336), (336, 285), 372 | (320, 319), (319, 404), (404, 320), (329, 330), (330, 349), (349, 329), 373 | (334, 293), (293, 333), (333, 334), (366, 323), (323, 447), (447, 366), 374 | (316, 15), (15, 315), (315, 316), (331, 358), (358, 279), (279, 331), 375 | (317, 14), (14, 316), (316, 317), (8, 285), (285, 9), (9, 8), 376 | (277, 329), (329, 350), (350, 277), (253, 374), (374, 252), (252, 253), 377 | (319, 318), (318, 403), (403, 319), (351, 6), (6, 419), (419, 351), 378 | (324, 318), (318, 325), (325, 324), (397, 367), (367, 365), (365, 397), 379 | (288, 
435), (435, 397), (397, 288), (278, 344), (344, 439), (439, 278), 380 | (310, 272), (272, 311), (311, 310), (248, 195), (195, 281), (281, 248), 381 | (375, 273), (273, 291), (291, 375), (175, 396), (396, 199), (199, 175), 382 | (312, 311), (311, 268), (268, 312), (276, 283), (283, 445), (445, 276), 383 | (390, 373), (373, 339), (339, 390), (295, 282), (282, 296), (296, 295), 384 | (448, 449), (449, 346), (346, 448), (356, 264), (264, 454), (454, 356), 385 | (337, 336), (336, 299), (299, 337), (337, 338), (338, 151), (151, 337), 386 | (294, 278), (278, 455), (455, 294), (308, 292), (292, 415), (415, 308), 387 | (429, 358), (358, 355), (355, 429), (265, 340), (340, 372), (372, 265), 388 | (352, 346), (346, 280), (280, 352), (295, 442), (442, 282), (282, 295), 389 | (354, 19), (19, 370), (370, 354), (285, 441), (441, 295), (295, 285), 390 | (195, 248), (248, 197), (197, 195), (457, 440), (440, 274), (274, 457), 391 | (301, 300), (300, 368), (368, 301), (417, 351), (351, 465), (465, 417), 392 | (251, 301), (301, 389), (389, 251), (394, 395), (395, 379), (379, 394), 393 | (399, 412), (412, 419), (419, 399), (410, 436), (436, 322), (322, 410), 394 | (326, 2), (2, 393), (393, 326), (354, 370), (370, 461), (461, 354), 395 | (393, 164), (164, 267), (267, 393), (268, 302), (302, 12), (12, 268), 396 | (312, 268), (268, 13), (13, 312), (298, 293), (293, 301), (301, 298), 397 | (265, 446), (446, 340), (340, 265), (280, 330), (330, 425), (425, 280), 398 | (322, 426), (426, 391), (391, 322), (420, 429), (429, 437), (437, 420), 399 | (393, 391), (391, 326), (326, 393), (344, 440), (440, 438), (438, 344), 400 | (458, 459), (459, 461), (461, 458), (364, 434), (434, 394), (394, 364), 401 | (428, 396), (396, 262), (262, 428), (274, 354), (354, 457), (457, 274), 402 | (317, 316), (316, 402), (402, 317), (316, 315), (315, 403), (403, 316), 403 | (315, 314), (314, 404), (404, 315), (314, 313), (313, 405), (405, 314), 404 | (313, 421), (421, 406), (406, 313), (323, 366), (366, 361), (361, 323), 405 | (292, 306), (306, 407), (407, 292), (306, 291), (291, 408), (408, 306), 406 | (291, 287), (287, 409), (409, 291), (287, 432), (432, 410), (410, 287), 407 | (427, 434), (434, 411), (411, 427), (372, 264), (264, 383), (383, 372), 408 | (459, 309), (309, 457), (457, 459), (366, 352), (352, 401), (401, 366), 409 | (1, 274), (274, 4), (4, 1), (418, 421), (421, 262), (262, 418), 410 | (331, 294), (294, 358), (358, 331), (435, 433), (433, 367), (367, 435), 411 | (392, 289), (289, 439), (439, 392), (328, 462), (462, 326), (326, 328), 412 | (94, 2), (2, 370), (370, 94), (289, 305), (305, 455), (455, 289), 413 | (339, 254), (254, 448), (448, 339), (359, 255), (255, 446), (446, 359), 414 | (254, 253), (253, 449), (449, 254), (253, 252), (252, 450), (450, 253), 415 | (252, 256), (256, 451), (451, 252), (256, 341), (341, 452), (452, 256), 416 | (414, 413), (413, 463), (463, 414), (286, 441), (441, 414), (414, 286), 417 | (286, 258), (258, 441), (441, 286), (258, 257), (257, 442), (442, 258), 418 | (257, 259), (259, 443), (443, 257), (259, 260), (260, 444), (444, 259), 419 | (260, 467), (467, 445), (445, 260), (309, 459), (459, 250), (250, 309), 420 | (305, 289), (289, 290), (290, 305), (305, 290), (290, 460), (460, 305), 421 | (401, 376), (376, 435), (435, 401), (309, 250), (250, 392), (392, 309), 422 | (376, 411), (411, 433), (433, 376), (453, 341), (341, 464), (464, 453), 423 | (357, 453), (453, 465), (465, 357), (343, 357), (357, 412), (412, 343), 424 | (437, 343), (343, 399), (399, 437), (344, 360), (360, 440), (440, 344), 
425 | (420, 437), (437, 456), (456, 420), (360, 420), (420, 363), (363, 360), 426 | (361, 401), (401, 288), (288, 361), (265, 372), (372, 353), (353, 265), 427 | (390, 339), (339, 249), (249, 390), (339, 448), (448, 255), (255, 339)] 428 | 429 | mouthPoints = [61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 375, 321, 405, 430 | 314, 17, 84, 181, 91, 146, 76, 184, 74, 73, 72, 11, 302, 303, 304, 431 | 408, 206, 307, 320, 404, 315, 16, 85, 180, 90, 77, 62, 183, 42, 432 | 41, 38, 12, 268, 271, 272, 407, 293, 325, 319, 403, 316, 15, 86, 433 | 179, 89, 96, 78, 191, 95, 80, 88, 81, 178, 82, 87, 13, 14, 312, 317, 434 | 311, 402, 310, 318, 415, 324, 308] 435 | 436 | chins = [93, 137, 123, 50, 205, 206, 165, 167, 164, 393, 391, 426, 425, 280, 352, 437 | 366, 323, 361, 401, 376, 411, 427, 436, 322, 92, 216, 207, 187, 147, 177, 438 | 132, 58, 215, 213, 192, 214, 212, 57, 186, 43, 106, 182, 83, 18, 313, 406, 439 | 335, 273, 287, 432, 434, 416, 433, 435, 288, 297, 367, 364, 365, 430, 394, 440 | 379, 422, 397, 424, 431, 395, 378, 418, 262, 369, 400, 421, 428, 396, 377, 441 | 200, 199, 175, 152, 201, 208, 171, 148, 194, 32, 140, 176, 204, 211, 170, 442 | 149, 202, 210, 169, 150, 135, 136, 138, 172] 443 | 444 | rest = [i for i in range(468) if i not in chins] -------------------------------------------------------------------------------- /lipsync3d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/__init__.py -------------------------------------------------------------------------------- /lipsync3d/__pycache__/audio.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/__pycache__/audio.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/__pycache__/dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/__pycache__/dataset.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/__pycache__/hparams.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/__pycache__/hparams.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/__pycache__/loss.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/__pycache__/loss.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/__pycache__/model.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/__pycache__/model.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/__pycache__/options.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/__pycache__/options.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /lipsync3d/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audio.py: -------------------------------------------------------------------------------- 1 | import librosa 2 | import librosa.filters 3 | import numpy as np 4 | # import tensorflow as tf 5 | from scipy import signal 6 | from scipy.io import wavfile 7 | from hparams import hparams as hp 8 | 9 | def load_wav(path, sr): 10 | return librosa.core.load(path, sr=sr)[0] 11 | 12 | def save_wav(wav, path, sr): 13 | wav *= 32767 / max(0.01, np.max(np.abs(wav))) 14 | #proposed by @dsmiller 15 | wavfile.write(path, sr, wav.astype(np.int16)) 16 | 17 | def save_wavenet_wav(wav, path, sr): 18 | librosa.output.write_wav(path, wav, sr=sr) 19 | 20 | def preemphasis(wav, k, preemphasize=True): 21 | if preemphasize: 22 | return signal.lfilter([1, -k], [1], wav) 23 | return wav 24 | 25 | def inv_preemphasis(wav, k, inv_preemphasize=True): 26 | if inv_preemphasize: 27 | return signal.lfilter([1], [1, -k], wav) 28 | return wav 29 | 30 | def get_hop_size(): 31 | hop_size = hp.hop_size 32 | if hop_size is None: 33 | assert hp.frame_shift_ms is not None 34 | hop_size = int(hp.frame_shift_ms / 1000 * hp.sample_rate) 35 | return hop_size 36 | 37 | def linearspectrogram(wav): 38 | D = _stft(preemphasis(wav, hp.preemphasis, hp.preemphasize)) 39 | S = _amp_to_db(np.abs(D)) - hp.ref_level_db 40 | 41 | if hp.signal_normalization: 42 | return _normalize(S) 43 | return S 44 | 45 | def melspectrogram(wav): 46 | D = _stft(preemphasis(wav, hp.preemphasis, hp.preemphasize)) 47 | S = _amp_to_db(_linear_to_mel(np.abs(D))) - hp.ref_level_db 48 | 49 | if hp.signal_normalization: 50 | return _normalize(S) 51 | return S 52 | 53 | def _lws_processor(): 54 | import lws 55 | return lws.lws(hp.n_fft, get_hop_size(), fftsize=hp.win_size, mode="speech") 56 | 57 | def _stft(y): 58 | if hp.use_lws: 59 | return _lws_processor(hp).stft(y).T 60 | else: 61 | return librosa.stft(y=y, n_fft=hp.n_fft, hop_length=get_hop_size(), win_length=hp.win_size) 62 | 63 | ########################################################## 64 | #Those are only correct when using lws!!! (This was messing with Wavenet quality for a long time!) 
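# Illustrative arithmetic (values picked here for clarity, not taken from hparams):
#   length=16000, fsize=800, fshift=200  ->  pad = 600; since 16000 % 200 == 0,
#   M = (16000 + 2*600 - 800) // 200 + 1 = 83, and pad_lr returns (600, 600) because
#   r = (83 - 1)*200 + 800 - 17200 = 0.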
65 | def num_frames(length, fsize, fshift): 66 | """Compute number of time frames of spectrogram 67 | """ 68 | pad = (fsize - fshift) 69 | if length % fshift == 0: 70 | M = (length + pad * 2 - fsize) // fshift + 1 71 | else: 72 | M = (length + pad * 2 - fsize) // fshift + 2 73 | return M 74 | 75 | 76 | def pad_lr(x, fsize, fshift): 77 | """Compute left and right padding 78 | """ 79 | M = num_frames(len(x), fsize, fshift) 80 | pad = (fsize - fshift) 81 | T = len(x) + 2 * pad 82 | r = (M - 1) * fshift + fsize - T 83 | return pad, pad + r 84 | ########################################################## 85 | #Librosa correct padding 86 | def librosa_pad_lr(x, fsize, fshift): 87 | return 0, (x.shape[0] // fshift + 1) * fshift - x.shape[0] 88 | 89 | # Conversions 90 | _mel_basis = None 91 | 92 | def _linear_to_mel(spectogram): 93 | global _mel_basis 94 | if _mel_basis is None: 95 | _mel_basis = _build_mel_basis() 96 | return np.dot(_mel_basis, spectogram) 97 | 98 | def _build_mel_basis(): 99 | assert hp.fmax <= hp.sample_rate // 2 100 | return librosa.filters.mel(hp.sample_rate, hp.n_fft, n_mels=hp.num_mels, 101 | fmin=hp.fmin, fmax=hp.fmax) 102 | 103 | def _amp_to_db(x): 104 | min_level = np.exp(hp.min_level_db / 20 * np.log(10)) 105 | return 20 * np.log10(np.maximum(min_level, x)) 106 | 107 | def _db_to_amp(x): 108 | return np.power(10.0, (x) * 0.05) 109 | 110 | def _normalize(S): 111 | if hp.allow_clipping_in_normalization: 112 | if hp.symmetric_mels: 113 | return np.clip((2 * hp.max_abs_value) * ((S - hp.min_level_db) / (-hp.min_level_db)) - hp.max_abs_value, 114 | -hp.max_abs_value, hp.max_abs_value) 115 | else: 116 | return np.clip(hp.max_abs_value * ((S - hp.min_level_db) / (-hp.min_level_db)), 0, hp.max_abs_value) 117 | 118 | assert S.max() <= 0 and S.min() - hp.min_level_db >= 0 119 | if hp.symmetric_mels: 120 | return (2 * hp.max_abs_value) * ((S - hp.min_level_db) / (-hp.min_level_db)) - hp.max_abs_value 121 | else: 122 | return hp.max_abs_value * ((S - hp.min_level_db) / (-hp.min_level_db)) 123 | 124 | def _denormalize(D): 125 | if hp.allow_clipping_in_normalization: 126 | if hp.symmetric_mels: 127 | return (((np.clip(D, -hp.max_abs_value, 128 | hp.max_abs_value) + hp.max_abs_value) * -hp.min_level_db / (2 * hp.max_abs_value)) 129 | + hp.min_level_db) 130 | else: 131 | return ((np.clip(D, 0, hp.max_abs_value) * -hp.min_level_db / hp.max_abs_value) + hp.min_level_db) 132 | 133 | if hp.symmetric_mels: 134 | return (((D + hp.max_abs_value) * -hp.min_level_db / (2 * hp.max_abs_value)) + hp.min_level_db) 135 | else: 136 | return ((D * -hp.min_level_db / hp.max_abs_value) + hp.min_level_db) 137 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/__init__.py -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/__pycache__/__init__.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/__pycache__/util.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/__pycache__/util.cpython-36.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/__pycache__/util.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/__pycache__/util.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/__pycache__/visualizer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/__pycache__/visualizer.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/audio.py: -------------------------------------------------------------------------------- 1 | import librosa 2 | import librosa.filters 3 | import numpy as np 4 | # import tensorflow as tf 5 | from scipy import signal 6 | from scipy.io import wavfile 7 | from .hparams import hparams as hp 8 | 9 | def load_wav(path, sr): 10 | return librosa.core.load(path, sr=sr)[0] 11 | 12 | def save_wav(wav, path, sr): 13 | wav *= 32767 / max(0.01, np.max(np.abs(wav))) 14 | #proposed by @dsmiller 15 | wavfile.write(path, sr, wav.astype(np.int16)) 16 | 17 | def save_wavenet_wav(wav, path, sr): 18 | librosa.output.write_wav(path, wav, sr=sr) 19 | 20 | def preemphasis(wav, k, preemphasize=True): 21 | if preemphasize: 22 | return signal.lfilter([1, -k], [1], wav) 23 | return wav 24 | 25 | def inv_preemphasis(wav, k, inv_preemphasize=True): 26 | if inv_preemphasize: 27 | return signal.lfilter([1], [1, -k], wav) 28 | return wav 29 | 30 | def get_hop_size(): 31 | hop_size = hp.hop_size 32 | if hop_size is None: 33 | assert hp.frame_shift_ms is not None 34 | hop_size = int(hp.frame_shift_ms / 1000 * hp.sample_rate) 35 | return hop_size 36 | 37 | def linearspectrogram(wav): 38 | D = _stft(preemphasis(wav, hp.preemphasis, hp.preemphasize)) 39 | S = _amp_to_db(np.abs(D)) - hp.ref_level_db 40 | 41 | if hp.signal_normalization: 42 | return _normalize(S) 43 | return S 44 | 45 | def melspectrogram(wav): 46 | D = _stft(preemphasis(wav, hp.preemphasis, hp.preemphasize)) 47 | S = _amp_to_db(_linear_to_mel(np.abs(D))) - hp.ref_level_db 48 | 49 | if hp.signal_normalization: 50 | return _normalize(S) 51 | return S 52 | 53 | def _lws_processor(): 54 | import lws 55 | return lws.lws(hp.n_fft, get_hop_size(), fftsize=hp.win_size, mode="speech") 56 | 57 | def _stft(y): 58 | if hp.use_lws: 59 | return _lws_processor(hp).stft(y).T 60 | else: 61 | return librosa.stft(y=y, n_fft=hp.n_fft, hop_length=get_hop_size(), win_length=hp.win_size) 62 | 63 | ########################################################## 64 | #Those are only correct when using lws!!! 
(This was messing with Wavenet quality for a long time!) 65 | def num_frames(length, fsize, fshift): 66 | """Compute number of time frames of spectrogram 67 | """ 68 | pad = (fsize - fshift) 69 | if length % fshift == 0: 70 | M = (length + pad * 2 - fsize) // fshift + 1 71 | else: 72 | M = (length + pad * 2 - fsize) // fshift + 2 73 | return M 74 | 75 | 76 | def pad_lr(x, fsize, fshift): 77 | """Compute left and right padding 78 | """ 79 | M = num_frames(len(x), fsize, fshift) 80 | pad = (fsize - fshift) 81 | T = len(x) + 2 * pad 82 | r = (M - 1) * fshift + fsize - T 83 | return pad, pad + r 84 | ########################################################## 85 | #Librosa correct padding 86 | def librosa_pad_lr(x, fsize, fshift): 87 | return 0, (x.shape[0] // fshift + 1) * fshift - x.shape[0] 88 | 89 | # Conversions 90 | _mel_basis = None 91 | 92 | def _linear_to_mel(spectogram): 93 | global _mel_basis 94 | if _mel_basis is None: 95 | _mel_basis = _build_mel_basis() 96 | return np.dot(_mel_basis, spectogram) 97 | 98 | def _build_mel_basis(): 99 | assert hp.fmax <= hp.sample_rate // 2 100 | return librosa.filters.mel(hp.sample_rate, hp.n_fft, n_mels=hp.num_mels, 101 | fmin=hp.fmin, fmax=hp.fmax) 102 | 103 | def _amp_to_db(x): 104 | min_level = np.exp(hp.min_level_db / 20 * np.log(10)) 105 | return 20 * np.log10(np.maximum(min_level, x)) 106 | 107 | def _db_to_amp(x): 108 | return np.power(10.0, (x) * 0.05) 109 | 110 | def _normalize(S): 111 | if hp.allow_clipping_in_normalization: 112 | if hp.symmetric_mels: 113 | return np.clip((2 * hp.max_abs_value) * ((S - hp.min_level_db) / (-hp.min_level_db)) - hp.max_abs_value, 114 | -hp.max_abs_value, hp.max_abs_value) 115 | else: 116 | return np.clip(hp.max_abs_value * ((S - hp.min_level_db) / (-hp.min_level_db)), 0, hp.max_abs_value) 117 | 118 | assert S.max() <= 0 and S.min() - hp.min_level_db >= 0 119 | if hp.symmetric_mels: 120 | return (2 * hp.max_abs_value) * ((S - hp.min_level_db) / (-hp.min_level_db)) - hp.max_abs_value 121 | else: 122 | return hp.max_abs_value * ((S - hp.min_level_db) / (-hp.min_level_db)) 123 | 124 | def _denormalize(D): 125 | if hp.allow_clipping_in_normalization: 126 | if hp.symmetric_mels: 127 | return (((np.clip(D, -hp.max_abs_value, 128 | hp.max_abs_value) + hp.max_abs_value) * -hp.min_level_db / (2 * hp.max_abs_value)) 129 | + hp.min_level_db) 130 | else: 131 | return ((np.clip(D, 0, hp.max_abs_value) * -hp.min_level_db / hp.max_abs_value) + hp.min_level_db) 132 | 133 | if hp.symmetric_mels: 134 | return (((D + hp.max_abs_value) * -hp.min_level_db / (2 * hp.max_abs_value)) + hp.min_level_db) 135 | else: 136 | return ((D * -hp.min_level_db / hp.max_abs_value) + hp.min_level_db) 137 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/build_nfr_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | Following https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/datasets/combine_A_and_B.py 3 | """ 4 | 5 | import os 6 | import cv2 7 | import numpy as np 8 | from tqdm import tqdm 9 | 10 | import sys 11 | sys.path.append(".") 12 | 13 | from models import networks 14 | from options.options import Options 15 | from audiodvp_utils.util import create_dir, load_coef, get_file_list, load_face_emb, get_max_crop_region 16 | from audiodvp_utils.rescale_image import rescale_and_paste 17 | 18 | 19 | if __name__ == '__main__': 20 | opt = Options().parse_args() 21 | 22 | create_dir(os.path.join(opt.data_dir, 'mask')) 23 | 
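# Load the per-frame outputs of the earlier reconstruction stage; in the AudioDVP convention these
# appear to be identity (alpha), albedo (beta), expression (delta) and lighting (gamma) coefficients,
# plus head pose (rotation, translation), a face embedding and the crop region for each frame.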
24 | alpha_list = load_coef(os.path.join(opt.data_dir, 'alpha')) 25 | beta_list = load_coef(os.path.join(opt.data_dir, 'beta')) 26 | delta_list = load_coef(os.path.join(opt.data_dir, 'delta')) 27 | gamma_list = load_coef(os.path.join(opt.data_dir, 'gamma')) 28 | angle_list = load_coef(os.path.join(opt.data_dir, 'rotation')) 29 | translation_list = load_coef(os.path.join(opt.data_dir, 'translation')) 30 | face_emb_list = load_face_emb(opt.data_dir) 31 | 32 | crop_region_list = load_coef(os.path.join(opt.data_dir, 'crop_region')) 33 | full_image_list = get_file_list(os.path.join(opt.data_dir, 'full')) 34 | 35 | top, bottom, left, right = get_max_crop_region(crop_region_list) 36 | H, W, _ = cv2.imread(full_image_list[0]).shape 37 | mouth_mask = networks.MouthMask(opt).to(opt.device) 38 | 39 | for i in tqdm(range(len(alpha_list))): 40 | alpha = alpha_list[i].unsqueeze(0).cuda() 41 | beta = beta_list[i].unsqueeze(0).cuda() 42 | delta = delta_list[i].unsqueeze(0).cuda() 43 | gamma = gamma_list[i].unsqueeze(0).cuda() 44 | rotation = angle_list[i].unsqueeze(0).cuda() 45 | translation = translation_list[i].unsqueeze(0).cuda() 46 | face_emb = face_emb_list[i].unsqueeze(0).cuda() 47 | crop_region = crop_region_list[i] 48 | empty_image = np.zeros((H, W), np.uint8) 49 | 50 | mask = mouth_mask(alpha, delta, beta, gamma, rotation, translation, face_emb) 51 | mask = mask.squeeze(0).detach().cpu().permute(1, 2, 0).numpy() * 255.0 52 | mask = cv2.dilate(mask, np.ones((3,3), np.uint8), iterations=4) 53 | rescaled_mask = rescale_and_paste(crop_region, empty_image, mask) 54 | rescaled_mask = rescaled_mask[top:bottom, left:right] 55 | rescaled_mask = cv2.resize(rescaled_mask, (opt.image_width, opt.image_height), interpolation=cv2.INTER_AREA) 56 | 57 | cv2.imwrite(os.path.join(opt.data_dir, 'mask', '%05d.png' % (i+1)), rescaled_mask) 58 | 59 | create_dir(os.path.join(opt.data_dir, 'nfr', 'A', 'train')) 60 | create_dir(os.path.join(opt.data_dir, 'nfr', 'B', 'train')) 61 | 62 | masks = get_file_list(os.path.join(opt.data_dir, 'mask')) 63 | renders = get_file_list(os.path.join(opt.data_dir, 'render')) 64 | 65 | for i in tqdm(range(len(masks))): 66 | mask = cv2.imread(masks[i]) 67 | render = cv2.imread(renders[i]) 68 | full = cv2.imread(full_image_list[i]) 69 | crop_region = crop_region_list[i] 70 | 71 | empty_image = np.zeros((H, W, 3), np.uint8) 72 | 73 | rescaled_render = rescale_and_paste(crop_region, empty_image, render) 74 | rescaled_render = rescaled_render[top:bottom, left:right] 75 | rescaled_render = cv2.resize(rescaled_render, (opt.image_width, opt.image_height), interpolation=cv2.INTER_AREA) 76 | 77 | rescaled_crop = full[top:bottom, left:right] 78 | rescaled_crop = cv2.resize(rescaled_crop, (opt.image_width, opt.image_height), interpolation=cv2.INTER_AREA) 79 | 80 | masked_crop = cv2.bitwise_and(rescaled_crop, mask) 81 | masked_render = cv2.bitwise_and(rescaled_render, mask) 82 | 83 | cv2.imwrite(os.path.join(opt.data_dir, 'nfr', 'A', 'train', '%05d.png' % (i+1)), masked_crop) 84 | cv2.imwrite(os.path.join(opt.data_dir, 'nfr', 'B', 'train', '%05d.png' % (i+1)), masked_render) 85 | 86 | splits = os.listdir(os.path.join(opt.data_dir, 'nfr', 'A')) 87 | 88 | for sp in splits: 89 | image_fold_A = os.path.join(os.path.join(opt.data_dir, 'nfr', 'A'), sp) 90 | image_fold_B = os.path.join(os.path.join(opt.data_dir, 'nfr', 'B'), sp) 91 | image_list = os.listdir(image_fold_A) 92 | 93 | image_fold_AB = os.path.join(opt.data_dir, 'nfr', 'AB', sp) 94 | if not os.path.isdir(image_fold_AB): 95 | 
os.makedirs(image_fold_AB) 96 | 97 | for n in tqdm(range(len(image_list))): 98 | name_A = image_list[n] 99 | path_A = os.path.join(image_fold_A, name_A) 100 | 101 | name_B = name_A 102 | path_B = os.path.join(image_fold_B, name_B) 103 | 104 | if os.path.isfile(path_A) and os.path.isfile(path_B): 105 | name_AB = name_A 106 | path_AB = os.path.join(image_fold_AB, name_AB) 107 | im_A = cv2.imread(path_A, 1) # python2: cv2.CV_LOAD_IMAGE_COLOR; python3: cv2.IMREAD_COLOR 108 | im_B = cv2.imread(path_B, 1) # python2: cv2.CV_LOAD_IMAGE_COLOR; python3: cv2.IMREAD_COLOR 109 | im_AB = np.concatenate([im_A, im_B], 1) 110 | cv2.imwrite(path_AB, im_AB) 111 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/crop_portrait-checkpoint.py: -------------------------------------------------------------------------------- 1 | """ 2 | Crop upper boddy in every video frame, square bounding box is averaged among all frames and fixed. 3 | """ 4 | 5 | import os 6 | import cv2 7 | import argparse 8 | from tqdm import tqdm 9 | import face_recognition 10 | 11 | import util 12 | 13 | 14 | def calc_bbox(image_list, batch_size=5): 15 | """Batch infer of face location, batch_size should be factor of total frame number.""" 16 | top_sum = right_sum = bottom_sum = left_sum = 0 17 | 18 | for i in tqdm(range(len(image_list) // batch_size - batch_size)): 19 | image_batch = [] 20 | 21 | for j in range(i * batch_size, (i + 1) * batch_size): 22 | image = face_recognition.load_image_file(image_list[j]) 23 | image_batch.append(image) 24 | 25 | face_locations = face_recognition.batch_face_locations(image_batch, number_of_times_to_upsample=0, batch_size=batch_size) 26 | for face_location in face_locations: 27 | top, right, bottom, left = face_location[0] # assuming only one face detected in the frame 28 | top_sum += top 29 | right_sum += right 30 | bottom_sum += bottom 31 | left_sum += left 32 | 33 | return (top_sum // len(image_list), right_sum // len(image_list), bottom_sum // len(image_list), left_sum // len(image_list)) 34 | 35 | 36 | def crop_image(data_dir, dest_size, crop_level, vertical_adjust): 37 | image_list = util.get_file_list(os.path.join(data_dir, 'full')) 38 | top, right, bottom, left = calc_bbox(image_list) 39 | 40 | height = bottom - top 41 | width = right - left 42 | 43 | crop_size = int(height * crop_level) 44 | 45 | horizontal_delta = (crop_size - width) // 2 46 | left -= horizontal_delta 47 | right += horizontal_delta 48 | 49 | top = int(top * vertical_adjust) 50 | bottom = top + crop_size 51 | 52 | for i in tqdm(range(len(image_list))): 53 | image =cv2.imread(image_list[i]) 54 | image = image[top:bottom, left:right] 55 | 56 | image = cv2.resize(image, (dest_size, dest_size), interpolation=cv2.INTER_AREA) 57 | cv2.imwrite(os.path.join(args.data_dir, 'crop', os.path.basename(image_list[i])), image) 58 | 59 | 60 | if __name__ == '__main__': 61 | parser = argparse.ArgumentParser(description='Process some integers.') 62 | parser.add_argument('--data_dir', type=str, default=None) 63 | parser.add_argument('--dest_size', type=int, default=256) 64 | parser.add_argument('--crop_level', type=float, default=2.0, help='Adjust crop image size.') 65 | parser.add_argument('--vertical_adjust', type=float, default=0.3, help='Adjust vertical location of portrait in image.') 66 | args = parser.parse_args() 67 | crop_image(args.data_dir, dest_size=args.dest_size, crop_level=args.crop_level, vertical_adjust=args.vertical_adjust) 68 | 
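Both this notebook-checkpoint copy and the crop_portrait.py that follows expose the same command-line interface. A plausible invocation, assuming frames were first extracted into <data_dir>/full (the data directory value itself is a placeholder), would be:

    python lipsync3d/audiodvp_utils/crop_portrait.py --data_dir data/speaker --dest_size 256 --crop_level 2.0 --vertical_adjust 0.3

The script writes fixed-size square crops to <data_dir>/crop; the newer version below additionally stores each frame's crop box as a .pt file under <data_dir>/crop_region.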
-------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/crop_portrait.py: -------------------------------------------------------------------------------- 1 | """ 2 | Crop upper boddy in every video frame, square bounding box is averaged among all frames and fixed. 3 | """ 4 | 5 | import os 6 | import cv2 7 | import argparse 8 | from tqdm import tqdm 9 | import face_recognition 10 | import torch 11 | import util 12 | import numpy as np 13 | import face_detection 14 | 15 | def calc_bbox(image_list, batch_size=5): 16 | """Batch infer of face location, batch_size should be factor of total frame number.""" 17 | fa = face_detection.FaceAlignment(face_detection.LandmarksType._2D, flip_input=False, device='cuda') 18 | 19 | top_best = 10000 20 | bottom_best = 0 21 | right_best = 0 22 | left_best = 10000 23 | 24 | for i in tqdm(range(len(image_list) // batch_size - batch_size)): 25 | image_batch = [] 26 | 27 | for j in range(i * batch_size, (i + 1) * batch_size): 28 | image = face_recognition.load_image_file(image_list[j]) 29 | image_batch.append(image) 30 | 31 | # face_locations = face_recognition.batch_face_locations(image_batch, number_of_times_to_upsample=0, batch_size=batch_size) 32 | preds = fa.get_detections_for_batch(np.asarray(image_batch)) 33 | 34 | for face_location in preds: 35 | left, top, right, bottom = face_location # assuming only one face detected in the frame 36 | if top_best > top: 37 | top_best = top 38 | if bottom_best < bottom: 39 | bottom_best = bottom 40 | if right_best < right: 41 | right_best = right 42 | if left_best > left: 43 | left_best = left 44 | 45 | return top_best, right_best, bottom_best, left_best 46 | 47 | 48 | def crop_image(data_dir, dest_size, crop_level, vertical_adjust): 49 | image_list = util.get_file_list(os.path.join(data_dir, 'full')) 50 | H, W, _ = face_recognition.load_image_file(image_list[0]).shape 51 | top, right, bottom, left = calc_bbox(image_list) 52 | height = bottom - top 53 | width = right - left 54 | 55 | crop_size = int(height * crop_level) 56 | 57 | horizontal_delta = (crop_size - width) // 2 58 | vertical_delta = (crop_size - height) // 2 59 | 60 | left = max(left - horizontal_delta, 0) 61 | right = min(right + horizontal_delta, W) 62 | 63 | top = max(top - int(vertical_delta * 0.5), 0) 64 | bottom = min(bottom + int(vertical_delta * 1.5), H) 65 | 66 | for i in tqdm(range(len(image_list))): 67 | image =cv2.imread(image_list[i]) 68 | image = image[top:bottom, left:right] 69 | 70 | image = cv2.resize(image, (dest_size, dest_size), interpolation=cv2.INTER_AREA) 71 | cv2.imwrite(os.path.join(args.data_dir, 'crop', os.path.basename(image_list[i])), image) 72 | torch.save([top, bottom, left, right], os.path.join(data_dir, 'crop_region', os.path.basename(image_list[i]))[:-4]+'.pt') 73 | 74 | 75 | 76 | def crop_per_image(data_dir, dest_size, crop_level): 77 | fa = face_detection.FaceAlignment(face_detection.LandmarksType._2D, flip_input=False, device='cuda') 78 | 79 | image_list = util.get_file_list(os.path.join(data_dir, 'full')) 80 | batch_size = 5 81 | frames = [] 82 | 83 | for i in tqdm(range(len(image_list))): 84 | frame = face_recognition.load_image_file(image_list[i]) 85 | frames.append(frame) 86 | 87 | H, W, _ = frames[0].shape 88 | 89 | batches = [frames[i:i + batch_size] for i in range(0, len(frames), batch_size)] 90 | 91 | for idx in tqdm(range(len(batches))): 92 | fb = batches[idx] 93 | preds = fa.get_detections_for_batch(np.asarray(fb)) 94 | 95 | for j, f in enumerate(preds): 96 | 
if f is None: 97 | print('no face in image {}'.format(idx * batch_size + j)) 98 | else: 99 | left, top, right, bottom = f 100 | 101 | 102 | height = bottom - top 103 | width = right - left 104 | crop_size = int(height * crop_level) 105 | 106 | horizontal_delta = (crop_size - width) // 2 107 | vertical_delta = (crop_size - height) // 2 108 | 109 | left = max(left - horizontal_delta, 0) 110 | right = min(right + horizontal_delta, W) 111 | top = max(top - int(vertical_delta * 0.5), 0) 112 | bottom = min(bottom + int(vertical_delta * 1.5), H) 113 | 114 | crop_f = cv2.imread(image_list[idx * batch_size + j]) 115 | crop_f = crop_f[top:bottom, left:right] 116 | crop_f = cv2.resize(crop_f, (dest_size, dest_size), interpolation=cv2.INTER_AREA) 117 | cv2.imwrite(os.path.join(data_dir, 'crop', os.path.basename(image_list[idx * batch_size + j])), crop_f) 118 | torch.save([top, bottom, left, right], os.path.join(data_dir, 'crop_region', os.path.basename(image_list[idx * batch_size + j]))[:-4]+'.pt') 119 | 120 | 121 | if __name__ == '__main__': 122 | parser = argparse.ArgumentParser(description='Process some integers.') 123 | parser.add_argument('--data_dir', type=str, default=None) 124 | parser.add_argument('--dest_size', type=int, default=256) 125 | parser.add_argument('--crop_level', type=float, default=2.0, help='Adjust crop image size.') 126 | parser.add_argument('--vertical_adjust', type=float, default=0.3, help='Adjust vertical location of portrait in image.') 127 | args = parser.parse_args() 128 | util.create_dir(os.path.join(args.data_dir,'crop')) 129 | util.create_dir(os.path.join(args.data_dir, 'crop_region')) 130 | # crop_per_image(args.data_dir, dest_size=args.dest_size, crop_level=args.crop_level) 131 | crop_image(args.data_dir, dest_size=args.dest_size, crop_level=args.crop_level, vertical_adjust=args.vertical_adjust) 132 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/README.md: -------------------------------------------------------------------------------- 1 | The code for Face Detection in this folder has been taken from the wonderful [face_alignment](https://github.com/1adrianb/face-alignment) repository. This has been modified to take batches of faces at a time. 
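A minimal usage sketch of the batched interface this folder provides, mirroring how crop_portrait.py above calls it (frame_paths is a placeholder list of image paths, and all frames are assumed to share the same resolution):

    import numpy as np
    import face_recognition
    import face_detection

    # load the SFD-based detector once, then run it over a whole batch of frames
    fa = face_detection.FaceAlignment(face_detection.LandmarksType._2D, flip_input=False, device='cuda')
    frames = [face_recognition.load_image_file(p) for p in frame_paths]  # RGB frames of equal size
    preds = fa.get_detections_for_batch(np.asarray(frames))              # one (x1, y1, x2, y2) box or None per frame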
-------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __author__ = """Adrian Bulat""" 4 | __email__ = 'adrian.bulat@nottingham.ac.uk' 5 | __version__ = '1.0.1' 6 | 7 | from .api import FaceAlignment, LandmarksType, NetworkSize 8 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/face_detection/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/__pycache__/api.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/face_detection/__pycache__/api.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/__pycache__/models.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/face_detection/__pycache__/models.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/face_detection/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/api.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.model_zoo import load_url 5 | from enum import Enum 6 | import numpy as np 7 | import cv2 8 | try: 9 | import urllib.request as request_file 10 | except BaseException: 11 | import urllib as request_file 12 | 13 | from .models import FAN, ResNetDepth 14 | from .utils import * 15 | 16 | 17 | class LandmarksType(Enum): 18 | """Enum class defining the type of landmarks to detect. 
19 | 20 | ``_2D`` - the detected points ``(x,y)`` are detected in a 2D space and follow the visible contour of the face 21 | ``_2halfD`` - this points represent the projection of the 3D points into 3D 22 | ``_3D`` - detect the points ``(x,y,z)``` in a 3D space 23 | 24 | """ 25 | _2D = 1 26 | _2halfD = 2 27 | _3D = 3 28 | 29 | 30 | class NetworkSize(Enum): 31 | # TINY = 1 32 | # SMALL = 2 33 | # MEDIUM = 3 34 | LARGE = 4 35 | 36 | def __new__(cls, value): 37 | member = object.__new__(cls) 38 | member._value_ = value 39 | return member 40 | 41 | def __int__(self): 42 | return self.value 43 | 44 | ROOT = os.path.dirname(os.path.abspath(__file__)) 45 | 46 | class FaceAlignment: 47 | def __init__(self, landmarks_type, network_size=NetworkSize.LARGE, 48 | device='cuda', flip_input=False, face_detector='sfd', verbose=False): 49 | self.device = device 50 | self.flip_input = flip_input 51 | self.landmarks_type = landmarks_type 52 | self.verbose = verbose 53 | 54 | network_size = int(network_size) 55 | 56 | if 'cuda' in device: 57 | torch.backends.cudnn.benchmark = True 58 | 59 | # Get the face detector 60 | face_detector_module = __import__('face_detection.detection.' + face_detector, 61 | globals(), locals(), [face_detector], 0) 62 | self.face_detector = face_detector_module.FaceDetector(device=device, verbose=verbose) 63 | 64 | def get_detections_for_batch(self, images): 65 | images = images[..., ::-1] 66 | detected_faces = self.face_detector.detect_from_batch(images.copy()) 67 | results = [] 68 | 69 | for i, d in enumerate(detected_faces): 70 | if len(d) == 0: 71 | results.append(None) 72 | continue 73 | d = d[0] 74 | d = np.clip(d, 0, None) 75 | 76 | x1, y1, x2, y2 = map(int, d[:-1]) 77 | results.append((x1, y1, x2, y2)) 78 | 79 | return results -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/__init__.py: -------------------------------------------------------------------------------- 1 | from .core import FaceDetector -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/face_detection/detection/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/__pycache__/core.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/face_detection/detection/__pycache__/core.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/core.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import glob 3 | from tqdm import tqdm 4 | import numpy as np 5 | import torch 6 | import cv2 7 | 8 | 9 | class FaceDetector(object): 10 | """An abstract class representing a face detector. 11 | 12 | Any other face detection implementation must subclass it. All subclasses 13 | must implement ``detect_from_image``, that return a list of detected 14 | bounding boxes. 
Optionally, for speed considerations detect from path is 15 | recommended. 16 | """ 17 | 18 | def __init__(self, device, verbose): 19 | self.device = device 20 | self.verbose = verbose 21 | 22 | if verbose: 23 | if 'cpu' in device: 24 | logger = logging.getLogger(__name__) 25 | logger.warning("Detection running on CPU, this may be potentially slow.") 26 | 27 | if 'cpu' not in device and 'cuda' not in device: 28 | if verbose: 29 | logger.error("Expected values for device are: {cpu, cuda} but got: %s", device) 30 | raise ValueError 31 | 32 | def detect_from_image(self, tensor_or_path): 33 | """Detects faces in a given image. 34 | 35 | This function detects the faces present in a provided BGR(usually) 36 | image. The input can be either the image itself or the path to it. 37 | 38 | Arguments: 39 | tensor_or_path {numpy.ndarray, torch.tensor or string} -- the path 40 | to an image or the image itself. 41 | 42 | Example:: 43 | 44 | >>> path_to_image = 'data/image_01.jpg' 45 | ... detected_faces = detect_from_image(path_to_image) 46 | [A list of bounding boxes (x1, y1, x2, y2)] 47 | >>> image = cv2.imread(path_to_image) 48 | ... detected_faces = detect_from_image(image) 49 | [A list of bounding boxes (x1, y1, x2, y2)] 50 | 51 | """ 52 | raise NotImplementedError 53 | 54 | def detect_from_directory(self, path, extensions=['.jpg', '.png'], recursive=False, show_progress_bar=True): 55 | """Detects faces from all the images present in a given directory. 56 | 57 | Arguments: 58 | path {string} -- a string containing a path that points to the folder containing the images 59 | 60 | Keyword Arguments: 61 | extensions {list} -- list of string containing the extensions to be 62 | consider in the following format: ``.extension_name`` (default: 63 | {['.jpg', '.png']}) recursive {bool} -- option wherever to scan the 64 | folder recursively (default: {False}) show_progress_bar {bool} -- 65 | display a progressbar (default: {True}) 66 | 67 | Example: 68 | >>> directory = 'data' 69 | ... detected_faces = detect_from_directory(directory) 70 | {A dictionary of [lists containing bounding boxes(x1, y1, x2, y2)]} 71 | 72 | """ 73 | if self.verbose: 74 | logger = logging.getLogger(__name__) 75 | 76 | if len(extensions) == 0: 77 | if self.verbose: 78 | logger.error("Expected at list one extension, but none was received.") 79 | raise ValueError 80 | 81 | if self.verbose: 82 | logger.info("Constructing the list of images.") 83 | additional_pattern = '/**/*' if recursive else '/*' 84 | files = [] 85 | for extension in extensions: 86 | files.extend(glob.glob(path + additional_pattern + extension, recursive=recursive)) 87 | 88 | if self.verbose: 89 | logger.info("Finished searching for images. 
%s images found", len(files)) 90 | logger.info("Preparing to run the detection.") 91 | 92 | predictions = {} 93 | for image_path in tqdm(files, disable=not show_progress_bar): 94 | if self.verbose: 95 | logger.info("Running the face detector on image: %s", image_path) 96 | predictions[image_path] = self.detect_from_image(image_path) 97 | 98 | if self.verbose: 99 | logger.info("The detector was successfully run on all %s images", len(files)) 100 | 101 | return predictions 102 | 103 | @property 104 | def reference_scale(self): 105 | raise NotImplementedError 106 | 107 | @property 108 | def reference_x_shift(self): 109 | raise NotImplementedError 110 | 111 | @property 112 | def reference_y_shift(self): 113 | raise NotImplementedError 114 | 115 | @staticmethod 116 | def tensor_or_path_to_ndarray(tensor_or_path, rgb=True): 117 | """Convert path (represented as a string) or torch.tensor to a numpy.ndarray 118 | 119 | Arguments: 120 | tensor_or_path {numpy.ndarray, torch.tensor or string} -- path to the image, or the image itself 121 | """ 122 | if isinstance(tensor_or_path, str): 123 | return cv2.imread(tensor_or_path) if not rgb else cv2.imread(tensor_or_path)[..., ::-1] 124 | elif torch.is_tensor(tensor_or_path): 125 | # Call cpu in case its coming from cuda 126 | return tensor_or_path.cpu().numpy()[..., ::-1].copy() if not rgb else tensor_or_path.cpu().numpy() 127 | elif isinstance(tensor_or_path, np.ndarray): 128 | return tensor_or_path[..., ::-1].copy() if not rgb else tensor_or_path 129 | else: 130 | raise TypeError 131 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/sfd/__init__.py: -------------------------------------------------------------------------------- 1 | from .sfd_detector import SFDDetector as FaceDetector -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/sfd/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/face_detection/detection/sfd/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/sfd/__pycache__/bbox.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/face_detection/detection/sfd/__pycache__/bbox.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/sfd/__pycache__/detect.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/face_detection/detection/sfd/__pycache__/detect.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/sfd/__pycache__/net_s3fd.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/face_detection/detection/sfd/__pycache__/net_s3fd.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/sfd/__pycache__/sfd_detector.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/longredzhong/LipSync3D/cc15e88d11b57d5bd5e6ef5e661b646c19b4bf64/lipsync3d/audiodvp_utils/face_detection/detection/sfd/__pycache__/sfd_detector.cpython-38.pyc -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/sfd/bbox.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import sys 4 | import cv2 5 | import random 6 | import datetime 7 | import time 8 | import math 9 | import argparse 10 | import numpy as np 11 | import torch 12 | 13 | try: 14 | from iou import IOU 15 | except BaseException: 16 | # IOU cython speedup 10x 17 | def IOU(ax1, ay1, ax2, ay2, bx1, by1, bx2, by2): 18 | sa = abs((ax2 - ax1) * (ay2 - ay1)) 19 | sb = abs((bx2 - bx1) * (by2 - by1)) 20 | x1, y1 = max(ax1, bx1), max(ay1, by1) 21 | x2, y2 = min(ax2, bx2), min(ay2, by2) 22 | w = x2 - x1 23 | h = y2 - y1 24 | if w < 0 or h < 0: 25 | return 0.0 26 | else: 27 | return 1.0 * w * h / (sa + sb - w * h) 28 | 29 | 30 | def bboxlog(x1, y1, x2, y2, axc, ayc, aww, ahh): 31 | xc, yc, ww, hh = (x2 + x1) / 2, (y2 + y1) / 2, x2 - x1, y2 - y1 32 | dx, dy = (xc - axc) / aww, (yc - ayc) / ahh 33 | dw, dh = math.log(ww / aww), math.log(hh / ahh) 34 | return dx, dy, dw, dh 35 | 36 | 37 | def bboxloginv(dx, dy, dw, dh, axc, ayc, aww, ahh): 38 | xc, yc = dx * aww + axc, dy * ahh + ayc 39 | ww, hh = math.exp(dw) * aww, math.exp(dh) * ahh 40 | x1, x2, y1, y2 = xc - ww / 2, xc + ww / 2, yc - hh / 2, yc + hh / 2 41 | return x1, y1, x2, y2 42 | 43 | 44 | def nms(dets, thresh): 45 | if 0 == len(dets): 46 | return [] 47 | x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4] 48 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 49 | order = scores.argsort()[::-1] 50 | 51 | keep = [] 52 | while order.size > 0: 53 | i = order[0] 54 | keep.append(i) 55 | xx1, yy1 = np.maximum(x1[i], x1[order[1:]]), np.maximum(y1[i], y1[order[1:]]) 56 | xx2, yy2 = np.minimum(x2[i], x2[order[1:]]), np.minimum(y2[i], y2[order[1:]]) 57 | 58 | w, h = np.maximum(0.0, xx2 - xx1 + 1), np.maximum(0.0, yy2 - yy1 + 1) 59 | ovr = w * h / (areas[i] + areas[order[1:]] - w * h) 60 | 61 | inds = np.where(ovr <= thresh)[0] 62 | order = order[inds + 1] 63 | 64 | return keep 65 | 66 | 67 | def encode(matched, priors, variances): 68 | """Encode the variances from the priorbox layers into the ground truth boxes 69 | we have matched (based on jaccard overlap) with the prior boxes. 70 | Args: 71 | matched: (tensor) Coords of ground truth for each prior in point-form 72 | Shape: [num_priors, 4]. 73 | priors: (tensor) Prior boxes in center-offset form 74 | Shape: [num_priors,4]. 
75 | variances: (list[float]) Variances of priorboxes 76 | Return: 77 | encoded boxes (tensor), Shape: [num_priors, 4] 78 | """ 79 | 80 | # dist b/t match center and prior's center 81 | g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2] 82 | # encode variance 83 | g_cxcy /= (variances[0] * priors[:, 2:]) 84 | # match wh / prior wh 85 | g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:] 86 | g_wh = torch.log(g_wh) / variances[1] 87 | # return target for smooth_l1_loss 88 | return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4] 89 | 90 | 91 | def decode(loc, priors, variances): 92 | """Decode locations from predictions using priors to undo 93 | the encoding we did for offset regression at train time. 94 | Args: 95 | loc (tensor): location predictions for loc layers, 96 | Shape: [num_priors,4] 97 | priors (tensor): Prior boxes in center-offset form. 98 | Shape: [num_priors,4]. 99 | variances: (list[float]) Variances of priorboxes 100 | Return: 101 | decoded bounding box predictions 102 | """ 103 | 104 | boxes = torch.cat(( 105 | priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], 106 | priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) 107 | boxes[:, :2] -= boxes[:, 2:] / 2 108 | boxes[:, 2:] += boxes[:, :2] 109 | return boxes 110 | 111 | def batch_decode(loc, priors, variances): 112 | """Decode locations from predictions using priors to undo 113 | the encoding we did for offset regression at train time. 114 | Args: 115 | loc (tensor): location predictions for loc layers, 116 | Shape: [num_priors,4] 117 | priors (tensor): Prior boxes in center-offset form. 118 | Shape: [num_priors,4]. 119 | variances: (list[float]) Variances of priorboxes 120 | Return: 121 | decoded bounding box predictions 122 | """ 123 | 124 | boxes = torch.cat(( 125 | priors[:, :, :2] + loc[:, :, :2] * variances[0] * priors[:, :, 2:], 126 | priors[:, :, 2:] * torch.exp(loc[:, :, 2:] * variances[1])), 2) 127 | boxes[:, :, :2] -= boxes[:, :, 2:] / 2 128 | boxes[:, :, 2:] += boxes[:, :, :2] 129 | return boxes 130 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/sfd/detect.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | import os 5 | import sys 6 | import cv2 7 | import random 8 | import datetime 9 | import math 10 | import argparse 11 | import numpy as np 12 | 13 | import scipy.io as sio 14 | import zipfile 15 | from .net_s3fd import s3fd 16 | from .bbox import * 17 | 18 | 19 | def detect(net, img, device): 20 | img = img - np.array([104, 117, 123]) 21 | img = img.transpose(2, 0, 1) 22 | img = img.reshape((1,) + img.shape) 23 | 24 | if 'cuda' in device: 25 | torch.backends.cudnn.benchmark = True 26 | 27 | img = torch.from_numpy(img).float().to(device) 28 | BB, CC, HH, WW = img.size() 29 | with torch.no_grad(): 30 | olist = net(img) 31 | 32 | bboxlist = [] 33 | for i in range(len(olist) // 2): 34 | olist[i * 2] = F.softmax(olist[i * 2], dim=1) 35 | olist = [oelem.data.cpu() for oelem in olist] 36 | for i in range(len(olist) // 2): 37 | ocls, oreg = olist[i * 2], olist[i * 2 + 1] 38 | FB, FC, FH, FW = ocls.size() # feature map size 39 | stride = 2**(i + 2) # 4,8,16,32,64,128 40 | anchor = stride * 4 41 | poss = zip(*np.where(ocls[:, 1, :, :] > 0.05)) 42 | for Iindex, hindex, windex in poss: 43 | axc, ayc = stride / 2 + windex * stride, stride / 2 + hindex * stride 44 | score = ocls[0, 1, hindex, windex] 45 | loc = 
oreg[0, :, hindex, windex].contiguous().view(1, 4) 46 | priors = torch.Tensor([[axc / 1.0, ayc / 1.0, stride * 4 / 1.0, stride * 4 / 1.0]]) 47 | variances = [0.1, 0.2] 48 | box = decode(loc, priors, variances) 49 | x1, y1, x2, y2 = box[0] * 1.0 50 | # cv2.rectangle(imgshow,(int(x1),int(y1)),(int(x2),int(y2)),(0,0,255),1) 51 | bboxlist.append([x1, y1, x2, y2, score]) 52 | bboxlist = np.array(bboxlist) 53 | if 0 == len(bboxlist): 54 | bboxlist = np.zeros((1, 5)) 55 | 56 | return bboxlist 57 | 58 | def batch_detect(net, imgs, device): 59 | imgs = imgs - np.array([104, 117, 123]) 60 | imgs = imgs.transpose(0, 3, 1, 2) 61 | 62 | if 'cuda' in device: 63 | torch.backends.cudnn.benchmark = True 64 | 65 | imgs = torch.from_numpy(imgs).float().to(device) 66 | BB, CC, HH, WW = imgs.size() 67 | with torch.no_grad(): 68 | olist = net(imgs) 69 | 70 | bboxlist = [] 71 | for i in range(len(olist) // 2): 72 | olist[i * 2] = F.softmax(olist[i * 2], dim=1) 73 | olist = [oelem.data.cpu() for oelem in olist] 74 | for i in range(len(olist) // 2): 75 | ocls, oreg = olist[i * 2], olist[i * 2 + 1] 76 | FB, FC, FH, FW = ocls.size() # feature map size 77 | stride = 2**(i + 2) # 4,8,16,32,64,128 78 | anchor = stride * 4 79 | poss = zip(*np.where(ocls[:, 1, :, :] > 0.05)) 80 | for Iindex, hindex, windex in poss: 81 | axc, ayc = stride / 2 + windex * stride, stride / 2 + hindex * stride 82 | score = ocls[:, 1, hindex, windex] 83 | loc = oreg[:, :, hindex, windex].contiguous().view(BB, 1, 4) 84 | priors = torch.Tensor([[axc / 1.0, ayc / 1.0, stride * 4 / 1.0, stride * 4 / 1.0]]).view(1, 1, 4) 85 | variances = [0.1, 0.2] 86 | box = batch_decode(loc, priors, variances) 87 | box = box[:, 0] * 1.0 88 | # cv2.rectangle(imgshow,(int(x1),int(y1)),(int(x2),int(y2)),(0,0,255),1) 89 | bboxlist.append(torch.cat([box, score.unsqueeze(1)], 1).cpu().numpy()) 90 | bboxlist = np.array(bboxlist) 91 | if 0 == len(bboxlist): 92 | bboxlist = np.zeros((1, BB, 5)) 93 | 94 | return bboxlist 95 | 96 | def flip_detect(net, img, device): 97 | img = cv2.flip(img, 1) 98 | b = detect(net, img, device) 99 | 100 | bboxlist = np.zeros(b.shape) 101 | bboxlist[:, 0] = img.shape[1] - b[:, 2] 102 | bboxlist[:, 1] = b[:, 1] 103 | bboxlist[:, 2] = img.shape[1] - b[:, 0] 104 | bboxlist[:, 3] = b[:, 3] 105 | bboxlist[:, 4] = b[:, 4] 106 | return bboxlist 107 | 108 | 109 | def pts_to_bb(pts): 110 | min_x, min_y = np.min(pts, axis=0) 111 | max_x, max_y = np.max(pts, axis=0) 112 | return np.array([min_x, min_y, max_x, max_y]) 113 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/sfd/net_s3fd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class L2Norm(nn.Module): 7 | def __init__(self, n_channels, scale=1.0): 8 | super(L2Norm, self).__init__() 9 | self.n_channels = n_channels 10 | self.scale = scale 11 | self.eps = 1e-10 12 | self.weight = nn.Parameter(torch.Tensor(self.n_channels)) 13 | self.weight.data *= 0.0 14 | self.weight.data += self.scale 15 | 16 | def forward(self, x): 17 | norm = x.pow(2).sum(dim=1, keepdim=True).sqrt() + self.eps 18 | x = x / norm * self.weight.view(1, -1, 1, 1) 19 | return x 20 | 21 | 22 | class s3fd(nn.Module): 23 | def __init__(self): 24 | super(s3fd, self).__init__() 25 | self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1) 26 | self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1) 27 | 28 | 
self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1) 29 | self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1) 30 | 31 | self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1) 32 | self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 33 | self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) 34 | 35 | self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1) 36 | self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1) 37 | self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1) 38 | 39 | self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1) 40 | self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1) 41 | self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1) 42 | 43 | self.fc6 = nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=3) 44 | self.fc7 = nn.Conv2d(1024, 1024, kernel_size=1, stride=1, padding=0) 45 | 46 | self.conv6_1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0) 47 | self.conv6_2 = nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1) 48 | 49 | self.conv7_1 = nn.Conv2d(512, 128, kernel_size=1, stride=1, padding=0) 50 | self.conv7_2 = nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1) 51 | 52 | self.conv3_3_norm = L2Norm(256, scale=10) 53 | self.conv4_3_norm = L2Norm(512, scale=8) 54 | self.conv5_3_norm = L2Norm(512, scale=5) 55 | 56 | self.conv3_3_norm_mbox_conf = nn.Conv2d(256, 4, kernel_size=3, stride=1, padding=1) 57 | self.conv3_3_norm_mbox_loc = nn.Conv2d(256, 4, kernel_size=3, stride=1, padding=1) 58 | self.conv4_3_norm_mbox_conf = nn.Conv2d(512, 2, kernel_size=3, stride=1, padding=1) 59 | self.conv4_3_norm_mbox_loc = nn.Conv2d(512, 4, kernel_size=3, stride=1, padding=1) 60 | self.conv5_3_norm_mbox_conf = nn.Conv2d(512, 2, kernel_size=3, stride=1, padding=1) 61 | self.conv5_3_norm_mbox_loc = nn.Conv2d(512, 4, kernel_size=3, stride=1, padding=1) 62 | 63 | self.fc7_mbox_conf = nn.Conv2d(1024, 2, kernel_size=3, stride=1, padding=1) 64 | self.fc7_mbox_loc = nn.Conv2d(1024, 4, kernel_size=3, stride=1, padding=1) 65 | self.conv6_2_mbox_conf = nn.Conv2d(512, 2, kernel_size=3, stride=1, padding=1) 66 | self.conv6_2_mbox_loc = nn.Conv2d(512, 4, kernel_size=3, stride=1, padding=1) 67 | self.conv7_2_mbox_conf = nn.Conv2d(256, 2, kernel_size=3, stride=1, padding=1) 68 | self.conv7_2_mbox_loc = nn.Conv2d(256, 4, kernel_size=3, stride=1, padding=1) 69 | 70 | def forward(self, x): 71 | h = F.relu(self.conv1_1(x)) 72 | h = F.relu(self.conv1_2(h)) 73 | h = F.max_pool2d(h, 2, 2) 74 | 75 | h = F.relu(self.conv2_1(h)) 76 | h = F.relu(self.conv2_2(h)) 77 | h = F.max_pool2d(h, 2, 2) 78 | 79 | h = F.relu(self.conv3_1(h)) 80 | h = F.relu(self.conv3_2(h)) 81 | h = F.relu(self.conv3_3(h)) 82 | f3_3 = h 83 | h = F.max_pool2d(h, 2, 2) 84 | 85 | h = F.relu(self.conv4_1(h)) 86 | h = F.relu(self.conv4_2(h)) 87 | h = F.relu(self.conv4_3(h)) 88 | f4_3 = h 89 | h = F.max_pool2d(h, 2, 2) 90 | 91 | h = F.relu(self.conv5_1(h)) 92 | h = F.relu(self.conv5_2(h)) 93 | h = F.relu(self.conv5_3(h)) 94 | f5_3 = h 95 | h = F.max_pool2d(h, 2, 2) 96 | 97 | h = F.relu(self.fc6(h)) 98 | h = F.relu(self.fc7(h)) 99 | ffc7 = h 100 | h = F.relu(self.conv6_1(h)) 101 | h = F.relu(self.conv6_2(h)) 102 | f6_2 = h 103 | h = F.relu(self.conv7_1(h)) 104 | h = F.relu(self.conv7_2(h)) 105 | f7_2 = h 106 | 107 | f3_3 = self.conv3_3_norm(f3_3) 108 | f4_3 = self.conv4_3_norm(f4_3) 109 | f5_3 = self.conv5_3_norm(f5_3) 110 | 111 | cls1 = 
self.conv3_3_norm_mbox_conf(f3_3) 112 | reg1 = self.conv3_3_norm_mbox_loc(f3_3) 113 | cls2 = self.conv4_3_norm_mbox_conf(f4_3) 114 | reg2 = self.conv4_3_norm_mbox_loc(f4_3) 115 | cls3 = self.conv5_3_norm_mbox_conf(f5_3) 116 | reg3 = self.conv5_3_norm_mbox_loc(f5_3) 117 | cls4 = self.fc7_mbox_conf(ffc7) 118 | reg4 = self.fc7_mbox_loc(ffc7) 119 | cls5 = self.conv6_2_mbox_conf(f6_2) 120 | reg5 = self.conv6_2_mbox_loc(f6_2) 121 | cls6 = self.conv7_2_mbox_conf(f7_2) 122 | reg6 = self.conv7_2_mbox_loc(f7_2) 123 | 124 | # max-out background label 125 | chunk = torch.chunk(cls1, 4, 1) 126 | bmax = torch.max(torch.max(chunk[0], chunk[1]), chunk[2]) 127 | cls1 = torch.cat([bmax, chunk[3]], dim=1) 128 | 129 | return [cls1, reg1, cls2, reg2, cls3, reg3, cls4, reg4, cls5, reg5, cls6, reg6] 130 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/detection/sfd/sfd_detector.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | from torch.utils.model_zoo import load_url 4 | 5 | from ..core import FaceDetector 6 | 7 | from .net_s3fd import s3fd 8 | from .bbox import * 9 | from .detect import * 10 | 11 | models_urls = { 12 | 's3fd': 'https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth', 13 | } 14 | 15 | 16 | class SFDDetector(FaceDetector): 17 | def __init__(self, device, path_to_detector=os.path.join(os.path.dirname(os.path.abspath(__file__)), 's3fd.pth'), verbose=False): 18 | super(SFDDetector, self).__init__(device, verbose) 19 | 20 | # Initialise the face detector 21 | if not os.path.isfile(path_to_detector): 22 | model_weights = load_url(models_urls['s3fd']) 23 | else: 24 | model_weights = torch.load(path_to_detector) 25 | 26 | self.face_detector = s3fd() 27 | self.face_detector.load_state_dict(model_weights) 28 | self.face_detector.to(device) 29 | self.face_detector.eval() 30 | 31 | def detect_from_image(self, tensor_or_path): 32 | image = self.tensor_or_path_to_ndarray(tensor_or_path) 33 | 34 | bboxlist = detect(self.face_detector, image, device=self.device) 35 | keep = nms(bboxlist, 0.3) 36 | bboxlist = bboxlist[keep, :] 37 | bboxlist = [x for x in bboxlist if x[-1] > 0.5] 38 | 39 | return bboxlist 40 | 41 | def detect_from_batch(self, images): 42 | bboxlists = batch_detect(self.face_detector, images, device=self.device) 43 | keeps = [nms(bboxlists[:, i, :], 0.3) for i in range(bboxlists.shape[1])] 44 | bboxlists = [bboxlists[keep, i, :] for i, keep in enumerate(keeps)] 45 | bboxlists = [[x for x in bboxlist if x[-1] > 0.5] for bboxlist in bboxlists] 46 | 47 | return bboxlists 48 | 49 | @property 50 | def reference_scale(self): 51 | return 195 52 | 53 | @property 54 | def reference_x_shift(self): 55 | return 0 56 | 57 | @property 58 | def reference_y_shift(self): 59 | return 0 60 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import math 5 | 6 | 7 | def conv3x3(in_planes, out_planes, strd=1, padding=1, bias=False): 8 | "3x3 convolution with padding" 9 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, 10 | stride=strd, padding=padding, bias=bias) 11 | 12 | 13 | class ConvBlock(nn.Module): 14 | def __init__(self, in_planes, out_planes): 15 | super(ConvBlock, self).__init__() 16 | self.bn1 = 
nn.BatchNorm2d(in_planes) 17 | self.conv1 = conv3x3(in_planes, int(out_planes / 2)) 18 | self.bn2 = nn.BatchNorm2d(int(out_planes / 2)) 19 | self.conv2 = conv3x3(int(out_planes / 2), int(out_planes / 4)) 20 | self.bn3 = nn.BatchNorm2d(int(out_planes / 4)) 21 | self.conv3 = conv3x3(int(out_planes / 4), int(out_planes / 4)) 22 | 23 | if in_planes != out_planes: 24 | self.downsample = nn.Sequential( 25 | nn.BatchNorm2d(in_planes), 26 | nn.ReLU(True), 27 | nn.Conv2d(in_planes, out_planes, 28 | kernel_size=1, stride=1, bias=False), 29 | ) 30 | else: 31 | self.downsample = None 32 | 33 | def forward(self, x): 34 | residual = x 35 | 36 | out1 = self.bn1(x) 37 | out1 = F.relu(out1, True) 38 | out1 = self.conv1(out1) 39 | 40 | out2 = self.bn2(out1) 41 | out2 = F.relu(out2, True) 42 | out2 = self.conv2(out2) 43 | 44 | out3 = self.bn3(out2) 45 | out3 = F.relu(out3, True) 46 | out3 = self.conv3(out3) 47 | 48 | out3 = torch.cat((out1, out2, out3), 1) 49 | 50 | if self.downsample is not None: 51 | residual = self.downsample(residual) 52 | 53 | out3 += residual 54 | 55 | return out3 56 | 57 | 58 | class Bottleneck(nn.Module): 59 | 60 | expansion = 4 61 | 62 | def __init__(self, inplanes, planes, stride=1, downsample=None): 63 | super(Bottleneck, self).__init__() 64 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 65 | self.bn1 = nn.BatchNorm2d(planes) 66 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 67 | padding=1, bias=False) 68 | self.bn2 = nn.BatchNorm2d(planes) 69 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 70 | self.bn3 = nn.BatchNorm2d(planes * 4) 71 | self.relu = nn.ReLU(inplace=True) 72 | self.downsample = downsample 73 | self.stride = stride 74 | 75 | def forward(self, x): 76 | residual = x 77 | 78 | out = self.conv1(x) 79 | out = self.bn1(out) 80 | out = self.relu(out) 81 | 82 | out = self.conv2(out) 83 | out = self.bn2(out) 84 | out = self.relu(out) 85 | 86 | out = self.conv3(out) 87 | out = self.bn3(out) 88 | 89 | if self.downsample is not None: 90 | residual = self.downsample(x) 91 | 92 | out += residual 93 | out = self.relu(out) 94 | 95 | return out 96 | 97 | 98 | class HourGlass(nn.Module): 99 | def __init__(self, num_modules, depth, num_features): 100 | super(HourGlass, self).__init__() 101 | self.num_modules = num_modules 102 | self.depth = depth 103 | self.features = num_features 104 | 105 | self._generate_network(self.depth) 106 | 107 | def _generate_network(self, level): 108 | self.add_module('b1_' + str(level), ConvBlock(self.features, self.features)) 109 | 110 | self.add_module('b2_' + str(level), ConvBlock(self.features, self.features)) 111 | 112 | if level > 1: 113 | self._generate_network(level - 1) 114 | else: 115 | self.add_module('b2_plus_' + str(level), ConvBlock(self.features, self.features)) 116 | 117 | self.add_module('b3_' + str(level), ConvBlock(self.features, self.features)) 118 | 119 | def _forward(self, level, inp): 120 | # Upper branch 121 | up1 = inp 122 | up1 = self._modules['b1_' + str(level)](up1) 123 | 124 | # Lower branch 125 | low1 = F.avg_pool2d(inp, 2, stride=2) 126 | low1 = self._modules['b2_' + str(level)](low1) 127 | 128 | if level > 1: 129 | low2 = self._forward(level - 1, low1) 130 | else: 131 | low2 = low1 132 | low2 = self._modules['b2_plus_' + str(level)](low2) 133 | 134 | low3 = low2 135 | low3 = self._modules['b3_' + str(level)](low3) 136 | 137 | up2 = F.interpolate(low3, scale_factor=2, mode='nearest') 138 | 139 | return up1 + up2 140 | 141 | def forward(self, x): 142 | 
return self._forward(self.depth, x) 143 | 144 | 145 | class FAN(nn.Module): 146 | 147 | def __init__(self, num_modules=1): 148 | super(FAN, self).__init__() 149 | self.num_modules = num_modules 150 | 151 | # Base part 152 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3) 153 | self.bn1 = nn.BatchNorm2d(64) 154 | self.conv2 = ConvBlock(64, 128) 155 | self.conv3 = ConvBlock(128, 128) 156 | self.conv4 = ConvBlock(128, 256) 157 | 158 | # Stacking part 159 | for hg_module in range(self.num_modules): 160 | self.add_module('m' + str(hg_module), HourGlass(1, 4, 256)) 161 | self.add_module('top_m_' + str(hg_module), ConvBlock(256, 256)) 162 | self.add_module('conv_last' + str(hg_module), 163 | nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0)) 164 | self.add_module('bn_end' + str(hg_module), nn.BatchNorm2d(256)) 165 | self.add_module('l' + str(hg_module), nn.Conv2d(256, 166 | 68, kernel_size=1, stride=1, padding=0)) 167 | 168 | if hg_module < self.num_modules - 1: 169 | self.add_module( 170 | 'bl' + str(hg_module), nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0)) 171 | self.add_module('al' + str(hg_module), nn.Conv2d(68, 172 | 256, kernel_size=1, stride=1, padding=0)) 173 | 174 | def forward(self, x): 175 | x = F.relu(self.bn1(self.conv1(x)), True) 176 | x = F.avg_pool2d(self.conv2(x), 2, stride=2) 177 | x = self.conv3(x) 178 | x = self.conv4(x) 179 | 180 | previous = x 181 | 182 | outputs = [] 183 | for i in range(self.num_modules): 184 | hg = self._modules['m' + str(i)](previous) 185 | 186 | ll = hg 187 | ll = self._modules['top_m_' + str(i)](ll) 188 | 189 | ll = F.relu(self._modules['bn_end' + str(i)] 190 | (self._modules['conv_last' + str(i)](ll)), True) 191 | 192 | # Predict heatmaps 193 | tmp_out = self._modules['l' + str(i)](ll) 194 | outputs.append(tmp_out) 195 | 196 | if i < self.num_modules - 1: 197 | ll = self._modules['bl' + str(i)](ll) 198 | tmp_out_ = self._modules['al' + str(i)](tmp_out) 199 | previous = previous + ll + tmp_out_ 200 | 201 | return outputs 202 | 203 | 204 | class ResNetDepth(nn.Module): 205 | 206 | def __init__(self, block=Bottleneck, layers=[3, 8, 36, 3], num_classes=68): 207 | self.inplanes = 64 208 | super(ResNetDepth, self).__init__() 209 | self.conv1 = nn.Conv2d(3 + 68, 64, kernel_size=7, stride=2, padding=3, 210 | bias=False) 211 | self.bn1 = nn.BatchNorm2d(64) 212 | self.relu = nn.ReLU(inplace=True) 213 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 214 | self.layer1 = self._make_layer(block, 64, layers[0]) 215 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 216 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 217 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 218 | self.avgpool = nn.AvgPool2d(7) 219 | self.fc = nn.Linear(512 * block.expansion, num_classes) 220 | 221 | for m in self.modules(): 222 | if isinstance(m, nn.Conv2d): 223 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 224 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 225 | elif isinstance(m, nn.BatchNorm2d): 226 | m.weight.data.fill_(1) 227 | m.bias.data.zero_() 228 | 229 | def _make_layer(self, block, planes, blocks, stride=1): 230 | downsample = None 231 | if stride != 1 or self.inplanes != planes * block.expansion: 232 | downsample = nn.Sequential( 233 | nn.Conv2d(self.inplanes, planes * block.expansion, 234 | kernel_size=1, stride=stride, bias=False), 235 | nn.BatchNorm2d(planes * block.expansion), 236 | ) 237 | 238 | layers = [] 239 | layers.append(block(self.inplanes, planes, stride, downsample)) 240 | self.inplanes = planes * block.expansion 241 | for i in range(1, blocks): 242 | layers.append(block(self.inplanes, planes)) 243 | 244 | return nn.Sequential(*layers) 245 | 246 | def forward(self, x): 247 | x = self.conv1(x) 248 | x = self.bn1(x) 249 | x = self.relu(x) 250 | x = self.maxpool(x) 251 | 252 | x = self.layer1(x) 253 | x = self.layer2(x) 254 | x = self.layer3(x) 255 | x = self.layer4(x) 256 | 257 | x = self.avgpool(x) 258 | x = x.view(x.size(0), -1) 259 | x = self.fc(x) 260 | 261 | return x 262 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/face_detection/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import sys 4 | import time 5 | import torch 6 | import math 7 | import numpy as np 8 | import cv2 9 | 10 | 11 | def _gaussian( 12 | size=3, sigma=0.25, amplitude=1, normalize=False, width=None, 13 | height=None, sigma_horz=None, sigma_vert=None, mean_horz=0.5, 14 | mean_vert=0.5): 15 | # handle some defaults 16 | if width is None: 17 | width = size 18 | if height is None: 19 | height = size 20 | if sigma_horz is None: 21 | sigma_horz = sigma 22 | if sigma_vert is None: 23 | sigma_vert = sigma 24 | center_x = mean_horz * width + 0.5 25 | center_y = mean_vert * height + 0.5 26 | gauss = np.empty((height, width), dtype=np.float32) 27 | # generate kernel 28 | for i in range(height): 29 | for j in range(width): 30 | gauss[i][j] = amplitude * math.exp(-(math.pow((j + 1 - center_x) / ( 31 | sigma_horz * width), 2) / 2.0 + math.pow((i + 1 - center_y) / (sigma_vert * height), 2) / 2.0)) 32 | if normalize: 33 | gauss = gauss / np.sum(gauss) 34 | return gauss 35 | 36 | 37 | def draw_gaussian(image, point, sigma): 38 | # Check if the gaussian is inside 39 | ul = [math.floor(point[0] - 3 * sigma), math.floor(point[1] - 3 * sigma)] 40 | br = [math.floor(point[0] + 3 * sigma), math.floor(point[1] + 3 * sigma)] 41 | if (ul[0] > image.shape[1] or ul[1] > image.shape[0] or br[0] < 1 or br[1] < 1): 42 | return image 43 | size = 6 * sigma + 1 44 | g = _gaussian(size) 45 | g_x = [int(max(1, -ul[0])), int(min(br[0], image.shape[1])) - int(max(1, ul[0])) + int(max(1, -ul[0]))] 46 | g_y = [int(max(1, -ul[1])), int(min(br[1], image.shape[0])) - int(max(1, ul[1])) + int(max(1, -ul[1]))] 47 | img_x = [int(max(1, ul[0])), int(min(br[0], image.shape[1]))] 48 | img_y = [int(max(1, ul[1])), int(min(br[1], image.shape[0]))] 49 | assert (g_x[0] > 0 and g_y[1] > 0) 50 | image[img_y[0] - 1:img_y[1], img_x[0] - 1:img_x[1] 51 | ] = image[img_y[0] - 1:img_y[1], img_x[0] - 1:img_x[1]] + g[g_y[0] - 1:g_y[1], g_x[0] - 1:g_x[1]] 52 | image[image > 1] = 1 53 | return image 54 | 55 | 56 | def transform(point, center, scale, resolution, invert=False): 57 | """Generate and affine transformation matrix. 
58 | 59 | Given a set of points, a center, a scale and a targer resolution, the 60 | function generates and affine transformation matrix. If invert is ``True`` 61 | it will produce the inverse transformation. 62 | 63 | Arguments: 64 | point {torch.tensor} -- the input 2D point 65 | center {torch.tensor or numpy.array} -- the center around which to perform the transformations 66 | scale {float} -- the scale of the face/object 67 | resolution {float} -- the output resolution 68 | 69 | Keyword Arguments: 70 | invert {bool} -- define wherever the function should produce the direct or the 71 | inverse transformation matrix (default: {False}) 72 | """ 73 | _pt = torch.ones(3) 74 | _pt[0] = point[0] 75 | _pt[1] = point[1] 76 | 77 | h = 200.0 * scale 78 | t = torch.eye(3) 79 | t[0, 0] = resolution / h 80 | t[1, 1] = resolution / h 81 | t[0, 2] = resolution * (-center[0] / h + 0.5) 82 | t[1, 2] = resolution * (-center[1] / h + 0.5) 83 | 84 | if invert: 85 | t = torch.inverse(t) 86 | 87 | new_point = (torch.matmul(t, _pt))[0:2] 88 | 89 | return new_point.int() 90 | 91 | 92 | def crop(image, center, scale, resolution=256.0): 93 | """Center crops an image or set of heatmaps 94 | 95 | Arguments: 96 | image {numpy.array} -- an rgb image 97 | center {numpy.array} -- the center of the object, usually the same as of the bounding box 98 | scale {float} -- scale of the face 99 | 100 | Keyword Arguments: 101 | resolution {float} -- the size of the output cropped image (default: {256.0}) 102 | 103 | Returns: 104 | [type] -- [description] 105 | """ # Crop around the center point 106 | """ Crops the image around the center. Input is expected to be an np.ndarray """ 107 | ul = transform([1, 1], center, scale, resolution, True) 108 | br = transform([resolution, resolution], center, scale, resolution, True) 109 | # pad = math.ceil(torch.norm((ul - br).float()) / 2.0 - (br[0] - ul[0]) / 2.0) 110 | if image.ndim > 2: 111 | newDim = np.array([br[1] - ul[1], br[0] - ul[0], 112 | image.shape[2]], dtype=np.int32) 113 | newImg = np.zeros(newDim, dtype=np.uint8) 114 | else: 115 | newDim = np.array([br[1] - ul[1], br[0] - ul[0]], dtype=np.int) 116 | newImg = np.zeros(newDim, dtype=np.uint8) 117 | ht = image.shape[0] 118 | wd = image.shape[1] 119 | newX = np.array( 120 | [max(1, -ul[0] + 1), min(br[0], wd) - ul[0]], dtype=np.int32) 121 | newY = np.array( 122 | [max(1, -ul[1] + 1), min(br[1], ht) - ul[1]], dtype=np.int32) 123 | oldX = np.array([max(1, ul[0] + 1), min(br[0], wd)], dtype=np.int32) 124 | oldY = np.array([max(1, ul[1] + 1), min(br[1], ht)], dtype=np.int32) 125 | newImg[newY[0] - 1:newY[1], newX[0] - 1:newX[1] 126 | ] = image[oldY[0] - 1:oldY[1], oldX[0] - 1:oldX[1], :] 127 | newImg = cv2.resize(newImg, dsize=(int(resolution), int(resolution)), 128 | interpolation=cv2.INTER_LINEAR) 129 | return newImg 130 | 131 | 132 | def get_preds_fromhm(hm, center=None, scale=None): 133 | """Obtain (x,y) coordinates given a set of N heatmaps. If the center 134 | and the scale is provided the function will return the points also in 135 | the original coordinate frame. 
136 | 137 | Arguments: 138 | hm {torch.tensor} -- the predicted heatmaps, of shape [B, N, W, H] 139 | 140 | Keyword Arguments: 141 | center {torch.tensor} -- the center of the bounding box (default: {None}) 142 | scale {float} -- face scale (default: {None}) 143 | """ 144 | max, idx = torch.max( 145 | hm.view(hm.size(0), hm.size(1), hm.size(2) * hm.size(3)), 2) 146 | idx += 1 147 | preds = idx.view(idx.size(0), idx.size(1), 1).repeat(1, 1, 2).float() 148 | preds[..., 0].apply_(lambda x: (x - 1) % hm.size(3) + 1) 149 | preds[..., 1].add_(-1).div_(hm.size(2)).floor_().add_(1) 150 | 151 | for i in range(preds.size(0)): 152 | for j in range(preds.size(1)): 153 | hm_ = hm[i, j, :] 154 | pX, pY = int(preds[i, j, 0]) - 1, int(preds[i, j, 1]) - 1 155 | if pX > 0 and pX < 63 and pY > 0 and pY < 63: 156 | diff = torch.FloatTensor( 157 | [hm_[pY, pX + 1] - hm_[pY, pX - 1], 158 | hm_[pY + 1, pX] - hm_[pY - 1, pX]]) 159 | preds[i, j].add_(diff.sign_().mul_(.25)) 160 | 161 | preds.add_(-.5) 162 | 163 | preds_orig = torch.zeros(preds.size()) 164 | if center is not None and scale is not None: 165 | for i in range(hm.size(0)): 166 | for j in range(hm.size(1)): 167 | preds_orig[i, j] = transform( 168 | preds[i, j], center, scale, hm.size(2), True) 169 | 170 | return preds, preds_orig 171 | 172 | def get_preds_fromhm_batch(hm, centers=None, scales=None): 173 | """Obtain (x,y) coordinates given a set of N heatmaps. If the centers 174 | and the scales is provided the function will return the points also in 175 | the original coordinate frame. 176 | 177 | Arguments: 178 | hm {torch.tensor} -- the predicted heatmaps, of shape [B, N, W, H] 179 | 180 | Keyword Arguments: 181 | centers {torch.tensor} -- the centers of the bounding box (default: {None}) 182 | scales {float} -- face scales (default: {None}) 183 | """ 184 | max, idx = torch.max( 185 | hm.view(hm.size(0), hm.size(1), hm.size(2) * hm.size(3)), 2) 186 | idx += 1 187 | preds = idx.view(idx.size(0), idx.size(1), 1).repeat(1, 1, 2).float() 188 | preds[..., 0].apply_(lambda x: (x - 1) % hm.size(3) + 1) 189 | preds[..., 1].add_(-1).div_(hm.size(2)).floor_().add_(1) 190 | 191 | for i in range(preds.size(0)): 192 | for j in range(preds.size(1)): 193 | hm_ = hm[i, j, :] 194 | pX, pY = int(preds[i, j, 0]) - 1, int(preds[i, j, 1]) - 1 195 | if pX > 0 and pX < 63 and pY > 0 and pY < 63: 196 | diff = torch.FloatTensor( 197 | [hm_[pY, pX + 1] - hm_[pY, pX - 1], 198 | hm_[pY + 1, pX] - hm_[pY - 1, pX]]) 199 | preds[i, j].add_(diff.sign_().mul_(.25)) 200 | 201 | preds.add_(-.5) 202 | 203 | preds_orig = torch.zeros(preds.size()) 204 | if centers is not None and scales is not None: 205 | for i in range(hm.size(0)): 206 | for j in range(hm.size(1)): 207 | preds_orig[i, j] = transform( 208 | preds[i, j], centers[i], scales[i], hm.size(2), True) 209 | 210 | return preds, preds_orig 211 | 212 | def shuffle_lr(parts, pairs=None): 213 | """Shuffle the points left-right according to the axis of symmetry 214 | of the object. 215 | 216 | Arguments: 217 | parts {torch.tensor} -- a 3D or 4D object containing the 218 | heatmaps. 
219 | 220 | Keyword Arguments: 221 | pairs {list of integers} -- [order of the flipped points] (default: {None}) 222 | """ 223 | if pairs is None: 224 | pairs = [16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 225 | 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 27, 28, 29, 30, 35, 226 | 34, 33, 32, 31, 45, 44, 43, 42, 47, 46, 39, 38, 37, 36, 41, 227 | 40, 54, 53, 52, 51, 50, 49, 48, 59, 58, 57, 56, 55, 64, 63, 228 | 62, 61, 60, 67, 66, 65] 229 | if parts.ndimension() == 3: 230 | parts = parts[pairs, ...] 231 | else: 232 | parts = parts[:, pairs, ...] 233 | 234 | return parts 235 | 236 | 237 | def flip(tensor, is_label=False): 238 | """Flip an image or a set of heatmaps left-right 239 | 240 | Arguments: 241 | tensor {numpy.array or torch.tensor} -- [the input image or heatmaps] 242 | 243 | Keyword Arguments: 244 | is_label {bool} -- [denote wherever the input is an image or a set of heatmaps ] (default: {False}) 245 | """ 246 | if not torch.is_tensor(tensor): 247 | tensor = torch.from_numpy(tensor) 248 | 249 | if is_label: 250 | tensor = shuffle_lr(tensor).flip(tensor.ndimension() - 1) 251 | else: 252 | tensor = tensor.flip(tensor.ndimension() - 1) 253 | 254 | return tensor 255 | 256 | # From pyzolib/paths.py (https://bitbucket.org/pyzo/pyzolib/src/tip/paths.py) 257 | 258 | 259 | def appdata_dir(appname=None, roaming=False): 260 | """ appdata_dir(appname=None, roaming=False) 261 | 262 | Get the path to the application directory, where applications are allowed 263 | to write user specific files (e.g. configurations). For non-user specific 264 | data, consider using common_appdata_dir(). 265 | If appname is given, a subdir is appended (and created if necessary). 266 | If roaming is True, will prefer a roaming directory (Windows Vista/7). 267 | """ 268 | 269 | # Define default user directory 270 | userDir = os.getenv('FACEALIGNMENT_USERDIR', None) 271 | if userDir is None: 272 | userDir = os.path.expanduser('~') 273 | if not os.path.isdir(userDir): # pragma: no cover 274 | userDir = '/var/tmp' # issue #54 275 | 276 | # Get system app data dir 277 | path = None 278 | if sys.platform.startswith('win'): 279 | path1, path2 = os.getenv('LOCALAPPDATA'), os.getenv('APPDATA') 280 | path = (path2 or path1) if roaming else (path1 or path2) 281 | elif sys.platform.startswith('darwin'): 282 | path = os.path.join(userDir, 'Library', 'Application Support') 283 | # On Linux and as fallback 284 | if not (path and os.path.isdir(path)): 285 | path = userDir 286 | 287 | # Maybe we should store things local to the executable (in case of a 288 | # portable distro or a frozen application that wants to be portable) 289 | prefix = sys.prefix 290 | if getattr(sys, 'frozen', None): 291 | prefix = os.path.abspath(os.path.dirname(sys.executable)) 292 | for reldir in ('settings', '../settings'): 293 | localpath = os.path.abspath(os.path.join(prefix, reldir)) 294 | if os.path.isdir(localpath): # pragma: no cover 295 | try: 296 | open(os.path.join(localpath, 'test.write'), 'wb').close() 297 | os.remove(os.path.join(localpath, 'test.write')) 298 | except IOError: 299 | pass # We cannot write in this directory 300 | else: 301 | path = localpath 302 | break 303 | 304 | # Get path specific for this app 305 | if appname: 306 | if path == userDir: 307 | appname = '.' 
+ appname.lstrip('.') # Make it a hidden directory 308 | path = os.path.join(path, appname) 309 | if not os.path.isdir(path): # pragma: no cover 310 | os.mkdir(path) 311 | 312 | # Done 313 | return path 314 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/hparams.py: -------------------------------------------------------------------------------- 1 | from glob import glob 2 | import os 3 | 4 | def get_image_list(data_root, split): 5 | filelist = [] 6 | 7 | with open('filelists/{}.txt'.format(split)) as f: 8 | for line in f: 9 | line = line.strip() 10 | if ' ' in line: line = line.split()[0] 11 | filelist.append(os.path.join(data_root, line)) 12 | 13 | return filelist 14 | 15 | class HParams: 16 | def __init__(self, **kwargs): 17 | self.data = {} 18 | 19 | for key, value in kwargs.items(): 20 | self.data[key] = value 21 | 22 | def __getattr__(self, key): 23 | if key not in self.data: 24 | raise AttributeError("'HParams' object has no attribute %s" % key) 25 | return self.data[key] 26 | 27 | def set_hparam(self, key, value): 28 | self.data[key] = value 29 | 30 | 31 | # Default hyperparameters 32 | hparams = HParams( 33 | num_mels=80, # Number of mel-spectrogram channels and local conditioning dimensionality 34 | # network 35 | rescale=True, # Whether to rescale audio prior to preprocessing 36 | rescaling_max=0.9, # Rescaling value 37 | 38 | # Use LWS (https://github.com/Jonathan-LeRoux/lws) for STFT and phase reconstruction 39 | # It"s preferred to set True to use with https://github.com/r9y9/wavenet_vocoder 40 | # Does not work if n_ffit is not multiple of hop_size!! 41 | use_lws=False, 42 | 43 | n_fft=800, # Extra window size is filled with 0 paddings to match this parameter 44 | hop_size=200, # For 16000Hz, 200 = 12.5 ms (0.0125 * sample_rate) 45 | win_size=800, # For 16000Hz, 800 = 50 ms (If None, win_size = n_fft) (0.05 * sample_rate) 46 | sample_rate=16000, # 16000Hz (corresponding to librispeech) (sox --i ) 47 | 48 | frame_shift_ms=None, # Can replace hop_size parameter. (Recommended: 12.5) 49 | 50 | # Mel and Linear spectrograms normalization/scaling and clipping 51 | signal_normalization=True, 52 | # Whether to normalize mel spectrograms to some predefined range (following below parameters) 53 | allow_clipping_in_normalization=True, # Only relevant if mel_normalization = True 54 | symmetric_mels=True, 55 | # Whether to scale the data to be symmetric around 0. (Also multiplies the output range by 2, 56 | # faster and cleaner convergence) 57 | max_abs_value=4., 58 | # max absolute value of data. If symmetric, data will be [-max, max] else [0, max] (Must not 59 | # be too big to avoid gradient explosion, 60 | # not too small for fast convergence) 61 | # Contribution by @begeekmyfriend 62 | # Spectrogram Pre-Emphasis (Lfilter: Reduce spectrogram noise and helps model certitude 63 | # levels. Also allows for better G&L phase reconstruction) 64 | preemphasize=True, # whether to apply filter 65 | preemphasis=0.97, # filter coefficient. 66 | 67 | # Limits 68 | min_level_db=-100, 69 | ref_level_db=20, 70 | fmin=55, 71 | # Set this to 55 if your speaker is male! if female, 95 should help taking off noise. (To 72 | # test depending on dataset. Pitch info: male~[65, 260], female~[100, 525]) 73 | fmax=7600, # To be increased/reduced depending on data. 
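    # Note: with sample_rate=16000 and hop_size=200, spectrogram frames produced from these
    # settings advance every 12.5 ms (80 per second), i.e. 3.2 frames per video frame at the
    # fps=25 set below. (The Lipsync3D mesh dataset itself uses a separate librosa STFT with
    # hop_length=160; see lipsync3d/dataset.py.)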
74 | 75 | ###################### Our training parameters ################################# 76 | img_size=96, 77 | fps=25, 78 | 79 | batch_size=8, 80 | initial_learning_rate=1e-4, 81 | nepochs=200000000000000000, ### ctrl + c, stop whenever eval loss is consistently greater than train loss for ~10 epochs 82 | num_workers=16, 83 | checkpoint_interval=10000, 84 | #eval_interval= 10 85 | save_img_interval = 10, 86 | save_optimizer_state=True, 87 | 88 | syncnet_wt=0.00, # is initially zero, will be set automatically to 0.03 later. Leads to faster convergence. 89 | syncnet_batch_size=64, 90 | syncnet_lr=1e-4, 91 | syncnet_eval_interval=10000, 92 | syncnet_checkpoint_interval=10000, 93 | 94 | disc_wt=0.07, 95 | disc_initial_learning_rate= 1e-5, #1e-4, 96 | 97 | LC_wt = 0.01 98 | ) 99 | 100 | 101 | def hparams_debug_string(): 102 | values = hparams.values() 103 | hp = [" %s: %s" % (name, values[name]) for name in sorted(values) if name != "sentences"] 104 | return "Hyperparameters:\n" + "\n".join(hp) 105 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/rescale_image.py: -------------------------------------------------------------------------------- 1 | """ 2 | Crop upper boddy in every video frame, square bounding box is averaged among all frames and fixed. 3 | """ 4 | 5 | import os 6 | import cv2 7 | import argparse 8 | from torch import full 9 | from tqdm import tqdm 10 | import numpy as np 11 | from .util import load_coef, create_dir, get_file_list 12 | 13 | # Resacle overlay and render images. Paste it on full image. 14 | def rescale_and_paste(crop_region, full_image, target_image): 15 | top, bottom, left, right = crop_region 16 | height = bottom - top 17 | width = right - left 18 | 19 | pasted_image = full_image.copy() 20 | rescaled_target = cv2.resize(target_image, (width, height), interpolation=cv2.INTER_AREA) 21 | pasted_image[top:bottom, left:right] = rescaled_target 22 | 23 | return pasted_image 24 | 25 | 26 | if __name__ == '__main__': 27 | parser = argparse.ArgumentParser(description='Process some integers.') 28 | parser.add_argument('--data_dir', type=str, default=None) 29 | args = parser.parse_args() 30 | 31 | crop_region_list = load_coef(os.path.join(args.data_dir, 'crop_region')) 32 | full_image_list = get_file_list(os.path.join(args.data_dir, 'full')) 33 | overlay_image_list = get_file_list(os.path.join(args.data_dir, 'overlay')) 34 | render_image_list = get_file_list(os.path.join(args.data_dir, 'render')) 35 | 36 | create_dir(os.path.join(args.data_dir, 'rescaled_overlay')) 37 | create_dir(os.path.join(args.data_dir, 'rescaled_render')) 38 | 39 | for i in tqdm(range(len(full_image_list))): 40 | full_image = cv2.imread(full_image_list[i]) 41 | overlay_image = cv2.imread(overlay_image_list[i]) 42 | render_image = cv2.imread(render_image_list[i]) 43 | crop_region = crop_region_list[i] 44 | 45 | H, W, _ = full_image.shape 46 | empty_image = np.zeros((H, W, 3), np.uint8) 47 | 48 | pasted_overlay = rescale_and_paste(crop_region, full_image, overlay_image) 49 | pasted_render = rescale_and_paste(crop_region, empty_image, render_image) 50 | 51 | cv2.imwrite(os.path.join(args.data_dir, 'rescaled_overlay', os.path.basename(full_image_list[i])), pasted_overlay) 52 | cv2.imwrite(os.path.join(args.data_dir, 'rescaled_render', os.path.basename(full_image_list[i])), pasted_render) -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/util-checkpoint.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | from tqdm import tqdm 4 | import cv2 5 | from skimage import io 6 | import torch 7 | import face_alignment 8 | 9 | 10 | def create_dir(dir_name): 11 | if not os.path.exists(dir_name): 12 | os.makedirs(dir_name) 13 | 14 | 15 | def get_file_list(data_dir, suffix=""): 16 | file_list = [] 17 | 18 | for dirpath, _, filenames in os.walk(data_dir): 19 | for filename in filenames: 20 | if suffix in filename: 21 | file_list.append(os.path.join(dirpath, filename)) 22 | 23 | file_list = sorted(file_list) 24 | 25 | return file_list 26 | 27 | 28 | def load_state_dict(model, fname): 29 | """ 30 | Set parameters converted from Caffe models authors of VGGFace2 provide. 31 | See https://www.robots.ox.ac.uk/~vgg/data/vgg_face2/. 32 | 33 | Arguments: 34 | model: model 35 | fname: file name of parameters converted from a Caffe model, assuming the file format is Pickle. 36 | """ 37 | with open(fname, 'rb') as f: 38 | weights = pickle.load(f, encoding='latin1') 39 | 40 | own_state = model.state_dict() 41 | 42 | for name, param in weights.items(): 43 | if name in own_state: 44 | try: 45 | own_state[name].copy_(torch.from_numpy(param)) 46 | except Exception: 47 | raise RuntimeError('While copying the parameter named {}, whose dimensions in the model are {} and whose ' 48 | 'dimensions in the checkpoint are {}.'.format(name, own_state[name].size(), param.size)) 49 | else: 50 | # raise KeyError('unexpected key "{}" in state_dict'.format(name)) 51 | pass 52 | 53 | 54 | def load_coef(data_dir, load_num=float('inf')): 55 | coef_list = [] 56 | count = 0 57 | 58 | for filename in tqdm(get_file_list(data_dir)): 59 | coef = torch.load(filename) 60 | coef_list.append(coef) 61 | count += 1 62 | if count >= load_num: 63 | break 64 | 65 | return coef_list 66 | 67 | 68 | def landmark_detection(image_list, save_path): 69 | fa_3d = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D, flip_input=False, device='cuda') 70 | 71 | landmark_dict = {} 72 | 73 | for i in tqdm(range(len(image_list))): 74 | image_name = image_list[i] 75 | image = io.imread(image_name) 76 | preds = fa_3d.get_landmarks(image) 77 | 78 | assert preds is not None 79 | 80 | landmark_dict[image_name] = preds[0][:, :2] 81 | 82 | with open(save_path, 'wb') as f: 83 | pickle.dump(landmark_dict, f) 84 | 85 | 86 | def plot_landmark(data_dir): 87 | create_dir(os.path.join(data_dir, 'landmark')) 88 | 89 | with open(os.path.join(data_dir, 'landmark.pkl'), 'rb') as f: 90 | landmark_dict = pickle.load(f) 91 | 92 | image_list = get_file_list(os.path.join(data_dir, 'crop')) 93 | 94 | for image_name in tqdm(image_list): 95 | image = cv2.imread(image_name) 96 | landmark = landmark_dict[image_name] 97 | 98 | for point in landmark: 99 | image = cv2.circle(image, (point[0], point[1]), radius=0, color=(255, 0, 0), thickness=-1) 100 | 101 | cv2.imwrite(os.path.join(data_dir, 'landmark', os.path.basename(image_name)), image) 102 | -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | from tqdm import tqdm 4 | import cv2 5 | from skimage import io 6 | import torch 7 | import numpy as np 8 | import face_alignment 9 | from facenet_pytorch import InceptionResnetV1 10 | from PIL import Image 11 | from natsort import natsorted 12 | 13 | def create_dir(dir_name): 14 | if not 
os.path.exists(dir_name): 15 | os.makedirs(dir_name) 16 | 17 | 18 | def get_file_list(data_dir, suffix=""): 19 | file_list = [] 20 | 21 | for dirpath, _, filenames in os.walk(data_dir): 22 | for filename in filenames: 23 | if suffix in filename: 24 | file_list.append(os.path.join(dirpath, filename)) 25 | 26 | file_list = natsorted(file_list) 27 | 28 | return file_list 29 | 30 | 31 | def load_state_dict(model, fname): 32 | """ 33 | Set parameters converted from Caffe models authors of VGGFace2 provide. 34 | See https://www.robots.ox.ac.uk/~vgg/data/vgg_face2/. 35 | 36 | Arguments: 37 | model: model 38 | fname: file name of parameters converted from a Caffe model, assuming the file format is Pickle. 39 | """ 40 | with open(fname, 'rb') as f: 41 | weights = pickle.load(f, encoding='latin1') 42 | 43 | own_state = model.state_dict() 44 | 45 | for name, param in weights.items(): 46 | if name in own_state: 47 | try: 48 | own_state[name].copy_(torch.from_numpy(param)) 49 | except Exception: 50 | raise RuntimeError('While copying the parameter named {}, whose dimensions in the model are {} and whose ' 51 | 'dimensions in the checkpoint are {}.'.format(name, own_state[name].size(), param.size)) 52 | else: 53 | # raise KeyError('unexpected key "{}" in state_dict'.format(name)) 54 | pass 55 | 56 | 57 | def load_coef(data_dir, load_num=float('inf')): 58 | coef_list = [] 59 | count = 0 60 | 61 | for filename in tqdm(get_file_list(data_dir)): 62 | coef = torch.load(filename) 63 | coef_list.append(coef) 64 | count += 1 65 | if count >= load_num: 66 | break 67 | 68 | return coef_list 69 | 70 | 71 | def landmark_detection(image_list, save_path): 72 | fa_3d = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D, flip_input=False, device='cuda') 73 | 74 | landmark_dict = {} 75 | 76 | for i in tqdm(range(len(image_list))): 77 | image_name = image_list[i] 78 | image = io.imread(image_name) 79 | preds = fa_3d.get_landmarks(image) 80 | 81 | assert preds is not None 82 | 83 | landmark_dict[image_name] = preds[0][:, :2] 84 | 85 | with open(save_path, 'wb') as f: 86 | pickle.dump(landmark_dict, f) 87 | 88 | 89 | def plot_landmark(data_dir): 90 | create_dir(os.path.join(data_dir, 'landmark')) 91 | 92 | with open(os.path.join(data_dir, 'landmark.pkl'), 'rb') as f: 93 | landmark_dict = pickle.load(f) 94 | 95 | image_list = get_file_list(os.path.join(data_dir, 'crop')) 96 | 97 | for image_name in tqdm(image_list): 98 | image = cv2.imread(image_name) 99 | landmark = landmark_dict[image_name] 100 | 101 | for point in landmark: 102 | image = cv2.circle(image, (point[0], point[1]), radius=0, color=(255, 0, 0), thickness=-1) 103 | 104 | cv2.imwrite(os.path.join(data_dir, 'landmark', os.path.basename(image_name)), image) 105 | 106 | 107 | def extract_face_emb(image_list, save_path, transforms_input): 108 | facenet = InceptionResnetV1(pretrained='vggface2').eval().to('cuda') 109 | 110 | face_emb_dict = {} 111 | 112 | for i in tqdm(range(len(image_list))): 113 | image_name = image_list[i] 114 | image = Image.open(image_name).convert('RGB') 115 | 116 | input = transforms_input(image).to('cuda') 117 | input = input.reshape(1, 3, 224, 224) 118 | face_emb = facenet(input) 119 | 120 | face_emb_dict[image_name] = face_emb.squeeze().detach().to('cpu') 121 | 122 | with open(save_path, 'wb') as f: 123 | pickle.dump(face_emb_dict, f) 124 | 125 | 126 | def load_face_emb(data_dir): 127 | face_emb_dir = os.path.join(data_dir, 'face_emb.pkl') 128 | 129 | with open(face_emb_dir, 'rb') as f: 130 | face_emb_dict = pickle.load(f) 131 | 
face_emb_list = list(face_emb_dict.values()) 132 | return face_emb_list 133 | 134 | def get_max_crop_region(crop_region_list): 135 | top, bottom, left, right = np.inf, 0, np.inf, 0 136 | 137 | for t, b, l, r in crop_region_list: 138 | if top > t: 139 | top = t 140 | 141 | if bottom < b: 142 | bottom = b 143 | 144 | if left > l: 145 | left = l 146 | 147 | if right < r: 148 | right = r 149 | 150 | return top, bottom, left, right -------------------------------------------------------------------------------- /lipsync3d/audiodvp_utils/visualizer.py: -------------------------------------------------------------------------------- 1 | import torchvision 2 | from torch.utils.tensorboard import SummaryWriter 3 | 4 | 5 | class Visualizer: 6 | def __init__(self, opt): 7 | 8 | self.opt = opt # cache the option 9 | self.port = opt.display_port 10 | self.writer = SummaryWriter() 11 | 12 | def display_current_results(self, visuals, steps): 13 | for label, image in visuals.items(): 14 | self.writer.add_image(label, torchvision.utils.make_grid(image), steps) 15 | 16 | def plot_current_losses(self, total_iters, losses): 17 | """display the current losses on tensorboard display: dictionary of error labels and values 18 | Parameters: 19 | total_iters(int) -- total_iters 20 | losses (OrderedDict) -- training losses stored in the format of (name, float) pairs 21 | """ 22 | for label, loss in losses.items(): 23 | self.writer.add_scalar(label, loss, total_iters) 24 | 25 | def plot_current_texture(self, total_iters, predicted_mouths, gt_mouths): 26 | self.writer.add_image('Training/Predicted Texture', torchvision.utils.make_grid(predicted_mouths), total_iters) 27 | self.writer.add_image('Training/Ground Truth Texture', torchvision.utils.make_grid(gt_mouths), total_iters) 28 | 29 | def print_current_losses(self, epoch, iters, losses, t_comp, t_data): 30 | """print current losses on console; also save the losses to the disk 31 | Parameters: 32 | epoch (int) -- current epoch 33 | iters (int) -- current training iteration during this epoch (reset to 0 at the end of every epoch) 34 | losses (OrderedDict) -- training losses stored in the format of (name, float) pairs 35 | t_comp (float) -- computational time per data point (normalized by batch_size) 36 | t_data (float) -- data loading time per data point (not normalized by batch_size) 37 | """ 38 | message = '(epoch: %d, iters: %d, data: %.3f, comp: %.3f) ' % (epoch, iters, t_data, t_comp) 39 | for k, v in losses.items(): 40 | message += '%s: %.9f ' % (k, v) 41 | 42 | print(message) # print the message 43 | -------------------------------------------------------------------------------- /lipsync3d/combine-audioDVP.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import cv2\n", 11 | "import numpy as np\n", 12 | "from natsort import natsorted\n", 13 | "import torch\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "from PIL import Image\n", 16 | "from multiprocessing import Pool\n", 17 | "from sklearn.preprocessing import MinMaxScaler\n", 18 | "from skimage import exposure\n", 19 | "import math\n", 20 | "import shutil" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 6, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "def toTexture(image, gt_image, count):\n", 30 | " \n", 31 | " background = cv2.imread(gt_image)\n", 32 | " groundTruth = 
background.copy()\n", 33 | " texture_img = cv2.imread(image)\n", 34 | " \n", 35 | " texture_img_gray = cv2.cvtColor(texture_img, cv2.COLOR_BGR2GRAY)\n", 36 | " th, texture_th = cv2.threshold(texture_img_gray, 30, 50, cv2.THRESH_BINARY_INV)\n", 37 | " \n", 38 | " texture_th_floodfill = texture_th.copy()\n", 39 | " \n", 40 | " mask = np.zeros((image_height + 2, image_width + 2), np.uint8)\n", 41 | " \n", 42 | " cv2.floodFill(texture_th_floodfill, mask, (0,0), 255)\n", 43 | " \n", 44 | " texture_th_floodfill_inv = cv2.bitwise_not(texture_th_floodfill)\n", 45 | " \n", 46 | " im_out = texture_th | texture_th_floodfill_inv\n", 47 | " \n", 48 | " index_texture = np.array(list(zip(*np.where(im_out == 255))))\n", 49 | " \n", 50 | " background[index_texture[:,0], index_texture[:,1]] = texture_img[index_texture[:,0], index_texture[:,1]]\n", 51 | " \n", 52 | " texture_result = cv2.seamlessClone(background, groundTruth, im_out, (image_height//2, image_width // 2), cv2.NORMAL_CLONE)\n", 53 | " \n", 54 | " cv2.imwrite(os.path.join(src_directory, 'merged_texture', '{}.jpg'.format(count)), texture_result)\n", 55 | " " 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 11, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "image_height = 256\n", 65 | "image_width = 256\n", 66 | "texture_height = 280\n", 67 | "texture_width = 280\n", 68 | "src_directory = '../audioDVP_files/'" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 13, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "texture_image_files = natsorted([os.path.join(src_directory,'predicted_texture', x) for x in os.listdir(os.path.join(src_directory, 'predicted_texture'))])\n", 78 | "gt_frames = natsorted([os.path.join(src_directory, 'crop', x) for x in os.listdir(os.path.join(src_directory, 'crop'))])\n", 79 | "# reference_mouth = Image.fromarray(reference_mouth)\n", 80 | "count = [i for i in range(len(texture_image_files))]" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 17, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "if os.path.exists(os.path.join(src_directory, 'merged_texture')):\n", 90 | " shutil.rmtree(os.path.join(src_directory, 'merged_texture'))\n", 91 | "os.makedirs(os.path.join(src_directory, 'merged_texture'))\n", 92 | "pool = Pool(processes=40)\n", 93 | "pool.starmap(toTexture, zip(texture_image_files, gt_frames, count))\n", 94 | "pool.terminate()\n", 95 | "pool.join()" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 24, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "0" 107 | ] 108 | }, 109 | "execution_count": 24, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "# os.system('ffmpeg -y -i {}/%d.jpg -i {} -c:v libopenh264 -r 25 {}'.format(os.path.join(src_directory, 'predicted_face'), os.path.join(src_directory, 'audio','audio.wav'), os.path.join(src_directory, 'results', 'predicted_face.mp4')))\n", 116 | "os.system('ffmpeg -y -i {}/%d.jpg -c:v libx264 -crf 1 -r 25 {}'.format(os.path.join(src_directory, 'merged_texture'), os.path.join(src_directory, 'results', 'final_outcome.mp4')))\n", 117 | "os.system('ffmpeg -y -i {} -i {} -c:v copy -c:a copy {}'.format(os.path.join(src_directory, 'results', 'final_outcome.mp4'), os.path.join(src_directory,'reenact_audio2bfm_1e_2.mp4'), os.path.join(src_directory, 'results', 'with_audio.mp4')))" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | 
"execution_count": 66, 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "data": { 127 | "text/plain": [ 128 | "0" 129 | ] 130 | }, 131 | "execution_count": 66, 132 | "metadata": {}, 133 | "output_type": "execute_result" 134 | } 135 | ], 136 | "source": [ 137 | "os.system('ffmpeg -y -i {}/%d.jpg -i {} -c:v libx264 -crf 1 -r 25 {}'.format(os.path.join(src_directory, 'merged_texture'), os.path.join(src_directory, 'audio','audio.wav'), os.path.join(src_directory, 'results', 'final_texture.mp4')))" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 47, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "0" 149 | ] 150 | }, 151 | "execution_count": 47, 152 | "metadata": {}, 153 | "output_type": "execute_result" 154 | } 155 | ], 156 | "source": [ 157 | "out = cv2.VideoWriter(os.path.join(src_directory, 'temp_mesh.mp4'), cv2.VideoWriter_fourcc(*'mp4v'), 25, (image_width, image_height))\n", 158 | "\n", 159 | "imageFiles = natsorted([os.path.join(src_directory, 'reenact_mesh_image', x) for x in os.listdir(os.path.join(src_directory, 'reenact_mesh_image'))])\n", 160 | "\n", 161 | "for im in imageFiles:\n", 162 | " image = cv2.imread(im)\n", 163 | " out.write(image)\n", 164 | "\n", 165 | "out.release()\n", 166 | "os.system('ffmpeg -y -i {} -i {} -c:v copy -c:a aac {}'.format(os.path.join(src_directory, 'temp_mesh.mp4')\n", 167 | ", os.path.join(src_directory, 'audio','audio.wav'), os.path.join(src_directory, 'results', 'predicted_mesh.mp4')\n", 168 | "))" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 17, 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "data": { 178 | "text/plain": [ 179 | "0" 180 | ] 181 | }, 182 | "execution_count": 17, 183 | "metadata": {}, 184 | "output_type": "execute_result" 185 | } 186 | ], 187 | "source": [ 188 | "os.system('ffmpeg -y -i {}/results/predicted_face.mp4 -i {}/results/predicted_mesh.mp4 -filter_complex hstack -c:v libopenh264 {}/results/face_mesh_comparison.mp4'.format(src_directory,src_directory,src_directory))" 189 | ] 190 | } 191 | ], 192 | "metadata": { 193 | "interpreter": { 194 | "hash": "9968410507dc6acd82900f38c76c24d3f252bf51bc1b0c3680d51a23b0e86376" 195 | }, 196 | "kernelspec": { 197 | "display_name": "Python 3", 198 | "language": "python", 199 | "name": "python3" 200 | }, 201 | "language_info": { 202 | "codemirror_mode": { 203 | "name": "ipython", 204 | "version": 3 205 | }, 206 | "file_extension": ".py", 207 | "mimetype": "text/x-python", 208 | "name": "python", 209 | "nbconvert_exporter": "python", 210 | "pygments_lexer": "ipython3", 211 | "version": "3.8.11" 212 | } 213 | }, 214 | "nbformat": 4, 215 | "nbformat_minor": 2 216 | } 217 | -------------------------------------------------------------------------------- /lipsync3d/dataset.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from hparams import hparams 4 | 5 | sys.path.append('/home/server01/jyeongho_workspace/3d_face_gcns/') 6 | 7 | import os 8 | import torch 9 | import numpy as np 10 | import librosa 11 | from utils import landmarkdict_to_normalized_mesh_tensor, landmarkdict_to_mesh_tensor 12 | from audiodvp_utils import util 13 | from torch.utils.data import Dataset 14 | from natsort import natsorted 15 | import torchvision.transforms as transforms 16 | import cv2 17 | from PIL import Image 18 | import audio as audioLibrary 19 | import random 20 | 21 | class Lipsync3DMeshDataset(Dataset): 22 | def __init__(self, opt): 23 | 
super().__init__() 24 | self.opt = opt 25 | self.src_dir = opt.src_dir 26 | self.tgt_dir = opt.tgt_dir 27 | 28 | self.stabilized_mesh = [os.path.join(self.tgt_dir, 'stabilized_norm_mesh', x) for x in natsorted(os.listdir(os.path.join(self.tgt_dir, 'stabilized_norm_mesh')))] 29 | 30 | 31 | stft_path = os.path.join(self.src_dir, 'audio/audio_stft.pt') 32 | if not os.path.exists(stft_path): 33 | audio = librosa.load(os.path.join(self.src_dir, 'audio/audio.wav'),16000)[0] 34 | audio_stft = librosa.stft(audio, n_fft=510, hop_length=160, win_length=480) 35 | self.audio_stft = torch.from_numpy(np.stack((audio_stft.real, audio_stft.imag))) 36 | torch.save(self.audio_stft, os.path.join(self.src_dir, 'audio/audio_stft.pt')) 37 | else: 38 | self.audio_stft = torch.load(os.path.join(self.src_dir, 'audio/audio_stft.pt')) 39 | 40 | self.mesh_dict_list = util.load_coef(os.path.join(self.tgt_dir, 'mesh_dict')) 41 | self.filenames = util.get_file_list(os.path.join(self.tgt_dir, 'mesh_dict')) 42 | reference_mesh_dict = torch.load(os.path.join(self.tgt_dir, 'reference_mesh.pt')) 43 | 44 | self.reference_mesh = landmarkdict_to_normalized_mesh_tensor(reference_mesh_dict) 45 | 46 | if opt.isTrain: 47 | minlen = min(len(self.mesh_dict_list), self.audio_stft.shape[2] // 4) 48 | train_idx = int(minlen * self.opt.train_rate) 49 | self.mesh_dict_list = self.mesh_dict_list[:train_idx] 50 | self.filenames = self.filenames[:train_idx] 51 | 52 | print('Training set size: ', len(self.filenames)) 53 | 54 | def __len__(self): 55 | return min(self.audio_stft.shape[2] // 4, len(self.filenames)) 56 | 57 | def __getitem__(self, index): 58 | 59 | audio_idx = index * 4 60 | 61 | audio_feature_list = [] 62 | for i in range(audio_idx - 12, audio_idx + 12): 63 | if i < 0: 64 | audio_feature_list.append(self.audio_stft[:, :, 0]) 65 | elif i >= self.audio_stft.shape[2]: 66 | audio_feature_list.append(self.audio_stft[:, :, -1]) 67 | else: 68 | audio_feature_list.append(self.audio_stft[:, :, i]) 69 | 70 | audio_feature = torch.stack(audio_feature_list, 2) 71 | 72 | filename = os.path.basename(self.filenames[index]) 73 | 74 | if not self.opt.isTrain: 75 | landmark_dict = self.mesh_dict_list[index] 76 | normalized_mesh = landmarkdict_to_normalized_mesh_tensor(landmark_dict) 77 | # stabilized_mesh = torch.tensor(torch.load(self.stabilized_mesh[index])) 78 | 79 | R = torch.from_numpy(landmark_dict['R']).float() 80 | t = torch.from_numpy(landmark_dict['t']).float() 81 | c = float(landmark_dict['c']) 82 | 83 | return {'audio_feature': audio_feature, 'filename': filename, 84 | 'reference_mesh': self.reference_mesh, 'normalized_mesh': normalized_mesh, 85 | 'R': R, 't': t, 'c': c} 86 | 87 | else: 88 | landmark_dict = self.mesh_dict_list[index] 89 | normalized_mesh = landmarkdict_to_normalized_mesh_tensor(landmark_dict) 90 | # stabilized_mesh = torch.tensor(torch.load(self.stabilized_mesh[index])) 91 | return { 92 | 'audio_feature': audio_feature, 'filename': filename, 93 | 'reference_mesh' : self.reference_mesh, 'normalized_mesh': normalized_mesh 94 | } -------------------------------------------------------------------------------- /lipsync3d/demo.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | 3 | # set data path 4 | # target_dir : directory for training data 5 | # source_dir : directory for inference data, put test audio in source_dir/audio directory 6 | # video_dir : path for training video 7 | 8 | target_dir="data/kkj/kkj04_lipsync3d" 9 | source_dir="data/kkj/kkj04_lipsync3d" 10 | 
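# Note: in this demo source_dir and target_dir point to the same clip, so test.py will also
# report a held-out test loss (it only does so when src_dir == tgt_dir).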
video_dir="data/kkj/kkj04_lipsync3d/KKJ_slow_04_stand.mp4" 11 | 12 | 13 | # set video clip duration 14 | start_time="00:00:00" 15 | end_time="240" 16 | 17 | # mkdir -p $target_dir/full 18 | # mkdir -p $target_dir/crop 19 | # mkdir -p $target_dir/audio 20 | # mkdir -p $target_dir/results 21 | # mkdir -p $source_dir/audio 22 | # mkdir -p $source_dir/results 23 | 24 | # 1. Take all frames and audio of training data 25 | # warning! the number of extracted frames should be dividable by 5. 26 | # If the number of frames of training video is not dividable by 5, delete some frames manually to make the number of frames dividable by 5 27 | 28 | # ffmpeg -hide_banner -y -i $video_dir -r 25 $target_dir/full/%05d.png 29 | # ffmpeg -hide_banner -y -i $video_dir -ar 16000 $target_dir/audio/audio.wav 30 | 31 | # # crop and resize video frames 32 | # python audiodvp_utils/crop_portrait.py \ 33 | # --data_dir $target_dir \ 34 | # --crop_level 1.5 \ 35 | # --vertical_adjust 0.2 36 | 37 | # pose normalization 38 | # python lipsync3d/pose_normalization.py --data_dir $target_dir --gpu_ids 0 39 | 40 | # train lipsync3d net 41 | # python lipsync3d/train.py --src_dir $target_dir --tgt_dir $target_dir 42 | 43 | # test lipsync3d net 44 | python lipsync3d/test.py \ 45 | --batch_size 1 \ 46 | --serial_batches False \ 47 | --isTrain False \ 48 | --gpu_ids 0 \ 49 | --src_dir $source_dir \ 50 | --tgt_dir $target_dir 51 | 52 | # ffmpeg -y -loglevel warning \ 53 | # -thread_queue_size 8192 -i $target_dir/mesh_image/%05d.png \ 54 | # -thread_queue_size 8192 -i $source_dir/reenact_mesh_image/%05d.png \ 55 | # -i $source_dir/audio/audio.wav \ 56 | # -filter_complex hstack=inputs=2 -shortest -vcodec libx264 -preset slower -profile:v high -crf 18 -pix_fmt yuv420p $source_dir/results/tgt_kkj04_src_kkj00_mesh_reenact.mp4 57 | -------------------------------------------------------------------------------- /lipsync3d/hparams.py: -------------------------------------------------------------------------------- 1 | from glob import glob 2 | import os 3 | 4 | def get_image_list(data_root, split): 5 | filelist = [] 6 | 7 | with open('filelists/{}.txt'.format(split)) as f: 8 | for line in f: 9 | line = line.strip() 10 | if ' ' in line: line = line.split()[0] 11 | filelist.append(os.path.join(data_root, line)) 12 | 13 | return filelist 14 | 15 | class HParams: 16 | def __init__(self, **kwargs): 17 | self.data = {} 18 | 19 | for key, value in kwargs.items(): 20 | self.data[key] = value 21 | 22 | def __getattr__(self, key): 23 | if key not in self.data: 24 | raise AttributeError("'HParams' object has no attribute %s" % key) 25 | return self.data[key] 26 | 27 | def set_hparam(self, key, value): 28 | self.data[key] = value 29 | 30 | 31 | # Default hyperparameters 32 | hparams = HParams( 33 | num_mels=80, # Number of mel-spectrogram channels and local conditioning dimensionality 34 | # network 35 | rescale=True, # Whether to rescale audio prior to preprocessing 36 | rescaling_max=0.9, # Rescaling value 37 | 38 | # Use LWS (https://github.com/Jonathan-LeRoux/lws) for STFT and phase reconstruction 39 | # It"s preferred to set True to use with https://github.com/r9y9/wavenet_vocoder 40 | # Does not work if n_ffit is not multiple of hop_size!! 
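    # Note: this file mirrors audiodvp_utils/hparams.py; the audio settings are identical and
    # only the training-parameter section further below differs (batch size, learning rates,
    # number of workers, syncnet/disc weights and intervals).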
41 | use_lws=False, 42 | 43 | n_fft=800, # Extra window size is filled with 0 paddings to match this parameter 44 | hop_size=200, # For 16000Hz, 200 = 12.5 ms (0.0125 * sample_rate) 45 | win_size=800, # For 16000Hz, 800 = 50 ms (If None, win_size = n_fft) (0.05 * sample_rate) 46 | sample_rate=16000, # 16000Hz (corresponding to librispeech) (sox --i ) 47 | 48 | frame_shift_ms=None, # Can replace hop_size parameter. (Recommended: 12.5) 49 | 50 | # Mel and Linear spectrograms normalization/scaling and clipping 51 | signal_normalization=True, 52 | # Whether to normalize mel spectrograms to some predefined range (following below parameters) 53 | allow_clipping_in_normalization=True, # Only relevant if mel_normalization = True 54 | symmetric_mels=True, 55 | # Whether to scale the data to be symmetric around 0. (Also multiplies the output range by 2, 56 | # faster and cleaner convergence) 57 | max_abs_value=4., 58 | # max absolute value of data. If symmetric, data will be [-max, max] else [0, max] (Must not 59 | # be too big to avoid gradient explosion, 60 | # not too small for fast convergence) 61 | # Contribution by @begeekmyfriend 62 | # Spectrogram Pre-Emphasis (Lfilter: Reduce spectrogram noise and helps model certitude 63 | # levels. Also allows for better G&L phase reconstruction) 64 | preemphasize=True, # whether to apply filter 65 | preemphasis=0.97, # filter coefficient. 66 | 67 | # Limits 68 | min_level_db=-100, 69 | ref_level_db=20, 70 | fmin=55, 71 | # Set this to 55 if your speaker is male! if female, 95 should help taking off noise. (To 72 | # test depending on dataset. Pitch info: male~[65, 260], female~[100, 525]) 73 | fmax=7600, # To be increased/reduced depending on data. 74 | 75 | ###################### Our training parameters ################################# 76 | img_size=96, 77 | fps=25, 78 | 79 | batch_size=24, 80 | initial_learning_rate=5e-3, 81 | nepochs=200000000000000000, ### ctrl + c, stop whenever eval loss is consistently greater than train loss for ~10 epochs 82 | num_workers=40, 83 | checkpoint_interval=10000, 84 | eval_interval=10000, 85 | save_optimizer_state=True, 86 | 87 | syncnet_wt=0.01, # is initially zero, will be set automatically to 0.03 later. Leads to faster convergence. 
88 | syncnet_batch_size=64, 89 | syncnet_lr=1e-2, 90 | syncnet_eval_interval=4000, 91 | syncnet_checkpoint_interval=4000, 92 | 93 | disc_wt=0.05, 94 | disc_initial_learning_rate=1e-4, 95 | ) 96 | 97 | 98 | def hparams_debug_string(): 99 | values = hparams.values() 100 | hp = [" %s: %s" % (name, values[name]) for name in sorted(values) if name != "sentences"] 101 | return "Hyperparameters:\n" + "\n".join(hp) 102 | -------------------------------------------------------------------------------- /lipsync3d/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class L2Loss(nn.Module): 5 | def __init__(self): 6 | super(L2Loss, self).__init__() 7 | 8 | def forward(self, input, target): 9 | l2_loss = (target - input) ** 2 10 | l2_loss = torch.mean(l2_loss) 11 | 12 | return l2_loss 13 | 14 | class L1Loss(nn.Module): 15 | def __init__(self): 16 | super(L1Loss, self).__init__() 17 | 18 | def forward(self, input, target): 19 | 20 | return (torch.abs(input - target)).mean() -------------------------------------------------------------------------------- /lipsync3d/model.py: -------------------------------------------------------------------------------- 1 | from cv2 import getOptimalNewCameraMatrix 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import functional as F 5 | 6 | class View(nn.Module): 7 | def __init__(self, shape): 8 | super(View, self).__init__() 9 | self.shape = shape 10 | 11 | def forward(self, x): 12 | return x.view(*self.shape) 13 | 14 | class Lipsync3DMesh(nn.Module): 15 | def __init__(self): 16 | super().__init__() 17 | 18 | #TODO 19 | self.AudioEncoder = nn.Sequential( 20 | # Define Network Architecture (Hint: Architecture mentioned in the paper, Change in latent space dimensions are as follows) 21 | # 2 x 256 x 24 -> 72 x 128 x 24 22 | # 72 x 128 x 24 -> 108 x 64 x 24 23 | # 108 x 64 x 24 -> 162 x 32 x 24 24 | # 162 x 32 x 24 -> 243 x 16 x 24 25 | # 243 x 16 x 24 -> 256 x 8 x 24 26 | # 256 x 8 x 24 -> 256 x 4 x 24 27 | # 256 x 4 x 24 -> 128 x 4 x 13 28 | # 128 x 4 x 13 -> 64 x 4 x 8 29 | # 64 x 4 x 8 -> 32 x 4 x 5 30 | # 32 x 4 x 5 -> 16 x 4 x 4 31 | # 16 x 4 x 4 -> 8 x 4 x 3 32 | # 8 x 4 x 3 -> 4 x 4 x 2 33 | View([-1, 32]), 34 | ) 35 | 36 | self.GeometryDecoder = nn.Sequential( 37 | nn.Linear(32, 150), 38 | nn.Dropout(0.5), 39 | nn.Linear(150, 1434) 40 | ) 41 | 42 | def forward(self, spec, latentMode=False): 43 | # spec : B x 2 x 256 x 24 44 | # texture : B x 3 x 128 x 128 45 | 46 | latent = self.AudioEncoder(spec) 47 | if latentMode: 48 | return latent 49 | geometry_diff = self.GeometryDecoder(latent) 50 | 51 | return geometry_diff -------------------------------------------------------------------------------- /lipsync3d/options.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | 4 | 5 | class Options: 6 | def __init__(self): 7 | self.parser = argparse.ArgumentParser() 8 | self.parser.add_argument('--data_dir', type=str, default=None) 9 | self.parser.add_argument('--src_dir', type=str, default=None) 10 | self.parser.add_argument('--tgt_dir', type=str, default=None) 11 | 12 | self.parser.add_argument('--train_rate', type=float, default=0.8) 13 | self.parser.add_argument('--num_epoch', type=int, default=250) 14 | self.parser.add_argument('--batch_size', type=int, default=128) 15 | self.parser.add_argument('--serial_batches', type=self.str2bool, default=False) 16 | self.parser.add_argument('--num_workers', 
type=int, default=4) 17 | self.parser.add_argument('--isTrain', type=self.str2bool, default=True) 18 | self.parser.add_argument('--lr', type=float, default=2e-5, help='initial learning rate for adam') 19 | self.parser.add_argument('--lambda_geo', type=float, default=0.3) 20 | self.parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU') 21 | 22 | self.parser.add_argument('--display_port', type=int, default=11111, help='tensorboard port of the web display') 23 | self.parser.add_argument('--display_freq', type=int, default=2000, help='frequency of showing training results on screen') 24 | self.parser.add_argument('--print_freq', type=int, default=200, help='frequency of showing training results on console') 25 | self.parser.add_argument('--freeze_mesh', type=bool, default=False, help='Choose if you want to freeze mesh training pipeline or not') 26 | self.parser.add_argument('--load_model', type=bool, default=False, help='Load model from the checkpoint') 27 | self.parser.add_argument('--model_name', type=str, default=None, help='Name of the checkpoint file') 28 | self.parser.add_argument('--mesh_model_path', type=str, default='', help='Path of the mesh model checkpoint file') 29 | self.parser.add_argument('--checkpoint_interval', type=int, default=10, help='Checkpoint interval') 30 | 31 | 32 | def parse_args(self): 33 | self.args = self.parser.parse_args() 34 | self.args.device = torch.device('cuda:{}'.format(self.args.gpu_ids[0])) if self.args.gpu_ids else torch.device('cpu') 35 | return self.args 36 | 37 | def str2bool(self, v): 38 | if isinstance(v, bool): 39 | return v 40 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 41 | return True 42 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 43 | return False 44 | else: 45 | raise argparse.ArgumentTypeError('Boolean value expected.') 46 | -------------------------------------------------------------------------------- /lipsync3d/pose_normalization.py: -------------------------------------------------------------------------------- 1 | """ 2 | Crop upper boddy in every video frame, square bounding box is averaged among all frames and fixed. 
3 | """ 4 | import sys 5 | import os 6 | import cv2 7 | import argparse 8 | import math 9 | from tqdm import tqdm 10 | import torch 11 | import utils 12 | from utils import landmark_to_dict 13 | import numpy as np 14 | import cv2 15 | import mediapipe as mp 16 | import matplotlib.pyplot as plt 17 | from audiodvp_utils import util 18 | import mediapipe.python.solutions.face_mesh as mp_face_mesh 19 | import mediapipe.python.solutions.drawing_utils as mp_drawing 20 | import mediapipe.python.solutions.drawing_styles as mp_drawing_styles 21 | from multiprocessing import Pool 22 | 23 | 24 | def get_reference_dict(data_dir): 25 | image = cv2.imread(os.path.join(data_dir, 'reference_frame.png')) 26 | image_rows, image_cols, _ = image.shape 27 | 28 | with mp_face_mesh.FaceMesh( 29 | static_image_mode=True, 30 | max_num_faces=1, 31 | refine_landmarks=True, 32 | min_detection_confidence=0.5) as face_mesh: 33 | 34 | results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) 35 | reference_dict = landmark_to_dict(results.multi_face_landmarks[0].landmark) 36 | reference_dict = normalized_to_pixel_coordinates(reference_dict, image_cols, image_rows) 37 | return reference_dict 38 | 39 | def draw_landmark(results, image, save_path): 40 | drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1) 41 | 42 | mp_drawing.draw_landmarks( 43 | image=image, 44 | landmark_list=results.multi_face_landmarks[0], 45 | connections=mp_face_mesh.FACEMESH_TESSELATION, 46 | landmark_drawing_spec=None, 47 | connection_drawing_spec=mp_drawing_styles 48 | .get_default_face_mesh_tesselation_style()) 49 | 50 | cv2.imwrite(save_path, image) 51 | 52 | 53 | def normalized_to_pixel_coordinates(landmark_dict, image_width, image_height): 54 | def is_valid_normalized_value(value): 55 | return (value > 0 or math.isclose(0, value)) and (value < 1 or math.isclose(1, value)) 56 | 57 | landmark_pixel_coord_dict = {} 58 | 59 | for idx, coord in landmark_dict.items(): 60 | if (idx == 'R') or (idx == 't') or (idx == 'c'): 61 | continue 62 | 63 | if not (is_valid_normalized_value(coord[0]) and 64 | is_valid_normalized_value(coord[1])): 65 | # TODO: Draw coordinates even if it's outside of the image bounds. 
66 | return None 67 | x_px = coord[0] * image_width 68 | y_px = coord[1] * image_height 69 | z_px = coord[2] * image_width 70 | landmark_pixel_coord_dict[idx] = [x_px, y_px, z_px] 71 | return landmark_pixel_coord_dict 72 | 73 | 74 | def draw_pose_normalized_mesh(target_dict, image, save_path): 75 | connections = mp_face_mesh.FACEMESH_TESSELATION 76 | drawing_spec = mp_drawing.DrawingSpec(color= mp_drawing.BLACK_COLOR, thickness=1, circle_radius=1) 77 | 78 | image_rows, image_cols, _ = image.shape 79 | R = target_dict['R'] 80 | t = target_dict['t'] 81 | c = target_dict['c'] 82 | 83 | idx_to_coordinates = {} 84 | for idx, coord in target_dict.items(): 85 | if (idx == 'R') or (idx == 't') or (idx == 'c'): 86 | continue 87 | tgt = np.array(coord).reshape(3, 1) 88 | norm_tgt = (c * np.matmul(R, tgt) + t).squeeze() 89 | x_px = min(math.floor(norm_tgt[0]), image_cols - 1) 90 | y_px = min(math.floor(norm_tgt[1]), image_rows - 1) 91 | landmark_px = (x_px, y_px) 92 | if landmark_px: 93 | idx_to_coordinates[idx] = landmark_px 94 | 95 | white_image = np.zeros([image_rows, image_cols, 3], dtype=np.uint8) 96 | white_image[:] = 255 97 | for connection in connections: 98 | start_idx = connection[0] 99 | end_idx = connection[1] 100 | 101 | if start_idx in idx_to_coordinates and end_idx in idx_to_coordinates: 102 | cv2.line(white_image, 103 | idx_to_coordinates[start_idx], 104 | idx_to_coordinates[end_idx], 105 | drawing_spec.color, 106 | drawing_spec.thickness 107 | ) 108 | cv2.imwrite(save_path, white_image) 109 | 110 | 111 | def draw_3d_mesh(target_dict, save_path, elevation=10, azimuth=10): 112 | connections = mp_face_mesh.FACEMESH_TESSELATION 113 | drawing_spec = mp_drawing.DrawingSpec(color= mp_drawing.BLACK_COLOR, thickness=1, circle_radius=1) 114 | 115 | plt.figure(figsize=(10, 10)) 116 | ax = plt.axes(projection='3d') 117 | ax.view_init(elev=elevation, azim=azimuth) 118 | plotted_landmarks = {} 119 | 120 | for idx, coord in target_dict.items(): 121 | if (idx == 'R') or (idx == 't') or (idx == 'c'): 122 | continue 123 | plotted_landmarks[idx] = (-coord[2], coord[0], -coord[1]) 124 | 125 | for connection in connections: 126 | start_idx = connection[0] 127 | end_idx = connection[1] 128 | 129 | if start_idx in plotted_landmarks and end_idx in plotted_landmarks: 130 | landmark_pair = [plotted_landmarks[start_idx], plotted_landmarks[end_idx]] 131 | ax.plot3D( 132 | xs=[landmark_pair[0][0], landmark_pair[1][0]], 133 | ys=[landmark_pair[0][1], landmark_pair[1][1]], 134 | zs=[landmark_pair[0][2], landmark_pair[1][2]], 135 | color=(0., 0., 1.), 136 | linewidth=1) 137 | plt.savefig(save_path) 138 | 139 | def multiProcess(im, data_dir, reference_dict): 140 | with mp_face_mesh.FaceMesh( 141 | max_num_faces=1, 142 | refine_landmarks=True, 143 | min_detection_confidence=0.5, 144 | min_tracking_confidence=0.5) as face_mesh: 145 | image = cv2.imread(im) 146 | annotated_image = image.copy() 147 | image_rows, image_cols, _ = image.shape 148 | results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) 149 | target_dict = landmark_to_dict(results.multi_face_landmarks[0].landmark) 150 | target_dict = normalized_to_pixel_coordinates(target_dict, image_cols, image_rows) 151 | R, t, c = utils.Umeyama_algorithm(reference_dict, target_dict) 152 | target_dict['R'] = R 153 | target_dict['t'] = t 154 | target_dict['c'] = c 155 | torch.save(target_dict, os.path.join(data_dir, 'mesh_dict', os.path.basename(im))[:-4]+'.pt') 156 | 157 | if args.draw_mesh: 158 | img_save_path = os.path.join(data_dir, 'mesh_image', 
os.path.basename(im)[:-4] + '.png') 159 | draw_landmark(results, annotated_image, img_save_path) 160 | 161 | if args.draw_norm_mesh: 162 | img_save_path = os.path.join(data_dir, 'mesh_norm_image', os.path.basename(im)[:-4] + '.png') 163 | draw_pose_normalized_mesh(target_dict, annotated_image, img_save_path) 164 | 165 | if args.draw_norm_3d_mesh: 166 | img_save_path = os.path.join(data_dir, 'mesh_norm_3d_image', os.path.basename(im)[:-4] + '.png') 167 | draw_3d_mesh(target_dict, img_save_path, elevation=10, azimuth=10) 168 | 169 | def pose_normalization(args): 170 | data_dir = args.data_dir 171 | image_list = util.get_file_list(os.path.join(data_dir, 'crop')) 172 | reference_dict = get_reference_dict(data_dir) 173 | torch.save(reference_dict, os.path.join(data_dir, 'reference_mesh.pt')) 174 | 175 | data_dirs = [] 176 | reference_dicts = [] 177 | 178 | for i in range(len(image_list)): 179 | data_dirs.append(data_dir) 180 | reference_dicts.append(reference_dict) 181 | 182 | pool = Pool(processes=40) 183 | pool.starmap(multiProcess, zip(image_list, data_dirs, reference_dicts)) 184 | 185 | # with mp_face_mesh.FaceMesh( 186 | # max_num_faces=1, 187 | # refine_landmarks=True, 188 | # min_detection_confidence=0.5, 189 | # min_tracking_confidence=0.5) as face_mesh: 190 | # for i in tqdm(range(len(image_list))): 191 | # image = cv2.imread(image_list[i]) 192 | # annotated_image = image.copy() 193 | # image_rows, image_cols, _ = image.shape 194 | # results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) 195 | # target_dict = landmark_to_dict(results.multi_face_landmarks[0].landmark) 196 | # target_dict = normalized_to_pixel_coordinates(target_dict, image_cols, image_rows) 197 | # R, t, c = utils.Umeyama_algorithm(reference_dict, target_dict) 198 | # target_dict['R'] = R 199 | # target_dict['t'] = t 200 | # target_dict['c'] = c 201 | # torch.save(target_dict, os.path.join(data_dir, 'mesh_dict', os.path.basename(image_list[i]))[:-4]+'.pt') 202 | 203 | # if args.draw_mesh: 204 | # img_save_path = os.path.join(data_dir, 'mesh_image', os.path.basename(image_list[i])[:-4] + '.png') 205 | # draw_landmark(results, annotated_image, img_save_path) 206 | 207 | # if args.draw_norm_mesh: 208 | # img_save_path = os.path.join(data_dir, 'mesh_norm_image', os.path.basename(image_list[i])[:-4] + '.png') 209 | # draw_pose_normalized_mesh(target_dict, annotated_image, img_save_path) 210 | 211 | # if args.draw_norm_3d_mesh: 212 | # img_save_path = os.path.join(data_dir, 'mesh_norm_3d_image', os.path.basename(image_list[i])[:-4] + '.png') 213 | # draw_3d_mesh(target_dict, img_save_path, elevation=10, azimuth=10) 214 | 215 | 216 | def create_dirs(opt): 217 | os.makedirs(os.path.join(args.data_dir, 'mesh_dict'), exist_ok=True) 218 | if opt.draw_mesh: 219 | os.makedirs(os.path.join(args.data_dir, 'mesh_image'), exist_ok=True) 220 | 221 | if opt.draw_norm_mesh: 222 | os.makedirs(os.path.join(args.data_dir, 'mesh_norm_image'), exist_ok=True) 223 | 224 | if opt.draw_norm_3d_mesh: 225 | os.makedirs(os.path.join(args.data_dir, 'mesh_norm_3d_image'), exist_ok=True) 226 | 227 | if __name__ == '__main__': 228 | parser = argparse.ArgumentParser(description='Process some integers.') 229 | parser.add_argument('--data_dir', type=str, default=None) 230 | parser.add_argument('--draw_mesh', type=bool, default=False) 231 | parser.add_argument('--draw_norm_mesh', type=bool, default=False) 232 | parser.add_argument('--draw_norm_3d_mesh', type=bool, default=False) 233 | args = parser.parse_args() 234 | 235 | create_dirs(args) 236 | 
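    # pose_normalization() runs MediaPipe FaceMesh over every image in <data_dir>/crop
    # (in a 40-process Pool) and saves, per frame, a mesh_dict .pt holding 478 pixel-space
    # landmarks plus the similarity transform (R, t, c) from utils.Umeyama_algorithm that
    # aligns the frame to the reference frame (normalized landmark = c * R @ landmark + t),
    # as consumed later by lipsync3d/dataset.py and test.py.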
pose_normalization(args) 237 | -------------------------------------------------------------------------------- /lipsync3d/test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('/home/server01/jyeongho_workspace/3d_face_gcns/') 3 | 4 | import torch 5 | from torch.utils.data import DataLoader 6 | from options import Options 7 | from dataset import Lipsync3DMeshDataset 8 | from model import Lipsync3DMesh 9 | from loss import L2Loss 10 | import time 11 | from utils import mesh_tensor_to_landmarkdict, draw_mesh_images 12 | import os 13 | from tqdm import tqdm 14 | import cv2 15 | import shutil 16 | 17 | 18 | if __name__ == '__main__': 19 | opt = Options().parse_args() 20 | device = opt.device 21 | calculate_test_loss = (opt.src_dir == opt.tgt_dir) 22 | dataset = Lipsync3DMeshDataset(opt) 23 | test_dataloader = torch.utils.data.DataLoader( 24 | dataset, 25 | batch_size=opt.batch_size, 26 | shuffle=False, # default not shuffle 27 | num_workers=opt.num_workers, 28 | drop_last=True # the batch size cannot change during the training so the last uncomplete batch need to be dropped 29 | ) 30 | 31 | model = Lipsync3DMesh().to(device) 32 | criterionGeo = L2Loss() 33 | 34 | if opt.model_name is not None: 35 | state_dict = torch.load(os.path.join(opt.tgt_dir, opt.model_name)) 36 | audioEncoder_state = {} 37 | geometryDecoder_state = {} 38 | 39 | for key, value in state_dict.items(): 40 | if 'AudioEncoder' in key: 41 | audioEncoder_state[key.replace('AudioEncoder.', '')] = value 42 | if 'GeometryDecoder' in key: 43 | geometryDecoder_state[key.replace('GeometryDecoder.', '')] = value 44 | model.AudioEncoder.load_state_dict(audioEncoder_state) 45 | model.GeometryDecoder.load_state_dict(geometryDecoder_state) 46 | else: 47 | raise ValueError('No checkpoint specified') 48 | 49 | def emptyFolder(path): 50 | if os.path.exists(path): 51 | shutil.rmtree(path) 52 | os.makedirs(path, exist_ok=True) 53 | 54 | ckpt = torch.load(os.path.join(opt.tgt_dir, opt.model_name), map_location=device) 55 | model.load_state_dict(ckpt) 56 | 57 | emptyFolder(os.path.join(opt.src_dir, 'reenact_mesh')) 58 | emptyFolder(os.path.join(opt.src_dir, 'reenact_mesh_image')) 59 | emptyFolder(os.path.join(opt.src_dir, 'reenact_texture')) 60 | emptyFolder(os.path.join(opt.src_dir, 'predicted_normalised_mesh')) 61 | 62 | avg_loss = 0 63 | 64 | # previous_texture = torch.zeros((1, 3, 140, 280)).to(device) 65 | with torch.no_grad(): 66 | model.eval() 67 | for i, data in enumerate(tqdm(test_dataloader)): 68 | audio_feature = data['audio_feature'].to(device) 69 | reference_mesh = data['reference_mesh'].to(device) 70 | normalized_mesh = data['normalized_mesh'].to(device) 71 | filename = data['filename'][0] 72 | R = data['R'][0].to(device) 73 | RT = R.transpose(0, 1) 74 | t = data['t'][0].to(device) 75 | c = data['c'][0].to(device) 76 | 77 | geometry_diff = model(audio_feature) 78 | geometry_diff = geometry_diff.reshape(-1, 478, 3) 79 | geometry = reference_mesh + geometry_diff 80 | 81 | if calculate_test_loss and (i > int(len(test_dataloader) * opt.train_rate)): 82 | geoLoss = criterionGeo(geometry, normalized_mesh) 83 | avg_loss += geoLoss.detach() / int(len(test_dataloader) * (1 - opt.train_rate)) 84 | 85 | geometry = geometry[0].transpose(0, 1) 86 | normlaised_geometry = geometry.clone().detach() 87 | normalised_landmark_dict = mesh_tensor_to_landmarkdict(normlaised_geometry) 88 | 89 | geometry = (torch.matmul(RT, (geometry - t)) / c).transpose(0, 1).cpu().detach() 90 | 
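            # The line above undoes the pose normalization: the network predicts the mesh in
            # the reference-aligned frame (x_norm = c * R @ x + t), so R^T @ (x_norm - t) / c
            # maps it back into the original frame's pixel coordinates before it is saved to
            # reenact_mesh and drawn into reenact_mesh_image.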
landmark_dict = mesh_tensor_to_landmarkdict(geometry) 91 | 92 | # save_image(predicted_mouth[0], os.path.join(opt.src_dir, 'reenact_texture',filename.split('.')[0]+'.jpg')) 93 | torch.save(normalised_landmark_dict, os.path.join(opt.src_dir,'predicted_normalised_mesh',filename)) 94 | torch.save(landmark_dict, os.path.join(opt.src_dir, 'reenact_mesh', filename)) 95 | 96 | if calculate_test_loss: 97 | print('Average Test loss : ', avg_loss) 98 | 99 | print('Start drawing reenact mesh') 100 | image = cv2.imread(os.path.join(opt.tgt_dir, 'reference_frame.png')) 101 | image_rows, image_cols, _ = image.shape 102 | draw_mesh_images(os.path.join(opt.src_dir, 'reenact_mesh'), os.path.join(opt.src_dir, 'reenact_mesh_image'), image_rows, image_cols) 103 | 104 | 105 | -------------------------------------------------------------------------------- /lipsync3d/train.py: -------------------------------------------------------------------------------- 1 | from torch import optim 2 | from torch.optim import optimizer 3 | 4 | import torch 5 | from torch.utils.data import DataLoader 6 | from options import Options 7 | from dataset import Lipsync3DMeshDataset 8 | from model import Lipsync3DMesh 9 | from loss import L2Loss 10 | from audiodvp_utils.visualizer import Visualizer 11 | import time 12 | import os 13 | 14 | import torch.nn as nn 15 | 16 | if __name__ == '__main__': 17 | opt = Options().parse_args() 18 | device = opt.device 19 | 20 | dataset = Lipsync3DMeshDataset(opt) 21 | train_dataloader = DataLoader( 22 | dataset, 23 | batch_size = opt.batch_size, 24 | shuffle = not opt.serial_batches, # default not shuffle 25 | num_workers = opt.num_workers, 26 | drop_last = True 27 | ) 28 | 29 | visualizer = Visualizer(opt) 30 | model = Lipsync3DMesh().to(device) 31 | 32 | #TODO : Define Loss function------ 33 | criterionGeo = None 34 | #--------------------------------- 35 | 36 | if opt.load_model: 37 | if os.path.exists(os.path.join(opt.tgt_dir, opt.model_name)): 38 | state_dict = torch.load(os.path.join(opt.tgt_dir, opt.model_name)) 39 | audioEncoder_state = {} 40 | geometryDecoder_state = {} 41 | 42 | for key, value in state_dict.items(): 43 | if 'AudioEncoder' in key: 44 | audioEncoder_state[key.replace('AudioEncoder.', '')] = value 45 | if 'GeometryDecoder' in key: 46 | geometryDecoder_state[key.replace('GeometryDecoder.', '')] = value 47 | 48 | model.AudioEncoder.load_state_dict(audioEncoder_state) 49 | model.GeometryDecoder.load_state_dict(geometryDecoder_state) 50 | 51 | optimizer = optim.Adam(model.parameters(), lr=opt.lr) 52 | 53 | os.makedirs(os.path.join(opt.tgt_dir, 'mesh_checkpoint'), exist_ok=True) 54 | 55 | # model = nn.DataParallel(model) 56 | 57 | total_iters = 0 58 | 59 | for epoch in range(opt.num_epoch): 60 | epoch_start_time = time.time() 61 | epoch_iter = 0 62 | 63 | for i, data in enumerate(train_dataloader): 64 | total_iters += opt.batch_size 65 | epoch_iter += opt.batch_size 66 | 67 | # TODO : Implement training process ------- 68 | geoLoss = None 69 | # ----------------------------------------- 70 | 71 | if total_iters % opt.print_freq == 0: 72 | losses = {'geoLoss' : geoLoss} 73 | 74 | visualizer.print_current_losses(epoch, epoch_iter, losses, 0, 0) 75 | visualizer.plot_current_losses(total_iters, losses) 76 | 77 | 78 | print('End of epoch %d / %d \t Time Taken: %d sec' % (epoch, opt.num_epoch, time.time() - epoch_start_time)) 79 | 80 | if epoch % opt.checkpoint_interval == 0 and epoch != 0: 81 | torch.save(model.state_dict(), os.path.join(opt.tgt_dir, 'mesh_checkpoint', 
'checkpoint_{}.pth'.format(epoch))) 82 | print("Checkpoint saved") 83 | 84 | torch.save(model.state_dict(), os.path.join(opt.tgt_dir, 'mesh.pth')) 85 | -------------------------------------------------------------------------------- /lipsync3d/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('/home/server01/jyeongho_workspace/3d_face_gcns/') 3 | 4 | from audiodvp_utils import util 5 | import numpy as np 6 | import math 7 | import torch 8 | import os 9 | import mediapipe.python.solutions.face_mesh as mp_face_mesh 10 | import mediapipe.python.solutions.drawing_utils as mp_drawing 11 | import mediapipe.python.solutions.drawing_styles as mp_drawing_styles 12 | from tqdm import tqdm 13 | import cv2 14 | 15 | # Input : 16 | # reference(dictionary from vertex idx to normalized landmark, dict[idx] = [x, y, z]) : landmark of reference frame. 17 | # target(dictionary from vertex idx to normalized landmark, dict[idx] = [x, y, z]) : landmark of target frame. 18 | # Output : 19 | # R : 3x3 Rotation matrix(np.array) 20 | # c : scale value(float) 21 | # t : 3x1 translation matrix(np.array) 22 | 23 | def Umeyama_algorithm(reference, target): 24 | # idx 2 -> nose, 130 -> left eye, 359 -> right eye 25 | idx_list = [2, 94, 19, 1, 4, 5, 195, 197, 6, 168, 8, 9, 151, 10, 109, 108, 67, 69, 103, 104, 54, 68, 338, 337, 297, 299, 332, 333, 284, 298, 130, 243, 244, 359, 362, 463, 26 | 21, 71, 162, 139, 156, 70, 63, 105, 66, 107, 336, 296, 334, 293, 300, 301, 251, 55, 285, 193, 417, 122, 351, 196, 419, 3, 248, 51, 281, 27 | 45, 275, 44, 274, 220, 440, 134, 363, 236, 456] 28 | # idx_list = [19, 243, 463] 29 | ref_points = [] 30 | tgt_points = [] 31 | 32 | for idx in idx_list: 33 | ref_points.append(reference[idx]) 34 | tgt_points.append(target[idx]) 35 | 36 | ref_points = np.array(ref_points) 37 | tgt_points = np.array(tgt_points) 38 | 39 | ref_mu = ref_points.mean(axis=0) 40 | tgt_mu = tgt_points.mean(axis=0) 41 | ref_var = ref_points.var(axis=0).sum() 42 | tgt_var = tgt_points.var(axis=0).sum() 43 | n, m = ref_points.shape 44 | covar = np.matmul((ref_points - ref_mu).T, tgt_points - tgt_mu) / n 45 | det_covar = np.linalg.det(covar) 46 | u, d, vh = np.linalg.svd(covar) 47 | detuv = np.linalg.det(u) * np.linalg.det(vh.T) 48 | cov_rank = np.linalg.matrix_rank(covar) 49 | S = np.identity(m) 50 | 51 | if cov_rank > m - 1: 52 | if det_covar < 0: 53 | S[m - 1, m - 1] = -1 54 | else: 55 | if detuv < 0: 56 | S[m - 1, m - 1] = -1 57 | 58 | R = np.matmul(np.matmul(u, S), vh) 59 | c = (1 / tgt_var) * np.trace(np.matmul(np.diag(d), S)) 60 | t = ref_mu.reshape(3, 1) - c * np.matmul(R, tgt_mu.reshape(3, 1)) 61 | 62 | return R, t, c 63 | 64 | 65 | def landmark_to_dict(landmark_list): 66 | landmark_dict = {} 67 | for idx, landmark in enumerate(landmark_list): 68 | landmark_dict[idx] = [landmark.x, landmark.y, landmark.z] 69 | 70 | return landmark_dict 71 | 72 | def landmarkdict_to_normalized_mesh_tensor(landmark_dict): 73 | vertex_list = [] 74 | for idx, coord in landmark_dict.items(): 75 | if (idx == 'R') or (idx == 't') or (idx == 'c'): 76 | continue 77 | vertex_list.append(coord) 78 | 79 | if not ('R' in landmark_dict): 80 | return torch.tensor(vertex_list) 81 | 82 | R = torch.from_numpy(landmark_dict['R']).float() 83 | t = torch.from_numpy(landmark_dict['t']).float() 84 | c = float(landmark_dict['c']) 85 | vertices = torch.tensor(vertex_list).transpose(0, 1) 86 | norm_vertices = (c * torch.matmul(R, vertices) + t).transpose(0, 1) 87 | return 
norm_vertices 88 | 89 | 90 | def landmarkdict_to_mesh_tensor(landmark_dict): 91 | vertex_list = [] 92 | for idx, coord in landmark_dict.items(): 93 | if (idx == 'R') or (idx == 't') or (idx == 'c'): 94 | continue 95 | vertex_list.append(coord) 96 | 97 | vertices = torch.tensor(vertex_list) 98 | return vertices 99 | 100 | def mesh_tensor_to_landmarkdict(mesh_tensor): 101 | landmark_dict = {} 102 | for i in range(mesh_tensor.shape[0]): 103 | landmark_dict[i] = mesh_tensor[i].tolist() 104 | 105 | return landmark_dict 106 | 107 | 108 | def draw_mesh_image(mesh_dict, save_path, image_rows, image_cols): 109 | connections = mp_face_mesh.FACEMESH_TESSELATION 110 | drawing_spec = mp_drawing.DrawingSpec(color= mp_drawing.BLACK_COLOR, thickness=1, circle_radius=1) 111 | 112 | idx_to_coordinates = {} 113 | for idx, coord in mesh_dict.items(): 114 | if (idx == 'R') or (idx == 't') or (idx == 'c'): 115 | continue 116 | x_px = min(math.floor(coord[0]), image_cols - 1) 117 | y_px = min(math.floor(coord[1]), image_rows - 1) 118 | landmark_px = (x_px, y_px) 119 | if landmark_px: 120 | idx_to_coordinates[idx] = landmark_px 121 | 122 | white_image = np.zeros([image_rows, image_cols, 3], dtype=np.uint8) 123 | white_image[:] = 255 124 | for connection in connections: 125 | start_idx = connection[0] 126 | end_idx = connection[1] 127 | 128 | if start_idx in idx_to_coordinates and end_idx in idx_to_coordinates: 129 | cv2.line(white_image, 130 | idx_to_coordinates[start_idx], 131 | idx_to_coordinates[end_idx], 132 | drawing_spec.color, 133 | drawing_spec.thickness 134 | ) 135 | cv2.imwrite(save_path, white_image) 136 | 137 | 138 | def draw_mesh_images(mesh_dir, save_dir, image_rows, image_cols): 139 | mesh_filename_list = util.get_file_list(mesh_dir) 140 | 141 | for mesh_filename in tqdm(mesh_filename_list): 142 | mesh_dict = torch.load(mesh_filename) 143 | save_path = os.path.join(save_dir, os.path.basename(mesh_filename)[:-3] + '.png') 144 | draw_mesh_image(mesh_dict, save_path, image_rows, image_cols) 145 | 146 | return 147 | -------------------------------------------------------------------------------- /make_video.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | from natsort import natsorted 4 | import argparse 5 | 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument('--src_directory', type=str, required=True) 8 | 9 | args = parser.parse_args() 10 | 11 | if __name__ == '__main__': 12 | norm_images = natsorted([os.path.join(args.src_directory, 'reenact_mesh_image', x) for x in os.listdir(os.path.join(args.src_directory, 'reenact_mesh_image'))]) 13 | out = cv2.VideoWriter('{}/temp_original.mp4'.format(args.src_directory), cv2.VideoWriter_fourcc(*'mp4v'), 25, (256, 256)) 14 | 15 | for im in norm_images: 16 | image = cv2.imread(im) 17 | out.write(image) 18 | 19 | out.release() 20 | 21 | os.system('ffmpeg -y -i {}/temp_original.mp4 -i {}/audio/audio.wav -c:v copy -c:a aac {}/predicted_mesh.mp4'.format(args.src_directory, args.src_directory, args.src_directory)) --------------------------------------------------------------------------------
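lipsync3d/model.py leaves the AudioEncoder body as a TODO and only lists the intended latent-space shapes. The sketch below (not part of the repository; the file name and the conv_block / build_audio_encoder helpers are made up for illustration) is one possible fill-in whose sole goal is to reproduce those shapes, 2 x 256 x 24 -> ... -> 4 x 4 x 2, flattened to a 32-d latent. The kernel sizes, strides, paddings and the LeakyReLU activation are assumptions, not the authors' implementation.

# sketch_audio_encoder.py -- illustrative only, not part of the original repository.
import torch
import torch.nn as nn


class View(nn.Module):
    """Same helper as in lipsync3d/model.py: reshape inside an nn.Sequential."""
    def __init__(self, shape):
        super().__init__()
        self.shape = shape

    def forward(self, x):
        return x.view(*self.shape)


def conv_block(in_ch, out_ch, kernel, stride, padding):
    # Conv + LeakyReLU; the activation choice is an assumption.
    return nn.Sequential(
        nn.Conv2d(in_ch, out_ch, kernel_size=kernel, stride=stride, padding=padding),
        nn.LeakyReLU(0.2, inplace=True),
    )


def build_audio_encoder():
    return nn.Sequential(
        # Stage 1: convolve along the frequency axis, time axis (24) untouched.
        conv_block(2,   72,  (3, 1), (2, 1), (1, 0)),   # 2 x 256 x 24 -> 72 x 128 x 24
        conv_block(72,  108, (3, 1), (2, 1), (1, 0)),   # -> 108 x 64 x 24
        conv_block(108, 162, (3, 1), (2, 1), (1, 0)),   # -> 162 x 32 x 24
        conv_block(162, 243, (3, 1), (2, 1), (1, 0)),   # -> 243 x 16 x 24
        conv_block(243, 256, (3, 1), (2, 1), (1, 0)),   # -> 256 x 8 x 24
        conv_block(256, 256, (3, 1), (2, 1), (1, 0)),   # -> 256 x 4 x 24
        # Stage 2: convolve along the time axis, frequency axis (4) untouched.
        conv_block(256, 128, (1, 4), (1, 2), (0, 2)),   # -> 128 x 4 x 13
        conv_block(128, 64,  (1, 3), (1, 2), (0, 2)),   # -> 64 x 4 x 8
        conv_block(64,  32,  (1, 4), (1, 2), (0, 2)),   # -> 32 x 4 x 5
        conv_block(32,  16,  (1, 2), (1, 1), (0, 0)),   # -> 16 x 4 x 4
        conv_block(16,  8,   (1, 2), (1, 1), (0, 0)),   # -> 8 x 4 x 3
        conv_block(8,   4,   (1, 2), (1, 1), (0, 0)),   # -> 4 x 4 x 2
        View([-1, 32]),                                  # flatten to the 32-d latent
    )


if __name__ == '__main__':
    enc = build_audio_encoder()
    spec = torch.randn(5, 2, 256, 24)   # a batch of 5 audio windows
    print(enc(spec).shape)              # expected: torch.Size([5, 32])

For the remaining TODOs in lipsync3d/train.py, test.py already hints at the missing pieces: it instantiates criterionGeo = L2Loss() and compares reference_mesh + model(audio_feature).reshape(-1, 478, 3) against normalized_mesh, so the training step presumably mirrors that forward pass followed by the usual optimizer.zero_grad() / geoLoss.backward() / optimizer.step().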