├── mtcnn_weights
│   ├── det1.npy
│   ├── det2.npy
│   ├── det3.npy
│   └── README.md
├── converter
│   ├── kalman_filter.py
│   ├── vc_utils.py
│   ├── landmarks_alignment.py
│   ├── color_correction.py
│   ├── face_transformer.py
│   └── video_converter.py
├── networks
│   ├── custom_inits
│   │   └── icnr_initializer.py
│   ├── custom_layers
│   │   └── scale_layer.py
│   ├── pixel_shuffler.py
│   ├── GroupNormalization.py
│   ├── instance_normalization.py
│   ├── losses.py
│   ├── nn_blocks.py
│   └── faceswap_gan_model.py
├── legacy
│   ├── training_data.py
│   ├── README.md
│   ├── model_GAN_v2.py
│   ├── pixel_shuffler.py
│   ├── utils.py
│   ├── image_augmentation.py
│   ├── instance_normalization.py
│   ├── FCN8s_keras.py
│   └── FaceSwap_GAN_v2_test_img.ipynb
├── preprocess.py
├── data_loader
│   ├── data_loader.py
│   └── data_augmentation.py
├── umeyama.py
├── image_augmentation.py
├── utils.py
├── detector
│   └── face_detector.py
├── FaceSwap_GAN_v2.2_video_conversion.ipynb
├── MTCNN_video_face_detection_alignment.ipynb
├── README.md
├── notes
│   └── README.md
└── prep_binary_masks.ipynb
/mtcnn_weights/det1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/product/faceswap-GAN/master/mtcnn_weights/det1.npy
--------------------------------------------------------------------------------
/mtcnn_weights/det2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/product/faceswap-GAN/master/mtcnn_weights/det2.npy
--------------------------------------------------------------------------------
/mtcnn_weights/det3.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/product/faceswap-GAN/master/mtcnn_weights/det3.npy
--------------------------------------------------------------------------------
/mtcnn_weights/README.md:
--------------------------------------------------------------------------------
1 | Weights files are from https://github.com/davidsandberg/facenet/tree/master/src/align
2 |
--------------------------------------------------------------------------------
/converter/kalman_filter.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 |
4 | class KalmanFilter():
5 | def __init__(self, noise_coef):
6 | self.noise_coef = noise_coef
7 | self.kf = self.init_kalman_filter(noise_coef)
8 |
9 | @staticmethod
10 | def init_kalman_filter(noise_coef):
11 | kf = cv2.KalmanFilter(4,2)
12 | kf.measurementMatrix = np.array([[1,0,0,0],[0,1,0,0]], np.float32)
13 | kf.transitionMatrix = np.array([[1,0,1,0],[0,1,0,1],[0,0,1,0],[0,0,0,1]], np.float32)
14 | kf.processNoiseCov = noise_coef * np.array([[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]], np.float32)
15 | return kf
16 |
17 | def correct(self, xy):
18 | return self.kf.correct(xy)
19 |
20 | def predict(self):
21 | return self.kf.predict()
--------------------------------------------------------------------------------
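A minimal usage sketch of this wrapper, smoothing per-frame bounding-box coordinates the way the video converter does; the noise coefficient and detection coordinates below are placeholders.

```python
import numpy as np
from converter.kalman_filter import KalmanFilter

kf = KalmanFilter(noise_coef=1e-3)   # placeholder process-noise coefficient
detections = [(120.0, 80.0), (123.5, 82.0), (121.0, 79.5)]   # placeholder per-frame corner coords

for x, y in detections:
    # cv2.KalmanFilter expects a (2, 1) float32 measurement column
    measurement = np.array([[np.float32(x)], [np.float32(y)]])
    kf.correct(measurement)
    smoothed = kf.predict()          # state is (x, y, dx, dy); the first two rows are the position
    print(float(smoothed[0][0]), float(smoothed[1][0]))
```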
/networks/custom_inits/icnr_initializer.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | def icnr_keras(shape, dtype=None):
4 | """
5 | From https://github.com/kostyaev/ICNR
6 | Custom initializer for subpix upscaling
7 |     Note: upscale factor is fixed to 2, and the base initializer is fixed to random normal.
8 | """
9 | shape = list(shape)
10 |
11 | scale = 2
12 | initializer = tf.keras.initializers.RandomNormal(0, 0.02)
13 |
14 | new_shape = shape[:3] + [int(shape[3] / (scale ** 2))]
15 | x = initializer(new_shape, dtype)
16 | x = tf.transpose(x, perm=[2, 0, 1, 3])
17 | x = tf.image.resize_nearest_neighbor(x, size=(shape[0] * scale, shape[1] * scale))
18 | x = tf.space_to_depth(x, block_size=scale)
19 | x = tf.transpose(x, perm=[1, 2, 0, 3])
20 | return x
21 |
--------------------------------------------------------------------------------
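A hedged sketch of how such an initializer is typically wired up: pass the callable as the `kernel_initializer` of the Conv2D that feeds a 2x PixelShuffler, so the sub-pixel upscale starts free of checkerboard artifacts. The layer sizes are illustrative, and this assumes the TF1.x Keras setup and package-style imports used elsewhere in the repo.

```python
from keras.layers import Input, Conv2D
from keras.models import Model

from networks.custom_inits.icnr_initializer import icnr_keras
from networks.pixel_shuffler import PixelShuffler

inp = Input(shape=(32, 32, 64))                      # illustrative feature map
# 4x the target channel count, because PixelShuffler((2, 2)) folds 4 channels into each output pixel
x = Conv2D(64 * 4, kernel_size=3, padding="same",
           kernel_initializer=icnr_keras)(inp)
x = PixelShuffler()(x)                               # -> (None, 64, 64, 64)
model = Model(inp, x)
model.summary()
```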
/legacy/training_data.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | from image_augmentation import random_transform
3 | from image_augmentation import random_warp
4 |
5 | random_transform_args = {
6 | 'rotation_range': 15,
7 | 'zoom_range': 0.1,
8 | 'shift_range': 0.05,
9 | 'random_flip': 0.5,
10 | }
11 |
12 | def get_training_data( images, batch_size ):
13 | indices = numpy.random.randint( len(images), size=batch_size )
14 | for i,index in enumerate(indices):
15 | image = images[index]
16 | image = random_transform( image, **random_transform_args )
17 | warped_img, target_img = random_warp( image )
18 |
19 | if i == 0:
20 | warped_images = numpy.empty( (batch_size,) + warped_img.shape, warped_img.dtype )
21 | target_images = numpy.empty( (batch_size,) + target_img.shape, warped_img.dtype )
22 |
23 | warped_images[i] = warped_img
24 | target_images[i] = target_img
25 |
26 | return warped_images, target_images
27 |
--------------------------------------------------------------------------------
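A short usage sketch, run from inside `legacy/`, assuming a hypothetical `./faceA/` folder of face crops; `load_images` (from `legacy/utils.py`) resizes them to 256x256, which is the shape `random_warp` expects.

```python
from utils import get_image_paths, load_images   # legacy/utils.py
from training_data import get_training_data

images = load_images(get_image_paths("./faceA/"))   # placeholder folder of face images
warped, target = get_training_data(images, batch_size=16)
print(warped.shape, target.shape)                   # (16, 64, 64, 3) for both
```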
/networks/custom_layers/scale_layer.py:
--------------------------------------------------------------------------------
1 | from keras.layers.core import Layer
2 | from keras.engine import InputSpec
3 | from keras import backend as K
4 | from keras import initializers
5 |
6 | class Scale(Layer):
7 | '''
8 | Code borrows from https://github.com/flyyufelix/cnn_finetune
9 | '''
10 | def __init__(self, weights=None, axis=-1, gamma_init='zero', **kwargs):
11 | self.axis = axis
12 | self.gamma_init = initializers.get(gamma_init)
13 | self.initial_weights = weights
14 | super(Scale, self).__init__(**kwargs)
15 |
16 | def build(self, input_shape):
17 | self.input_spec = [InputSpec(shape=input_shape)]
18 |
19 | # Compatibility with TensorFlow >= 1.0.0
20 | self.gamma = K.variable(self.gamma_init((1,)), name='{}_gamma'.format(self.name))
21 | self.trainable_weights = [self.gamma]
22 |
23 | if self.initial_weights is not None:
24 | self.set_weights(self.initial_weights)
25 | del self.initial_weights
26 |
27 | def call(self, x, mask=None):
28 | return self.gamma * x
29 |
30 | def get_config(self):
31 | config = {"axis": self.axis}
32 | base_config = super(Scale, self).get_config()
33 | return dict(list(base_config.items()) + list(config.items()))
34 |
--------------------------------------------------------------------------------
/converter/vc_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 |
4 | # ==================================================
5 | # Output image initialization functions
6 | # ==================================================
7 | def get_init_mask_map(image):
8 | return np.zeros_like(image)
9 |
10 | def get_init_comb_img(input_img):
11 | comb_img = np.zeros([input_img.shape[0], input_img.shape[1]*2,input_img.shape[2]])
12 | comb_img[:, :input_img.shape[1], :] = input_img
13 | comb_img[:, input_img.shape[1]:, :] = input_img
14 | return comb_img
15 |
16 | def get_init_triple_img(input_img, no_face=False):
17 | if no_face:
18 | triple_img = np.zeros([input_img.shape[0], input_img.shape[1]*3,input_img.shape[2]])
19 | triple_img[:, :input_img.shape[1], :] = input_img
20 | triple_img[:, input_img.shape[1]:input_img.shape[1]*2, :] = input_img
21 | triple_img[:, input_img.shape[1]*2:, :] = (input_img * .15).astype('uint8')
22 | return triple_img
23 | else:
24 | triple_img = np.zeros([input_img.shape[0], input_img.shape[1]*3,input_img.shape[2]])
25 | return triple_img
26 |
27 | def get_mask(roi_image, h, w):
28 | mask = np.zeros_like(roi_image)
29 | mask[h//15:-h//15,w//15:-w//15,:] = 255
30 | mask = cv2.GaussianBlur(mask,(15,15),10)
31 | return mask
--------------------------------------------------------------------------------
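A small sketch of how these helpers fit together during conversion (the frame and ROI below are placeholders): the combined canvas shows the input next to the output, and `get_mask` produces the feathered rectangle used when blending the swapped face back into the frame.

```python
import numpy as np
from converter.vc_utils import get_init_comb_img, get_init_triple_img, get_mask

frame = np.zeros((256, 256, 3), dtype=np.uint8)         # placeholder input frame
comb_img = get_init_comb_img(frame)                     # (256, 512, 3): input | output side by side
triple_img = get_init_triple_img(frame, no_face=True)   # 3-panel canvas when no face is found

roi = frame[64:192, 64:192, :]                          # placeholder detected-face ROI
h, w = roi.shape[:2]
mask = get_mask(roi, h, w)                              # blurred rectangle: 0 at the border, 255 inside
```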
/converter/landmarks_alignment.py:
--------------------------------------------------------------------------------
1 | from umeyama import umeyama
2 | import numpy as np
3 | import cv2
4 |
5 | def get_src_landmarks(x0, x1, y0, y1, pnts):
6 | """
7 | x0, x1, y0, y1: (smoothed) bbox coord.
8 | pnts: landmarks predicted by MTCNN
9 | """
10 | src_landmarks = [(int(pnts[i+5][0]-x0), int(pnts[i][0]-y0)) for i in range(5)]
11 | return src_landmarks
12 |
13 | def get_tar_landmarks(img):
14 | """
15 | img: detected face image
16 | """
17 | avg_landmarks = [
18 | (0.31339227236234224, 0.3259269274198092),
19 | (0.31075140146108776, 0.7228453709528997),
20 | (0.5523683107816256, 0.5187296867370605),
21 | (0.7752419985257663, 0.37262483743520886),
22 | (0.7759613623985877, 0.6772957581740159)
23 | ]
24 | img_sz = img.shape
25 | tar_landmarks = [(int(xy[0]*img_sz[0]), int(xy[1]*img_sz[1])) for xy in avg_landmarks]
26 | return tar_landmarks
27 |
28 | def landmarks_match_mtcnn(src_im, src_landmarks, tar_landmarks):
29 | """
30 | umeyama(src, dst, estimate_scale),
31 | src/dst landmarks coord. should be (y, x)
32 | """
33 | src_size = src_im.shape
34 | src_tmp = [(int(xy[1]), int(xy[0])) for xy in src_landmarks]
35 | dst_tmp = [(int(xy[1]), int(xy[0])) for xy in tar_landmarks]
36 | M = umeyama(np.array(src_tmp), np.array(dst_tmp), True)[0:2]
37 | result = cv2.warpAffine(src_im, M, (src_size[1], src_size[0]), borderMode=cv2.BORDER_REPLICATE)
38 | return result
--------------------------------------------------------------------------------
/legacy/README.md:
--------------------------------------------------------------------------------
1 | ## Notebooks that are no longer maintained are kept in this folder.
2 |
3 | ### faceswap-GAN v2.1
4 | * [FaceSwap_GAN_v2.1_train.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/legacy/FaceSwap_GAN_v2.1_train.ipynb)
5 |   - An experimental model that provides architectures like VAE and [XGAN](https://arxiv.org/abs/1711.05139).
6 |   - In video conversion, it utilizes an FCN for face segmentation to generate a hybrid alpha mask.
7 |
8 | The V2.1 model is an improved architecture intended to stabilize training. The architecture is greatly inspired by [XGAN](https://arxiv.org/abs/1711.05139) ~~and [MS-D neural network](http://www.pnas.org/content/115/2/254)~~. (Note: the V2.1 script is experimental and not well-maintained.)
9 | - V2.1 model provides three base architectures: (i) XGAN, (ii) VAE-GAN, and (iii) a variant of v2 GAN. (default `base_model="GAN"`)
10 | - FCN8s for face segmentation is introduced to improve masking in video conversion (default `use_FCN_mask = True`).
11 |   - To enable this feature, a Keras weights file should be generated using the Jupyter notebook provided in [this repo](https://github.com/shaoanlu/face_segmentation_keras).
12 |
13 | ### faceswap-GAN v2
14 | * [FaceSwap_GAN_v2_train.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/legacy/FaceSwap_GAN_v2_train.ipynb)
15 | - Notebook for training the version 2 GAN model.
16 | - Video conversion functions are also included.
17 |
18 | * [FaceSwap_GAN_v2_test_video_MTCNN.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/legacy/FaceSwap_GAN_v2_test_video_MTCNN.ipynb)
19 |   - Notebook for generating videos. Uses MTCNN for face detection.
20 |
21 | * [faceswap_WGAN-GP_keras_github.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/legacy/faceswap_WGAN-GP_keras_github.ipynb)
22 |   - This notebook is an independent training script for a [WGAN-GP](https://arxiv.org/abs/1704.00028) model in which the perceptual loss is discarded for simplicity.
23 |   - Training can be started as follows:
24 | ```python
25 | gan = FaceSwapGAN() # instantiate the class
26 | gan.train(max_iters=10e4, save_interval=500) # start training
27 | ```
28 | * [FaceSwap_GAN_v2_sz128_train.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/FaceSwap_GAN_v2_sz128_train.ipynb)
29 | - This notebook is an independent script for a model with 128x128 input/output resolution.
30 |
31 | ### faceswap-GAN v1
32 | * [FaceSwap_GAN_github.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/legacy/FaceSwap_GAN_github.ipynb)
33 | - V1 model directly predicts color output images without masking.
34 | - Video conversion functions are also included.
35 |
--------------------------------------------------------------------------------
/preprocess.py:
--------------------------------------------------------------------------------
1 | import keras.backend as K
2 | from moviepy.editor import VideoFileClip
3 | from matplotlib import pyplot as plt
4 | from pathlib import Path
5 | import os
6 |
7 | from converter.landmarks_alignment import *
8 |
9 | class VideoInfo:
10 | def __init__(self):
11 | self.frame = 0
12 |
13 | def process_image(input_img, info, detector, save_interval, save_path):
14 | minsize = 30 # minimum size of face
15 | detec_threshold = 0.9
16 |     threshold = [0.7, 0.8, detec_threshold] # thresholds for the three MTCNN stages
17 | factor = 0.709 # scale factor
18 |
19 | info.frame += 1
20 | frame = info.frame
21 | if frame % save_interval == 0:
22 | faces, pnts = detector.detect_face(input_img, threshold=detec_threshold, use_auto_downscaling=False)
23 | for idx, (x0, y1, x1, y0, conf_score) in enumerate(faces):
24 | det_face_im = input_img[int(x0):int(x1),int(y0):int(y1),:]
25 |
26 | # get src/tar landmarks
27 | src_landmarks = get_src_landmarks(x0, x1, y0, y1, pnts)
28 | tar_landmarks = get_tar_landmarks(det_face_im)
29 |
30 | # align detected face
31 | aligned_det_face_im = landmarks_match_mtcnn(det_face_im, src_landmarks, tar_landmarks)
32 |
33 | Path(os.path.join(f"{save_path}", "rgb")).mkdir(parents=True, exist_ok=True)
34 | fname = os.path.join(f"{save_path}", "rgb", f"frame{frame}face{str(idx)}.jpg")
35 | plt.imsave(fname, aligned_det_face_im, format="jpg")
36 | #fname = f"./faces/raw_faces/frame{frames}face{str(idx)}.jpg"
37 | #plt.imsave(fname, det_face_im, format="jpg")
38 |
39 | bm = np.zeros_like(aligned_det_face_im)
40 | h, w = bm.shape[:2]
41 | bm[int(src_landmarks[0][0]-h/15):int(src_landmarks[0][0]+h/15),
42 | int(src_landmarks[0][1]-w/8):int(src_landmarks[0][1]+w/8),:] = 255
43 | bm[int(src_landmarks[1][0]-h/15):int(src_landmarks[1][0]+h/15),
44 | int(src_landmarks[1][1]-w/8):int(src_landmarks[1][1]+w/8),:] = 255
45 | bm = landmarks_match_mtcnn(bm, src_landmarks, tar_landmarks)
46 | Path(os.path.join(f"{save_path}", "binary_mask")).mkdir(parents=True, exist_ok=True)
47 | fname = os.path.join(f"{save_path}", "binary_mask", f"frame{frame}face{str(idx)}.jpg")
48 | plt.imsave(fname, bm, format="jpg")
49 |
50 | return np.zeros((3,3,3))
51 |
52 | def preprocess_video(fn_input_video, fd, save_interval, save_path):
53 | info = VideoInfo()
54 | output = 'dummy.mp4'
55 | clip1 = VideoFileClip(fn_input_video)
56 | clip = clip1.fl_image(lambda img: process_image(img, info, fd, save_interval, save_path))
57 | clip.write_videofile(output, audio=False, verbose=False)
58 | clip1.reader.close()
59 |
60 |
61 |
--------------------------------------------------------------------------------
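A hedged end-to-end sketch of calling `preprocess_video` with the MTCNN detector defined in `detector/face_detector.py`; the video path, save interval, and output folder below are placeholders, and this assumes the detector package and its MTCNN dependency are importable from the repo root.

```python
import keras.backend as K
from detector.face_detector import MTCNNFaceDetector
from preprocess import preprocess_video

sess = K.get_session()                                       # TF1.x session shared with Keras
fd = MTCNNFaceDetector(sess, model_path="./mtcnn_weights/")

# Save aligned faces and their eye binary masks on every 6th frame (placeholder values)
preprocess_video("./source_video.mp4", fd, save_interval=6, save_path="./faces/")
```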
/data_loader/data_loader.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from .data_augmentation import *
3 |
4 |
5 | class DataLoader(object):
6 | def __init__(self, filenames, all_filenames, batch_size, dir_bm_eyes,
7 | resolution, num_cpus, sess, **da_config):
8 | self.filenames = filenames
9 | self.all_filenames = all_filenames
10 | self.batch_size = batch_size
11 | self.dir_bm_eyes = dir_bm_eyes
12 | self.resolution = resolution
13 | self.num_cpus = num_cpus
14 | self.sess = sess
15 |
16 | self.set_data_augm_config(
17 | da_config["prob_random_color_match"],
18 | da_config["use_da_motion_blur"],
19 | da_config["use_bm_eyes"])
20 |
21 | self.data_iter_next = self.create_tfdata_iter(
22 | self.filenames,
23 | self.all_filenames,
24 | self.batch_size,
25 | self.dir_bm_eyes,
26 | self.resolution,
27 | self.prob_random_color_match,
28 | self.use_da_motion_blur,
29 | self.use_bm_eyes,
30 | )
31 |
32 | def set_data_augm_config(self, prob_random_color_match=0.5,
33 | use_da_motion_blur=True, use_bm_eyes=True):
34 | self.prob_random_color_match = prob_random_color_match
35 | self.use_da_motion_blur = use_da_motion_blur
36 | self.use_bm_eyes = use_bm_eyes
37 |
38 | def create_tfdata_iter(self, filenames, fns_all_trn_data, batch_size, dir_bm_eyes, resolution,
39 | prob_random_color_match, use_da_motion_blur, use_bm_eyes):
40 |         tf_fns = tf.constant(filenames, dtype=tf.string) # passing the Python list directly also works
41 | dataset = tf.data.Dataset.from_tensor_slices(tf_fns)
42 | dataset = dataset.shuffle(len(filenames))
43 | dataset = dataset.apply(
44 | tf.contrib.data.map_and_batch(
45 | lambda filenames: tf.py_func(
46 | func=read_image,
47 | inp=[filenames,
48 | fns_all_trn_data,
49 | dir_bm_eyes,
50 | resolution,
51 | prob_random_color_match,
52 | use_da_motion_blur,
53 | use_bm_eyes],
54 | Tout=[tf.float32, tf.float32, tf.float32]
55 | ),
56 | batch_size=batch_size,
57 | num_parallel_batches=self.num_cpus, # cpu cores
58 | drop_remainder=True
59 | )
60 | )
61 | dataset = dataset.repeat()
62 | dataset = dataset.prefetch(32)
63 |
64 | iterator = dataset.make_one_shot_iterator()
65 |         next_element = iterator.get_next() # this tensor can also be used as Input(tensor=next_element)
66 | return next_element
67 |
68 | def get_next_batch(self):
69 | return self.sess.run(self.data_iter_next)
--------------------------------------------------------------------------------
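A hedged sketch of driving the loader with a TF1.x session (the class relies on `tf.contrib`, so TF 1.x is assumed). The file lists, mask directory, and augmentation settings are placeholders; `get_next_batch()` returns three float32 arrays, one per `Tout` entry of the `read_image` py_func.

```python
import tensorflow as tf
from data_loader.data_loader import DataLoader

sess = tf.Session()                                     # TF1.x-style session, as the class expects
fns_A = ["./faceA/0001.jpg", "./faceA/0002.jpg"]        # placeholder training file list
da_config = {
    "prob_random_color_match": 0.5,
    "use_da_motion_blur": True,
    "use_bm_eyes": True,
}

loader = DataLoader(filenames=fns_A, all_filenames=fns_A, batch_size=2,
                    dir_bm_eyes="./binary_masks/faceA_eyes",   # placeholder eye-mask folder
                    resolution=64, num_cpus=2, sess=sess, **da_config)

batch = loader.get_next_batch()                         # three arrays, per the three Tout entries
```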
/legacy/model_GAN_v2.py:
--------------------------------------------------------------------------------
1 | from keras.models import Sequential, Model
2 | from keras.layers import *
3 | from keras.layers.advanced_activations import LeakyReLU
4 | from keras.activations import relu
5 | from keras.initializers import RandomNormal
6 | from keras.applications import *
7 | import keras.backend as K
8 | from pixel_shuffler import PixelShuffler
9 |
10 | conv_init = RandomNormal(0, 0.02)
11 |
12 | def conv_block(input_tensor, f):
13 | x = input_tensor
14 | x = Conv2D(f, kernel_size=3, strides=2, kernel_initializer=conv_init, use_bias=False, padding="same")(x)
15 | x = Activation("relu")(x)
16 | return x
17 |
18 | def conv_block_d(input_tensor, f, use_instance_norm=True):
19 | x = input_tensor
20 | x = Conv2D(f, kernel_size=4, strides=2, kernel_initializer=conv_init, use_bias=False, padding="same")(x)
21 | x = LeakyReLU(alpha=0.2)(x)
22 | return x
23 |
24 | def res_block(input_tensor, f):
25 | x = input_tensor
26 | x = Conv2D(f, kernel_size=3, kernel_initializer=conv_init, use_bias=False, padding="same")(x)
27 | x = LeakyReLU(alpha=0.2)(x)
28 | x = Conv2D(f, kernel_size=3, kernel_initializer=conv_init, use_bias=False, padding="same")(x)
29 | x = add([x, input_tensor])
30 | x = LeakyReLU(alpha=0.2)(x)
31 | return x
32 |
33 | def upscale_ps(filters, use_norm=True):
34 | def block(x):
35 | x = Conv2D(filters*4, kernel_size=3, use_bias=False, kernel_initializer=RandomNormal(0, 0.02), padding='same' )(x)
36 | x = LeakyReLU(0.1)(x)
37 | x = PixelShuffler()(x)
38 | return x
39 | return block
40 |
41 | def Discriminator(nc_in, input_size=64):
42 | inp = Input(shape=(input_size, input_size, nc_in))
43 | #x = GaussianNoise(0.05)(inp)
44 | x = conv_block_d(inp, 64, False)
45 | x = conv_block_d(x, 128, False)
46 | x = conv_block_d(x, 256, False)
47 | out = Conv2D(1, kernel_size=4, kernel_initializer=conv_init, use_bias=False, padding="same", activation="sigmoid")(x)
48 | return Model(inputs=[inp], outputs=out)
49 |
50 | def Encoder(nc_in=3, input_size=64):
51 | inp = Input(shape=(input_size, input_size, nc_in))
52 | x = Conv2D(64, kernel_size=5, kernel_initializer=conv_init, use_bias=False, padding="same")(inp)
53 | x = conv_block(x,128)
54 | x = conv_block(x,256)
55 | x = conv_block(x,512)
56 | x = conv_block(x,1024)
57 | x = Dense(1024)(Flatten()(x))
58 | x = Dense(4*4*1024)(x)
59 | x = Reshape((4, 4, 1024))(x)
60 | out = upscale_ps(512)(x)
61 | return Model(inputs=inp, outputs=out)
62 |
63 | def Decoder_ps(nc_in=512, input_size=8):
64 | input_ = Input(shape=(input_size, input_size, nc_in))
65 | x = input_
66 | x = upscale_ps(256)(x)
67 | x = upscale_ps(128)(x)
68 | x = upscale_ps(64)(x)
69 | x = res_block(x, 64)
70 | x = res_block(x, 64)
71 | #x = Conv2D(4, kernel_size=5, padding='same')(x)
72 | alpha = Conv2D(1, kernel_size=5, padding='same', activation="sigmoid")(x)
73 | rgb = Conv2D(3, kernel_size=5, padding='same', activation="tanh")(x)
74 | out = concatenate([alpha, rgb])
75 | return Model(input_, out )
--------------------------------------------------------------------------------
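A brief sketch of composing the v2 building blocks into the shared-encoder generator pair; how the training notebook wires the discriminators and losses is not reproduced here.

```python
from keras.layers import Input
from keras.models import Model
from model_GAN_v2 import Encoder, Decoder_ps

encoder = Encoder()           # (64, 64, 3) -> (8, 8, 512)
decoder_A = Decoder_ps()      # (8, 8, 512) -> (64, 64, 4): 1 alpha + 3 RGB channels
decoder_B = Decoder_ps()

x = Input(shape=(64, 64, 3))
netGA = Model(x, decoder_A(encoder(x)))   # generator for identity A
netGB = Model(x, decoder_B(encoder(x)))   # generator for identity B, sharing the encoder
```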
/converter/color_correction.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 |
4 | """ Color corretion functions"""
5 | def hist_match(source, template):
6 |     # Code borrowed from:
7 | # https://stackoverflow.com/questions/32655686/histogram-matching-of-two-images-in-python-2-x
8 | oldshape = source.shape
9 | source = source.ravel()
10 | template = template.ravel()
11 | s_values, bin_idx, s_counts = np.unique(source, return_inverse=True,
12 | return_counts=True)
13 | t_values, t_counts = np.unique(template, return_counts=True)
14 |
15 | s_quantiles = np.cumsum(s_counts).astype(np.float64)
16 | s_quantiles /= s_quantiles[-1]
17 | t_quantiles = np.cumsum(t_counts).astype(np.float64)
18 | t_quantiles /= t_quantiles[-1]
19 | interp_t_values = np.interp(s_quantiles, t_quantiles, t_values)
20 |
21 | return interp_t_values[bin_idx].reshape(oldshape)
22 |
23 | def color_hist_match(src_im, tar_im, color_space="RGB"):
24 | if color_space.lower() != "rgb":
25 | src_im = trans_color_space(src_im, color_space)
26 | tar_im = trans_color_space(tar_im, color_space)
27 |
28 | matched_R = hist_match(src_im[:,:,0], tar_im[:,:,0])
29 | matched_G = hist_match(src_im[:,:,1], tar_im[:,:,1])
30 | matched_B = hist_match(src_im[:,:,2], tar_im[:,:,2])
31 | matched = np.stack((matched_R, matched_G, matched_B), axis=2).astype(np.float32)
32 | matched = np.clip(matched, 0, 255)
33 |
34 | if color_space.lower() != "rgb":
35 |         matched = trans_color_space(matched.astype(np.uint8), color_space, rev=True)
36 | return matched
37 |
38 | def adain(src_im, tar_im, eps=1e-7, color_space="RGB"):
39 | # https://github.com/ftokarev/tf-adain/blob/master/adain/norm.py
40 | if color_space.lower() != "rgb":
41 | src_im = trans_color_space(src_im, color_space)
42 | tar_im = trans_color_space(tar_im, color_space)
43 |
44 | mt = np.mean(tar_im, axis=(0,1))
45 | st = np.std(tar_im, axis=(0,1))
46 | ms = np.mean(src_im, axis=(0,1))
47 | ss = np.std(src_im, axis=(0,1))
48 |     if np.any(ss <= eps): return src_im
49 | result = st * (src_im.astype(np.float32) - ms) / (ss+eps) + mt
50 | result = np.clip(result, 0, 255)
51 |
52 | if color_space.lower() != "rgb":
53 | result = trans_color_space(result.astype(np.uint8), color_space, rev=True)
54 | return result
55 |
56 | def trans_color_space(im, color_space, rev=False):
57 | if color_space.lower() == "lab":
58 | clr_spc = cv2.COLOR_BGR2Lab
59 | rev_clr_spc = cv2.COLOR_Lab2BGR
60 | elif color_space.lower() == "ycbcr":
61 | clr_spc = cv2.COLOR_BGR2YCR_CB
62 | rev_clr_spc = cv2.COLOR_YCR_CB2BGR
63 | elif color_space.lower() == "xyz":
64 | clr_spc = cv2.COLOR_BGR2XYZ
65 | rev_clr_spc = cv2.COLOR_XYZ2BGR
66 | elif color_space.lower() == "luv":
67 | clr_spc = cv2.COLOR_BGR2Luv
68 | rev_clr_spc = cv2.COLOR_Luv2BGR
69 | elif color_space.lower() == "rgb":
70 | pass
71 | else:
72 | raise NotImplementedError()
73 |
74 | if color_space.lower() != "rgb":
75 | trans_clr_spc = rev_clr_spc if rev else clr_spc
76 | im = cv2.cvtColor(im, trans_clr_spc)
77 | return im
78 |
--------------------------------------------------------------------------------
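A small usage sketch on two placeholder uint8 face crops; both functions expect images in the 0-255 range and return float results clipped back to that range.

```python
import numpy as np
from converter.color_correction import adain, color_hist_match

src = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)   # placeholder swapped-face crop
tar = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)   # placeholder original-face crop

out_adain = adain(src, tar)                      # match per-channel mean and std
out_hist = color_hist_match(src, tar, "RGB")     # match per-channel histograms
```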
/networks/pixel_shuffler.py:
--------------------------------------------------------------------------------
1 | # PixelShuffler layer for Keras
2 | # by t-ae
3 | # https://gist.github.com/t-ae/6e1016cc188104d123676ccef3264981
4 |
5 | from keras.utils import conv_utils
6 | from keras.engine.topology import Layer
7 | import keras.backend as K
8 |
9 | class PixelShuffler(Layer):
10 | def __init__(self, size=(2, 2), data_format=None, **kwargs):
11 | super(PixelShuffler, self).__init__(**kwargs)
12 | self.data_format = K.image_data_format()
13 | self.size = conv_utils.normalize_tuple(size, 2, 'size')
14 |
15 | def call(self, inputs):
16 |
17 | input_shape = K.int_shape(inputs)
18 | if len(input_shape) != 4:
19 | raise ValueError('Inputs should have rank ' +
20 | str(4) +
21 | '; Received input shape:', str(input_shape))
22 |
23 | if self.data_format == 'channels_first':
24 | batch_size, c, h, w = input_shape
25 | if batch_size is None:
26 | batch_size = -1
27 | rh, rw = self.size
28 | oh, ow = h * rh, w * rw
29 | oc = c // (rh * rw)
30 |
31 | out = K.reshape(inputs, (batch_size, rh, rw, oc, h, w))
32 | out = K.permute_dimensions(out, (0, 3, 4, 1, 5, 2))
33 | out = K.reshape(out, (batch_size, oc, oh, ow))
34 | return out
35 |
36 | elif self.data_format == 'channels_last':
37 | batch_size, h, w, c = input_shape
38 | if batch_size is None:
39 | batch_size = -1
40 | rh, rw = self.size
41 | oh, ow = h * rh, w * rw
42 | oc = c // (rh * rw)
43 |
44 | out = K.reshape(inputs, (batch_size, h, w, rh, rw, oc))
45 | out = K.permute_dimensions(out, (0, 1, 3, 2, 4, 5))
46 | out = K.reshape(out, (batch_size, oh, ow, oc))
47 | return out
48 |
49 | def compute_output_shape(self, input_shape):
50 |
51 | if len(input_shape) != 4:
52 | raise ValueError('Inputs should have rank ' +
53 | str(4) +
54 | '; Received input shape:', str(input_shape))
55 |
56 | if self.data_format == 'channels_first':
57 | height = input_shape[2] * self.size[0] if input_shape[2] is not None else None
58 | width = input_shape[3] * self.size[1] if input_shape[3] is not None else None
59 | channels = input_shape[1] // self.size[0] // self.size[1]
60 |
61 | if channels * self.size[0] * self.size[1] != input_shape[1]:
62 | raise ValueError('channels of input and size are incompatible')
63 |
64 | return (input_shape[0],
65 | channels,
66 | height,
67 | width)
68 |
69 | elif self.data_format == 'channels_last':
70 | height = input_shape[1] * self.size[0] if input_shape[1] is not None else None
71 | width = input_shape[2] * self.size[1] if input_shape[2] is not None else None
72 | channels = input_shape[3] // self.size[0] // self.size[1]
73 |
74 | if channels * self.size[0] * self.size[1] != input_shape[3]:
75 | raise ValueError('channels of input and size are incompatible')
76 |
77 | return (input_shape[0],
78 | height,
79 | width,
80 | channels)
81 |
82 | def get_config(self):
83 | config = {'size': self.size,
84 | 'data_format': self.data_format}
85 | base_config = super(PixelShuffler, self).get_config()
86 |
87 | return dict(list(base_config.items()) + list(config.items()))
88 |
--------------------------------------------------------------------------------
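A quick shape check, assuming the same Keras 2.x / TF1.x environment the layer imports from: a (2, 2) shuffle turns an (8, 8, 16) feature map into (16, 16, 4).

```python
import numpy as np
from keras.layers import Input
from keras.models import Model
from networks.pixel_shuffler import PixelShuffler

inp = Input(shape=(8, 8, 16))
out = PixelShuffler(size=(2, 2))(inp)
model = Model(inp, out)

x = np.random.rand(1, 8, 8, 16).astype("float32")
print(model.predict(x).shape)   # (1, 16, 16, 4)
```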
/legacy/pixel_shuffler.py:
--------------------------------------------------------------------------------
1 | # PixelShuffler layer for Keras
2 | # by t-ae
3 | # https://gist.github.com/t-ae/6e1016cc188104d123676ccef3264981
4 |
5 | from keras.utils import conv_utils
6 | from keras.engine.topology import Layer
7 | import keras.backend as K
8 |
9 | class PixelShuffler(Layer):
10 | def __init__(self, size=(2, 2), data_format=None, **kwargs):
11 | super(PixelShuffler, self).__init__(**kwargs)
12 | self.data_format = conv_utils.normalize_data_format(data_format)
13 | self.size = conv_utils.normalize_tuple(size, 2, 'size')
14 |
15 | def call(self, inputs):
16 |
17 | input_shape = K.int_shape(inputs)
18 | if len(input_shape) != 4:
19 | raise ValueError('Inputs should have rank ' +
20 | str(4) +
21 | '; Received input shape:', str(input_shape))
22 |
23 | if self.data_format == 'channels_first':
24 | batch_size, c, h, w = input_shape
25 | if batch_size is None:
26 | batch_size = -1
27 | rh, rw = self.size
28 | oh, ow = h * rh, w * rw
29 | oc = c // (rh * rw)
30 |
31 | out = K.reshape(inputs, (batch_size, rh, rw, oc, h, w))
32 | out = K.permute_dimensions(out, (0, 3, 4, 1, 5, 2))
33 | out = K.reshape(out, (batch_size, oc, oh, ow))
34 | return out
35 |
36 | elif self.data_format == 'channels_last':
37 | batch_size, h, w, c = input_shape
38 | if batch_size is None:
39 | batch_size = -1
40 | rh, rw = self.size
41 | oh, ow = h * rh, w * rw
42 | oc = c // (rh * rw)
43 |
44 | out = K.reshape(inputs, (batch_size, h, w, rh, rw, oc))
45 | out = K.permute_dimensions(out, (0, 1, 3, 2, 4, 5))
46 | out = K.reshape(out, (batch_size, oh, ow, oc))
47 | return out
48 |
49 | def compute_output_shape(self, input_shape):
50 |
51 | if len(input_shape) != 4:
52 | raise ValueError('Inputs should have rank ' +
53 | str(4) +
54 | '; Received input shape:', str(input_shape))
55 |
56 | if self.data_format == 'channels_first':
57 | height = input_shape[2] * self.size[0] if input_shape[2] is not None else None
58 | width = input_shape[3] * self.size[1] if input_shape[3] is not None else None
59 | channels = input_shape[1] // self.size[0] // self.size[1]
60 |
61 | if channels * self.size[0] * self.size[1] != input_shape[1]:
62 | raise ValueError('channels of input and size are incompatible')
63 |
64 | return (input_shape[0],
65 | channels,
66 | height,
67 | width)
68 |
69 | elif self.data_format == 'channels_last':
70 | height = input_shape[1] * self.size[0] if input_shape[1] is not None else None
71 | width = input_shape[2] * self.size[1] if input_shape[2] is not None else None
72 | channels = input_shape[3] // self.size[0] // self.size[1]
73 |
74 | if channels * self.size[0] * self.size[1] != input_shape[3]:
75 | raise ValueError('channels of input and size are incompatible')
76 |
77 | return (input_shape[0],
78 | height,
79 | width,
80 | channels)
81 |
82 | def get_config(self):
83 | config = {'size': self.size,
84 | 'data_format': self.data_format}
85 | base_config = super(PixelShuffler, self).get_config()
86 |
87 | return dict(list(base_config.items()) + list(config.items()))
88 |
--------------------------------------------------------------------------------
/umeyama.py:
--------------------------------------------------------------------------------
1 | ## License (Modified BSD)
2 | ## Copyright (C) 2011, the scikit-image team All rights reserved.
3 | ##
4 | ## Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5 | ##
6 | ## Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7 | ## Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
8 | ## Neither the name of skimage nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
9 | ## THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
10 |
11 | # umeyama function from scikit-image/skimage/transform/_geometric.py
12 |
13 | import numpy as np
14 |
15 | def umeyama( src, dst, estimate_scale ):
16 | """Estimate N-D similarity transformation with or without scaling.
17 | Parameters
18 | ----------
19 | src : (M, N) array
20 | Source coordinates.
21 | dst : (M, N) array
22 | Destination coordinates.
23 | estimate_scale : bool
24 | Whether to estimate scaling factor.
25 | Returns
26 | -------
27 | T : (N + 1, N + 1)
28 | The homogeneous similarity transformation matrix. The matrix contains
29 | NaN values only if the problem is not well-conditioned.
30 | References
31 | ----------
32 | .. [1] "Least-squares estimation of transformation parameters between two
33 | point patterns", Shinji Umeyama, PAMI 1991, DOI: 10.1109/34.88573
34 | """
35 |
36 | num = src.shape[0]
37 | dim = src.shape[1]
38 |
39 | # Compute mean of src and dst.
40 | src_mean = src.mean(axis=0)
41 | dst_mean = dst.mean(axis=0)
42 |
43 | # Subtract mean from src and dst.
44 | src_demean = src - src_mean
45 | dst_demean = dst - dst_mean
46 |
47 | # Eq. (38).
48 | A = np.dot(dst_demean.T, src_demean) / num
49 |
50 | # Eq. (39).
51 | d = np.ones((dim,), dtype=np.double)
52 | if np.linalg.det(A) < 0:
53 | d[dim - 1] = -1
54 |
55 | T = np.eye(dim + 1, dtype=np.double)
56 |
57 | U, S, V = np.linalg.svd(A)
58 |
59 | # Eq. (40) and (43).
60 | rank = np.linalg.matrix_rank(A)
61 | if rank == 0:
62 | return np.nan * T
63 | elif rank == dim - 1:
64 | if np.linalg.det(U) * np.linalg.det(V) > 0:
65 | T[:dim, :dim] = np.dot(U, V)
66 | else:
67 | s = d[dim - 1]
68 | d[dim - 1] = -1
69 | T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V))
70 | d[dim - 1] = s
71 | else:
72 | T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V.T))
73 |
74 | if estimate_scale:
75 | # Eq. (41) and (42).
76 | scale = 1.0 / src_demean.var(axis=0).sum() * np.dot(S, d)
77 | else:
78 | scale = 1.0
79 |
80 | T[:dim, dim] = dst_mean - scale * np.dot(T[:dim, :dim], src_mean.T)
81 | T[:dim, :dim] *= scale
82 |
83 | return T
84 |
85 |
--------------------------------------------------------------------------------
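A worked example with placeholder point sets, mirroring how `converter/landmarks_alignment.py` uses the result: take the top two rows of the returned 3x3 matrix and hand them to `cv2.warpAffine`.

```python
import numpy as np
import cv2
from umeyama import umeyama

src = np.array([[30, 40], [30, 90], [70, 65]], dtype=np.float64)   # placeholder landmarks
dst = np.array([[40, 42], [38, 95], [82, 70]], dtype=np.float64)

T = umeyama(src, dst, estimate_scale=True)   # 3x3 homogeneous similarity transform
M = T[0:2]                                   # 2x3 affine part for OpenCV

image = np.zeros((128, 128, 3), dtype=np.uint8)   # placeholder image
warped = cv2.warpAffine(image, M, (128, 128), borderMode=cv2.BORDER_REPLICATE)
```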
/legacy/utils.py:
--------------------------------------------------------------------------------
1 | from IPython.display import display
2 | from PIL import Image
3 | import cv2
4 | import numpy as np
5 | import os
6 |
7 | def get_image_paths(directory):
8 | return [x.path for x in os.scandir(directory) if x.name.endswith(".jpg") or x.name.endswith(".png")]
9 |
10 | def load_images(image_paths, convert=None):
11 | iter_all_images = (cv2.resize(cv2.imread(fn), (256,256)) for fn in image_paths)
12 | if convert:
13 | iter_all_images = (convert(img) for img in iter_all_images)
14 | for i,image in enumerate( iter_all_images ):
15 | if i == 0:
16 | all_images = np.empty((len(image_paths),) + image.shape, dtype=image.dtype)
17 | all_images[i] = image
18 | return all_images
19 |
20 | def get_transpose_axes( n ):
21 | if n % 2 == 0:
22 | y_axes = list(range(1, n-1, 2))
23 | x_axes = list(range(0, n-1, 2))
24 | else:
25 | y_axes = list(range(0, n-1, 2))
26 | x_axes = list(range(1, n-1, 2))
27 | return y_axes, x_axes, [n-1]
28 |
29 | def stack_images(images):
30 | images_shape = np.array(images.shape)
31 | new_axes = get_transpose_axes(len(images_shape))
32 | new_shape = [np.prod(images_shape[x]) for x in new_axes]
33 | return np.transpose(
34 | images,
35 | axes = np.concatenate(new_axes)
36 | ).reshape(new_shape)
37 |
38 | def showG(test_A, test_B, path_A, path_B, batchSize):
39 | figure_A = np.stack([
40 | test_A,
41 | np.squeeze(np.array([path_A([test_A[i:i+1]]) for i in range(test_A.shape[0])])),
42 | np.squeeze(np.array([path_B([test_A[i:i+1]]) for i in range(test_A.shape[0])])),
43 | ], axis=1 )
44 | figure_B = np.stack([
45 | test_B,
46 | np.squeeze(np.array([path_B([test_B[i:i+1]]) for i in range(test_B.shape[0])])),
47 | np.squeeze(np.array([path_A([test_B[i:i+1]]) for i in range(test_B.shape[0])])),
48 | ], axis=1 )
49 |
50 | figure = np.concatenate([figure_A, figure_B], axis=0)
51 | figure = figure.reshape((4,batchSize//2) + figure.shape[1:])
52 | figure = stack_images(figure)
53 | figure = np.clip((figure + 1) * 255 / 2, 0, 255).astype('uint8')
54 | figure = cv2.cvtColor(figure, cv2.COLOR_BGR2RGB)
55 |
56 | display(Image.fromarray(figure))
57 |
58 | def showG_mask(test_A, test_B, path_A, path_B, batchSize):
59 | figure_A = np.stack([
60 | test_A,
61 | (np.squeeze(np.array([path_A([test_A[i:i+1]]) for i in range(test_A.shape[0])])))*2-1,
62 | (np.squeeze(np.array([path_B([test_A[i:i+1]]) for i in range(test_A.shape[0])])))*2-1,
63 | ], axis=1 )
64 | figure_B = np.stack([
65 | test_B,
66 | (np.squeeze(np.array([path_B([test_B[i:i+1]]) for i in range(test_B.shape[0])])))*2-1,
67 | (np.squeeze(np.array([path_A([test_B[i:i+1]]) for i in range(test_B.shape[0])])))*2-1,
68 | ], axis=1 )
69 |
70 | figure = np.concatenate([figure_A, figure_B], axis=0)
71 | figure = figure.reshape((4,batchSize//2) + figure.shape[1:])
72 | figure = stack_images(figure)
73 | figure = np.clip((figure + 1) * 255 / 2, 0, 255).astype('uint8')
74 | figure = cv2.cvtColor(figure, cv2.COLOR_BGR2RGB)
75 |
76 | display(Image.fromarray(figure))
77 |
78 | def showG_eyes(test_A, test_B, bm_eyes_A, bm_eyes_B, batchSize):
79 | figure_A = np.stack([
80 | (test_A + 1)/2,
81 | bm_eyes_A,
82 | bm_eyes_A * (test_A + 1)/2,
83 | ], axis=1 )
84 | figure_B = np.stack([
85 | (test_B + 1)/2,
86 | bm_eyes_B,
87 | bm_eyes_B * (test_B+1)/2,
88 | ], axis=1 )
89 |
90 | figure = np.concatenate([figure_A, figure_B], axis=0)
91 | figure = figure.reshape((4,batchSize//2) + figure.shape[1:])
92 | figure = stack_images(figure)
93 | figure = np.clip(figure * 255, 0, 255).astype('uint8')
94 | figure = cv2.cvtColor(figure, cv2.COLOR_BGR2RGB)
95 |
96 | display(Image.fromarray(figure))
97 |
--------------------------------------------------------------------------------
/image_augmentation.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy
3 |
4 | from umeyama import umeyama
5 |
6 | def random_channel_shift(x, intensity=10, channel_axis=2):
7 | x = numpy.rollaxis(x, channel_axis, 0)
8 | min_x, max_x = numpy.min(x), numpy.max(x)
9 | intensity = max_x/255*15.
10 | channel_images = [numpy.clip(x_channel + numpy.random.uniform(-intensity, intensity), min_x, max_x) for x_channel in x]
11 | x = numpy.stack(channel_images, axis=0)
12 | x = numpy.rollaxis(x, 0, channel_axis + 1)
13 | return x
14 |
15 | def random_transform( image, rotation_range, zoom_range, shift_range, random_flip ):
16 | h,w = image.shape[0:2]
17 | #color_shifted_image = random_channel_shift(image)
18 | rotation = numpy.random.uniform( -rotation_range, rotation_range )
19 | scale = numpy.random.uniform( 1 - zoom_range, 1 + zoom_range )
20 | tx = numpy.random.uniform( -shift_range, shift_range ) * w
21 | ty = numpy.random.uniform( -shift_range, shift_range ) * h
22 | mat = cv2.getRotationMatrix2D( (w//2,h//2), rotation, scale )
23 | mat[:,2] += (tx,ty)
24 | result = cv2.warpAffine( image, mat, (w,h), borderMode=cv2.BORDER_REPLICATE )
25 | if numpy.random.random() < random_flip:
26 | result = result[:,::-1]
27 | return result
28 |
29 | # get a pair of randomly warped images from an aligned face image
30 | def random_warp( image ):
31 | assert image.shape == (256,256,3)
32 | #range_ = numpy.linspace( 128-80, 128+80, 5 )
33 | range_ = numpy.linspace( 128-110, 128+110, 5 )
34 | mapx = numpy.broadcast_to( range_, (5,5) )
35 | mapy = mapx.T
36 |
37 | mapx = mapx + numpy.random.normal( size=(5,5), scale=6 )
38 | mapy = mapy + numpy.random.normal( size=(5,5), scale=6 )
39 |
40 | interp_mapx = cv2.resize( mapx, (80,80) )[8:72,8:72].astype('float32')
41 | interp_mapy = cv2.resize( mapy, (80,80) )[8:72,8:72].astype('float32')
42 |
43 | warped_image = cv2.remap( image, interp_mapx, interp_mapy, cv2.INTER_LINEAR )
44 |
45 | src_points = numpy.stack( [ mapx.ravel(), mapy.ravel() ], axis=-1 )
46 | dst_points = numpy.mgrid[0:65:16,0:65:16].T.reshape(-1,2)
47 | mat = umeyama( src_points, dst_points, True )[0:2]
48 |
49 | target_image = cv2.warpAffine( image, mat, (64,64) )
50 |
51 | return warped_image, target_image
52 |
53 | # get a pair of randomly warped images from an aligned face image
54 | def random_warp128(image):
55 | assert image.shape == (256,256,3)
56 | range_ = numpy.linspace(128-110, 128+110, 5)
57 | mapx = numpy.broadcast_to(range_, (5,5))
58 | mapy = mapx.T
59 |
60 | mapx = mapx + numpy.random.normal(size=(5,5), scale=6)
61 | mapy = mapy + numpy.random.normal(size=(5,5), scale=6)
62 |
63 | interp_mapx = cv2.resize(mapx, (80*2,80*2))[8*2:72*2,8*2:72*2].astype('float32')
64 | interp_mapy = cv2.resize(mapy, (80*2,80*2))[8*2:72*2,8*2:72*2].astype('float32')
65 |
66 | warped_image = cv2.remap(image, interp_mapx, interp_mapy, cv2.INTER_LINEAR)
67 |
68 | src_points = numpy.stack([mapx.ravel(), mapy.ravel() ], axis=-1)
69 | dst_points = numpy.mgrid[0:65*2:16*2,0:65*2:16*2].T.reshape(-1,2)
70 | mat = umeyama(src_points, dst_points, True)[0:2]
71 |
72 | target_image = cv2.warpAffine(image, mat, (64*2,64*2))
73 |
74 | return warped_image, target_image
75 |
76 | # get a pair of randomly warped images from an aligned face image
77 | def random_warp224(image):
78 | assert image.shape == (256,256,3)
79 | range_ = numpy.linspace(128-110, 128+110, 5)
80 | mapx = numpy.broadcast_to(range_, (5,5))
81 | mapy = mapx.T
82 |
83 | mapx = mapx + numpy.random.normal(size=(5,5), scale=6)
84 | mapy = mapy + numpy.random.normal(size=(5,5), scale=6)
85 |
86 | interp_mapx = cv2.resize(mapx, (80*4,80*4))[8*4:72*4,8*4:72*4].astype('float32')
87 | interp_mapy = cv2.resize(mapy, (80*4,80*4))[8*4:72*4,8*4:72*4].astype('float32')
88 |
89 | warped_image = cv2.remap(image, interp_mapx, interp_mapy, cv2.INTER_LINEAR)
90 |
91 | src_points = numpy.stack([mapx.ravel(), mapy.ravel() ], axis=-1)
92 | dst_points = numpy.mgrid[0:65*4:16*4,0:65*4:16*4].T.reshape(-1,2)
93 | mat = umeyama(src_points, dst_points, True)[0:2]
94 |
95 | target_image = cv2.warpAffine(image, mat, (64*4,64*4))
96 |
97 | target_image = cv2.resize(target_image, (224,224))
98 | warped_image = cv2.resize(warped_image, (224,224))
99 |
100 | return warped_image, target_image
101 |
--------------------------------------------------------------------------------
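A usage sketch on a placeholder 256x256 aligned face, matching how `legacy/training_data.py` drives these functions: jitter the crop, then produce a warped input / ground-truth target pair at 64x64.

```python
import numpy as np
from image_augmentation import random_transform, random_warp

face = np.random.randint(0, 256, (256, 256, 3), dtype=np.uint8)   # placeholder aligned face crop

augmented = random_transform(face, rotation_range=15, zoom_range=0.1,
                             shift_range=0.05, random_flip=0.5)
warped, target = random_warp(augmented)
print(warped.shape, target.shape)   # (64, 64, 3) and (64, 64, 3)
```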
/legacy/image_augmentation.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy
3 |
4 | from umeyama import umeyama
5 |
6 | def random_channel_shift(x, intensity=10, channel_axis=2):
7 | x = numpy.rollaxis(x, channel_axis, 0)
8 | min_x, max_x = numpy.min(x), numpy.max(x)
9 | intensity = max_x/255*15.
10 | channel_images = [numpy.clip(x_channel + numpy.random.uniform(-intensity, intensity), min_x, max_x) for x_channel in x]
11 | x = numpy.stack(channel_images, axis=0)
12 | x = numpy.rollaxis(x, 0, channel_axis + 1)
13 | return x
14 |
15 | def random_transform( image, rotation_range, zoom_range, shift_range, random_flip ):
16 | h,w = image.shape[0:2]
17 | #color_shifted_image = random_channel_shift(image)
18 | rotation = numpy.random.uniform( -rotation_range, rotation_range )
19 | scale = numpy.random.uniform( 1 - zoom_range, 1 + zoom_range )
20 | tx = numpy.random.uniform( -shift_range, shift_range ) * w
21 | ty = numpy.random.uniform( -shift_range, shift_range ) * h
22 | mat = cv2.getRotationMatrix2D( (w//2,h//2), rotation, scale )
23 | mat[:,2] += (tx,ty)
24 | result = cv2.warpAffine( image, mat, (w,h), borderMode=cv2.BORDER_REPLICATE )
25 | if numpy.random.random() < random_flip:
26 | result = result[:,::-1]
27 | return result
28 |
29 | # get a pair of randomly warped images from an aligned face image
30 | def random_warp( image ):
31 | assert image.shape == (256,256,3)
32 | #range_ = numpy.linspace( 128-80, 128+80, 5 )
33 | range_ = numpy.linspace( 128-110, 128+110, 5 )
34 | mapx = numpy.broadcast_to( range_, (5,5) )
35 | mapy = mapx.T
36 |
37 | mapx = mapx + numpy.random.normal( size=(5,5), scale=6 )
38 | mapy = mapy + numpy.random.normal( size=(5,5), scale=6 )
39 |
40 | interp_mapx = cv2.resize( mapx, (80,80) )[8:72,8:72].astype('float32')
41 | interp_mapy = cv2.resize( mapy, (80,80) )[8:72,8:72].astype('float32')
42 |
43 | warped_image = cv2.remap( image, interp_mapx, interp_mapy, cv2.INTER_LINEAR )
44 |
45 | src_points = numpy.stack( [ mapx.ravel(), mapy.ravel() ], axis=-1 )
46 | dst_points = numpy.mgrid[0:65:16,0:65:16].T.reshape(-1,2)
47 | mat = umeyama( src_points, dst_points, True )[0:2]
48 |
49 | target_image = cv2.warpAffine( image, mat, (64,64) )
50 |
51 | return warped_image, target_image
52 |
53 | # get a pair of randomly warped images from an aligned face image
54 | def random_warp128(image):
55 | assert image.shape == (256,256,3)
56 | range_ = numpy.linspace(128-110, 128+110, 5)
57 | mapx = numpy.broadcast_to(range_, (5,5))
58 | mapy = mapx.T
59 |
60 | mapx = mapx + numpy.random.normal(size=(5,5), scale=6)
61 | mapy = mapy + numpy.random.normal(size=(5,5), scale=6)
62 |
63 | interp_mapx = cv2.resize(mapx, (80*2,80*2))[8*2:72*2,8*2:72*2].astype('float32')
64 | interp_mapy = cv2.resize(mapy, (80*2,80*2))[8*2:72*2,8*2:72*2].astype('float32')
65 |
66 | warped_image = cv2.remap(image, interp_mapx, interp_mapy, cv2.INTER_LINEAR)
67 |
68 | src_points = numpy.stack([mapx.ravel(), mapy.ravel() ], axis=-1)
69 | dst_points = numpy.mgrid[0:65*2:16*2,0:65*2:16*2].T.reshape(-1,2)
70 | mat = umeyama(src_points, dst_points, True)[0:2]
71 |
72 | target_image = cv2.warpAffine(image, mat, (64*2,64*2))
73 |
74 | return warped_image, target_image
75 |
76 | # get a pair of randomly warped images from an aligned face image
77 | def random_warp224(image):
78 | assert image.shape == (256,256,3)
79 | range_ = numpy.linspace(128-110, 128+110, 5)
80 | mapx = numpy.broadcast_to(range_, (5,5))
81 | mapy = mapx.T
82 |
83 | mapx = mapx + numpy.random.normal(size=(5,5), scale=6)
84 | mapy = mapy + numpy.random.normal(size=(5,5), scale=6)
85 |
86 | interp_mapx = cv2.resize(mapx, (80*4,80*4))[8*4:72*4,8*4:72*4].astype('float32')
87 | interp_mapy = cv2.resize(mapy, (80*4,80*4))[8*4:72*4,8*4:72*4].astype('float32')
88 |
89 | warped_image = cv2.remap(image, interp_mapx, interp_mapy, cv2.INTER_LINEAR)
90 |
91 | src_points = numpy.stack([mapx.ravel(), mapy.ravel() ], axis=-1)
92 | dst_points = numpy.mgrid[0:65*4:16*4,0:65*4:16*4].T.reshape(-1,2)
93 | mat = umeyama(src_points, dst_points, True)[0:2]
94 |
95 | target_image = cv2.warpAffine(image, mat, (64*4,64*4))
96 |
97 | target_image = cv2.resize(target_image, (224,224))
98 | warped_image = cv2.resize(warped_image, (224,224))
99 |
100 | return warped_image, target_image
101 |
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | from IPython.display import display
2 | from PIL import Image
3 | import numpy as np
4 | import cv2
5 | import os
6 | import yaml
7 |
8 | def get_image_paths(directory):
9 | return [x.path for x in os.scandir(directory) if x.name.endswith(".jpg") or x.name.endswith(".png")]
10 |
11 | def load_images(image_paths, convert=None):
12 | iter_all_images = (cv2.resize(cv2.imread(fn), (256,256)) for fn in image_paths)
13 | if convert:
14 | iter_all_images = (convert(img) for img in iter_all_images)
15 | for i,image in enumerate( iter_all_images ):
16 | if i == 0:
17 | all_images = np.empty((len(image_paths),) + image.shape, dtype=image.dtype)
18 | all_images[i] = image
19 | return all_images
20 |
21 | def get_transpose_axes( n ):
22 | if n % 2 == 0:
23 | y_axes = list(range(1, n-1, 2))
24 | x_axes = list(range(0, n-1, 2))
25 | else:
26 | y_axes = list(range(0, n-1, 2))
27 | x_axes = list(range(1, n-1, 2))
28 | return y_axes, x_axes, [n-1]
29 |
30 | def stack_images(images):
31 | images_shape = np.array(images.shape)
32 | new_axes = get_transpose_axes(len(images_shape))
33 | new_shape = [np.prod(images_shape[x]) for x in new_axes]
34 | return np.transpose(
35 | images,
36 | axes = np.concatenate(new_axes)
37 | ).reshape(new_shape)
38 |
39 | def showG(test_A, test_B, path_A, path_B, batchSize):
40 | figure_A = np.stack([
41 | test_A,
42 | np.squeeze(np.array([path_A([test_A[i:i+1]]) for i in range(test_A.shape[0])])),
43 | np.squeeze(np.array([path_B([test_A[i:i+1]]) for i in range(test_A.shape[0])])),
44 | ], axis=1 )
45 | figure_B = np.stack([
46 | test_B,
47 | np.squeeze(np.array([path_B([test_B[i:i+1]]) for i in range(test_B.shape[0])])),
48 | np.squeeze(np.array([path_A([test_B[i:i+1]]) for i in range(test_B.shape[0])])),
49 | ], axis=1 )
50 |
51 | figure = np.concatenate([figure_A, figure_B], axis=0)
52 | figure = figure.reshape((4,batchSize//2) + figure.shape[1:])
53 | figure = stack_images(figure)
54 | figure = np.clip((figure + 1) * 255 / 2, 0, 255).astype('uint8')
55 | figure = cv2.cvtColor(figure, cv2.COLOR_BGR2RGB)
56 | display(Image.fromarray(figure))
57 |
58 | def showG_mask(test_A, test_B, path_A, path_B, batchSize):
59 | figure_A = np.stack([
60 | test_A,
61 | (np.squeeze(np.array([path_A([test_A[i:i+1]]) for i in range(test_A.shape[0])])))*2-1,
62 | (np.squeeze(np.array([path_B([test_A[i:i+1]]) for i in range(test_A.shape[0])])))*2-1,
63 | ], axis=1 )
64 | figure_B = np.stack([
65 | test_B,
66 | (np.squeeze(np.array([path_B([test_B[i:i+1]]) for i in range(test_B.shape[0])])))*2-1,
67 | (np.squeeze(np.array([path_A([test_B[i:i+1]]) for i in range(test_B.shape[0])])))*2-1,
68 | ], axis=1 )
69 |
70 | figure = np.concatenate([figure_A, figure_B], axis=0)
71 | figure = figure.reshape((4,batchSize//2) + figure.shape[1:])
72 | figure = stack_images(figure)
73 | figure = np.clip((figure + 1) * 255 / 2, 0, 255).astype('uint8')
74 | figure = cv2.cvtColor(figure, cv2.COLOR_BGR2RGB)
75 | display(Image.fromarray(figure))
76 |
77 | def showG_eyes(test_A, test_B, bm_eyes_A, bm_eyes_B, batchSize):
78 | figure_A = np.stack([
79 | (test_A + 1)/2,
80 | bm_eyes_A,
81 | bm_eyes_A * (test_A + 1)/2,
82 | ], axis=1 )
83 | figure_B = np.stack([
84 | (test_B + 1)/2,
85 | bm_eyes_B,
86 | bm_eyes_B * (test_B+1)/2,
87 | ], axis=1 )
88 |
89 | figure = np.concatenate([figure_A, figure_B], axis=0)
90 | figure = figure.reshape((4,batchSize//2) + figure.shape[1:])
91 | figure = stack_images(figure)
92 | figure = np.clip(figure * 255, 0, 255).astype('uint8')
93 | figure = cv2.cvtColor(figure, cv2.COLOR_BGR2RGB)
94 |
95 | display(Image.fromarray(figure))
96 |
97 | def save_preview_image(test_A, test_B,
98 | path_A, path_B,
99 | path_bgr_A, path_bgr_B,
100 | path_mask_A, path_mask_B,
101 | batchSize, save_fn="preview.jpg"):
102 | figure_A = np.stack([
103 | test_A,
104 | np.squeeze(np.array([path_bgr_B([test_A[i:i+1]]) for i in range(test_A.shape[0])])),
105 | (np.squeeze(np.array([path_mask_B([test_A[i:i+1]]) for i in range(test_A.shape[0])])))*2-1,
106 | np.squeeze(np.array([path_B([test_A[i:i+1]]) for i in range(test_A.shape[0])])),
107 | ], axis=1 )
108 | figure_B = np.stack([
109 | test_B,
110 | np.squeeze(np.array([path_bgr_A([test_B[i:i+1]]) for i in range(test_B.shape[0])])),
111 | (np.squeeze(np.array([path_mask_A([test_B[i:i+1]]) for i in range(test_B.shape[0])])))*2-1,
112 | np.squeeze(np.array([path_A([test_B[i:i+1]]) for i in range(test_B.shape[0])])),
113 | ], axis=1 )
114 |
115 | figure = np.concatenate([figure_A, figure_B], axis=0)
116 | figure = figure.reshape((4,batchSize//2) + figure.shape[1:])
117 | figure = stack_images(figure)
118 | figure = np.clip((figure + 1) * 255 / 2, 0, 255).astype('uint8')
119 | cv2.imwrite(save_fn, figure)
120 |
121 | def load_yaml(path_configs):
122 | with open(path_configs, 'r') as f:
123 | return yaml.load(f)
124 |
125 | def show_loss_config(loss_config):
126 | """
127 | Print out loss configuration. Called in loss function automation.
128 |
129 | Argument:
130 | loss_config: A dictionary. Configuration regarding the optimization.
131 | """
132 | for config, value in loss_config.items():
133 | print(f"{config} = {value}")
134 |
--------------------------------------------------------------------------------
/networks/GroupNormalization.py:
--------------------------------------------------------------------------------
1 | from keras.engine import Layer, InputSpec
2 | from keras import initializers, regularizers
3 | from keras import backend as K
4 | from keras.utils import conv_utils
5 |
6 | try:
7 | from keras.utils.conv_utils import normalize_data_format
8 | except:
9 | from keras.backend.common import normalize_data_format
10 |
11 | def to_list(x):
12 | if type(x) not in [list, tuple]:
13 | return [x]
14 | else:
15 | return list(x)
16 |
17 | class GroupNormalization(Layer):
18 | def __init__(self, axis=-1,
19 | gamma_init='one', beta_init='zero',
20 | gamma_regularizer=None, beta_regularizer=None,
21 | epsilon=1e-6,
22 | group=32,
23 | data_format=None,
24 | **kwargs):
25 | super(GroupNormalization, self).__init__(**kwargs)
26 |
27 | self.axis = to_list(axis)
28 | self.gamma_init = initializers.get(gamma_init)
29 | self.beta_init = initializers.get(beta_init)
30 | self.gamma_regularizer = regularizers.get(gamma_regularizer)
31 | self.beta_regularizer = regularizers.get(beta_regularizer)
32 | self.epsilon = epsilon
33 | self.group = group
34 | self.data_format = normalize_data_format(data_format)
35 |
36 | self.supports_masking = True
37 |
38 | def build(self, input_shape):
39 | self.input_spec = [InputSpec(shape=input_shape)]
40 | shape = [1 for _ in input_shape]
41 | if self.data_format == 'channels_last':
42 | channel_axis = -1
43 | shape[channel_axis] = input_shape[channel_axis]
44 | elif self.data_format == 'channels_first':
45 | channel_axis = 1
46 | shape[channel_axis] = input_shape[channel_axis]
47 | #for i in self.axis:
48 | # shape[i] = input_shape[i]
49 | self.gamma = self.add_weight(shape=shape,
50 | initializer=self.gamma_init,
51 | regularizer=self.gamma_regularizer,
52 | name='gamma')
53 | self.beta = self.add_weight(shape=shape,
54 | initializer=self.beta_init,
55 | regularizer=self.beta_regularizer,
56 | name='beta')
57 | self.built = True
58 |
59 | def call(self, inputs, mask=None):
60 | input_shape = K.int_shape(inputs)
61 | if len(input_shape) != 4 and len(input_shape) != 2:
62 | raise ValueError('Inputs should have rank ' +
63 | str(4) + " or " + str(2) +
64 | '; Received input shape:', str(input_shape))
65 |
66 | if len(input_shape) == 4:
67 | if self.data_format == 'channels_last':
68 | batch_size, h, w, c = input_shape
69 | if batch_size is None:
70 | batch_size = -1
71 |
72 | if c < self.group:
73 | raise ValueError('Input channels should be larger than group size' +
74 | '; Received input channels: ' + str(c) +
75 | '; Group size: ' + str(self.group)
76 | )
77 |
78 | x = K.reshape(inputs, (batch_size, h, w, self.group, c // self.group))
79 | mean = K.mean(x, axis=[1, 2, 4], keepdims=True)
80 | std = K.sqrt(K.var(x, axis=[1, 2, 4], keepdims=True) + self.epsilon)
81 | x = (x - mean) / std
82 |
83 | x = K.reshape(x, (batch_size, h, w, c))
84 | return self.gamma * x + self.beta
85 | elif self.data_format == 'channels_first':
86 | batch_size, c, h, w = input_shape
87 | if batch_size is None:
88 | batch_size = -1
89 |
90 | if c < self.group:
91 | raise ValueError('Input channels should be larger than group size' +
92 | '; Received input channels: ' + str(c) +
93 | '; Group size: ' + str(self.group)
94 | )
95 |
96 | x = K.reshape(inputs, (batch_size, self.group, c // self.group, h, w))
97 | mean = K.mean(x, axis=[2, 3, 4], keepdims=True)
98 | std = K.sqrt(K.var(x, axis=[2, 3, 4], keepdims=True) + self.epsilon)
99 | x = (x - mean) / std
100 |
101 | x = K.reshape(x, (batch_size, c, h, w))
102 | return self.gamma * x + self.beta
103 |
104 | elif len(input_shape) == 2:
105 | reduction_axes = list(range(0, len(input_shape)))
106 | del reduction_axes[0]
107 | batch_size, _ = input_shape
108 | if batch_size is None:
109 | batch_size = -1
110 |
111 | mean = K.mean(inputs, keepdims=True)
112 | std = K.sqrt(K.var(inputs, keepdims=True) + self.epsilon)
113 | x = (inputs - mean) / std
114 |
115 | return self.gamma * x + self.beta
116 |
117 |
118 | def get_config(self):
119 | config = {'epsilon': self.epsilon,
120 | 'axis': self.axis,
121 | 'gamma_init': initializers.serialize(self.gamma_init),
122 | 'beta_init': initializers.serialize(self.beta_init),
123 | 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer),
124 |                   'beta_regularizer': regularizers.serialize(self.beta_regularizer),
125 | 'group': self.group
126 | }
127 | base_config = super(GroupNormalization, self).get_config()
128 | return dict(list(base_config.items()) + list(config.items()))
129 |
--------------------------------------------------------------------------------
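A minimal shape and usage check (illustrative sizes; assumes the same Keras 2.x environment): 64 channels normalized in 32 groups of 2.

```python
import numpy as np
from keras.layers import Input, Conv2D
from keras.models import Model
from networks.GroupNormalization import GroupNormalization

inp = Input(shape=(32, 32, 3))
x = Conv2D(64, kernel_size=3, padding="same")(inp)
x = GroupNormalization(group=32)(x)        # 64 channels -> 32 groups of 2
model = Model(inp, x)

out = model.predict(np.random.rand(4, 32, 32, 3).astype("float32"))
print(out.shape)   # (4, 32, 32, 64)
```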
/detector/face_detector.py:
--------------------------------------------------------------------------------
1 | import mtcnn_detect_face
2 | import tensorflow as tf
3 | from keras import backend as K
4 | import numpy as np
5 | import cv2
6 | import os
7 |
8 | class MTCNNFaceDetector():
9 | """
10 |     This class loads the MTCNN networks and performs face detection.
11 |
12 | Attributes:
13 | model_path: path to the MTCNN weights files
14 | """
15 | def __init__(self, sess, model_path="./mtcnn_weights/"):
16 | self.pnet = None
17 | self.rnet = None
18 | self.onet = None
19 | self.create_mtcnn(sess, model_path)
20 |
21 | def create_mtcnn(self, sess, model_path):
22 | if not model_path:
23 | model_path, _ = os.path.split(os.path.realpath(__file__))
24 |
25 | with tf.variable_scope('pnet'):
26 | data = tf.placeholder(tf.float32, (None,None,None,3), 'input')
27 | pnet = mtcnn_detect_face.PNet({'data':data})
28 | pnet.load(os.path.join(model_path, 'det1.npy'), sess)
29 | with tf.variable_scope('rnet'):
30 | data = tf.placeholder(tf.float32, (None,24,24,3), 'input')
31 | rnet = mtcnn_detect_face.RNet({'data':data})
32 | rnet.load(os.path.join(model_path, 'det2.npy'), sess)
33 | with tf.variable_scope('onet'):
34 | data = tf.placeholder(tf.float32, (None,48,48,3), 'input')
35 | onet = mtcnn_detect_face.ONet({'data':data})
36 | onet.load(os.path.join(model_path, 'det3.npy'), sess)
37 | self.pnet = K.function([pnet.layers['data']], [pnet.layers['conv4-2'], pnet.layers['prob1']])
38 | self.rnet = K.function([rnet.layers['data']], [rnet.layers['conv5-2'], rnet.layers['prob1']])
39 | self.onet = K.function([onet.layers['data']], [onet.layers['conv6-2'], onet.layers['conv6-3'], onet.layers['prob1']])
40 |
41 | def detect_face(self, image, minsize=20, threshold=0.7, factor=0.709, use_auto_downscaling=True, min_face_area=25*25):
42 | if use_auto_downscaling:
43 | image, scale_factor = self.auto_downscale(image)
44 |
45 | faces, pnts = mtcnn_detect_face.detect_face(
46 | image, minsize,
47 | self.pnet, self.rnet, self.onet,
48 | [0.6, 0.7, threshold],
49 | factor)
50 | faces = self.process_mtcnn_bbox(faces, image.shape)
51 | faces, pnts = self.remove_small_faces(faces, pnts, min_face_area)
52 |
53 | if use_auto_downscaling:
54 | faces = self.calibrate_coord(faces, scale_factor)
55 | pnts = self.calibrate_landmarks(pnts, scale_factor)
56 | return faces, pnts
57 |
58 | def auto_downscale(self, image):
59 | if self.is_higher_than_1080p(image):
60 | scale_factor = 4
61 | resized_image = cv2.resize(image,
62 | (image.shape[1]//scale_factor,
63 | image.shape[0]//scale_factor))
64 | elif self.is_higher_than_720p(image):
65 | scale_factor = 3
66 | resized_image = cv2.resize(image,
67 | (image.shape[1]//scale_factor,
68 | image.shape[0]//scale_factor))
69 | elif self.is_higher_than_480p(image):
70 | scale_factor = 2
71 | resized_image = cv2.resize(image,
72 | (image.shape[1]//scale_factor,
73 | image.shape[0]//scale_factor))
74 | else:
75 | scale_factor = 1
76 | resized_image = image.copy()
77 | return resized_image, scale_factor
78 |
79 | @staticmethod
80 | def is_higher_than_480p(x):
81 | return (x.shape[0] * x.shape[1]) >= (858*480)
82 |
83 | @staticmethod
84 | def is_higher_than_720p(x):
85 | return (x.shape[0] * x.shape[1]) >= (1280*720)
86 |
87 | @staticmethod
88 | def is_higher_than_1080p(x):
89 | return (x.shape[0] * x.shape[1]) >= (1920*1080)
90 |
91 | @staticmethod
92 | def process_mtcnn_bbox(bboxes, im_shape):
93 | # output bbox coordinate of MTCNN is (y0, x0, y1, x1)
94 | # Here we process the bbox coord. to a square bbox with ordering (x0, y1, x1, y0)
95 | for i, bbox in enumerate(bboxes):
96 | y0, x0, y1, x1 = bboxes[i,0:4]
97 | w = int(y1 - y0)
98 | h = int(x1 - x0)
99 | length = (w + h)/2
100 | center = (int((x1+x0)/2),int((y1+y0)/2))
101 | new_x0 = np.max([0, (center[0]-length//2)])#.astype(np.int32)
102 | new_x1 = np.min([im_shape[0], (center[0]+length//2)])#.astype(np.int32)
103 | new_y0 = np.max([0, (center[1]-length//2)])#.astype(np.int32)
104 | new_y1 = np.min([im_shape[1], (center[1]+length//2)])#.astype(np.int32)
105 | bboxes[i,0:4] = new_x0, new_y1, new_x1, new_y0
106 | return bboxes
107 |
108 | @staticmethod
109 | def calibrate_coord(faces, scale_factor):
110 | for i, (x0, y1, x1, y0, _) in enumerate(faces):
111 | faces[i] = (x0*scale_factor, y1*scale_factor,
112 | x1*scale_factor, y0*scale_factor, _)
113 | return faces
114 |
115 | @staticmethod
116 | def calibrate_landmarks(pnts, scale_factor):
117 | # pnts is a numpy array
118 | return np.array([xy * scale_factor for xy in pnts])
119 |
120 | @staticmethod
121 | def remove_small_faces(faces, pnts, min_area=25*25):
122 | def compute_area(face_coord):
123 | x0, y1, x1, y0, _ = face_coord
124 | area = np.abs((x1 - x0) * (y1 - y0))
125 | return area
126 |
127 | new_faces = []
128 | new_pnts = []
129 | # faces has shape (num_faces, coord), and pnts has shape (coord, num_faces)
130 | for face,pnt in zip(faces, pnts.transpose()):
131 | if compute_area(face) >= min_area:
132 | new_faces.append(face)
133 | new_pnts.append(pnt)
134 | new_faces = np.array(new_faces)
135 | new_pnts = np.array(new_pnts).transpose()
136 | return new_faces, new_pnts
--------------------------------------------------------------------------------
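A minimal usage sketch for MTCNNFaceDetector, mirroring the session setup in the video-conversion notebook; the image path and the BGR-to-RGB conversion are illustrative assumptions, not taken from the repo:

# Hypothetical usage sketch (not part of the repo).
import cv2
import keras.backend as K
from detector.face_detector import MTCNNFaceDetector

fd = MTCNNFaceDetector(sess=K.get_session(), model_path="./mtcnn_weights/")

image = cv2.imread("frame.jpg")                  # BGR as loaded by OpenCV
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)   # channel order assumed RGB here
faces, landmarks = fd.detect_face(image, minsize=20, threshold=0.7,
                                  use_auto_downscaling=True, min_face_area=25*25)
# Each row of faces is a square bbox (x0, y1, x1, y0) plus a fifth column
# (presumably the detection score) after process_mtcnn_bbox().
print(f"Detected {len(faces)} face(s)")
--------------------------------------------------------------------------------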
/converter/face_transformer.py:
--------------------------------------------------------------------------------
1 | from .color_correction import *
2 | import cv2
3 | import numpy as np
4 |
5 |
6 | class FaceTransformer(object):
7 | """
8 | Attributes:
9 |         path_func: the generator inference function selected by direction (either AtoB or BtoA).
10 | model: the generator of the faceswap-GAN model
11 | """
12 | def __init__(self):
13 | self.path_func = None
14 | self.model = None
15 |
16 | self.inp_img = None
17 | self.input_size = None
18 | self.img_bgr = None
19 | self.roi = None
20 | self.roi_size = None
21 | self.ae_input = None
22 | self.ae_output = None
23 | self.ae_output_masked = None
24 | self.ae_output_bgr = None
25 | self.ae_output_a = None
26 | self.result = None
27 | self.result_rawRGB = None
28 | self.result_alpha = None
29 |
30 | def set_model(self, model):
31 | self.model = model
32 |
33 | def _preprocess_inp_img(self, inp_img, roi_coverage, IMAGE_SHAPE):
34 | img_bgr = cv2.cvtColor(inp_img, cv2.COLOR_RGB2BGR)
35 | input_size = img_bgr.shape
36 | roi_x, roi_y = int(input_size[0]*(1-roi_coverage)), int(input_size[1]*(1-roi_coverage))
37 | roi = img_bgr[roi_x:-roi_x, roi_y:-roi_y,:] # BGR, [0, 255]
38 | roi_size = roi.shape
39 | ae_input = cv2.resize(roi, IMAGE_SHAPE[:2])/255. * 2 - 1 # BGR, [-1, 1]
40 | self.img_bgr = img_bgr
41 | self.input_size = input_size
42 | self.roi = roi
43 | self.roi_size = roi_size
44 | self.ae_input = ae_input
45 |
46 | def _ae_forward_pass(self, ae_input):
47 | ae_out = self.path_func([[ae_input]])
48 | self.ae_output = np.squeeze(np.array([ae_out]))
49 |
50 | def _postprocess_roi_img(self, ae_output, roi, roi_size, color_correction):
51 | ae_output_a = ae_output[:,:,0] * 255
52 | ae_output_a = cv2.resize(ae_output_a, (roi_size[1],roi_size[0]))[...,np.newaxis]
53 | ae_output_bgr = np.clip( (ae_output[:,:,1:] + 1) * 255 / 2, 0, 255)
54 | ae_output_bgr = cv2.resize(ae_output_bgr, (roi_size[1],roi_size[0]))
55 | ae_output_masked = (ae_output_a/255 * ae_output_bgr + (1 - ae_output_a/255) * roi).astype('uint8') # BGR, [0, 255]
56 | self.ae_output_a = ae_output_a
57 | if color_correction == "adain":
58 | self.ae_output_masked = adain(ae_output_masked, roi)
59 | self.ae_output_bgr = adain(ae_output_bgr, roi)
60 | elif color_correction == "adain_xyz":
61 | self.ae_output_masked = adain(ae_output_masked, roi, color_space="XYZ")
62 | self.ae_output_bgr = adain(ae_output_bgr, roi, color_space="XYZ")
63 | elif color_correction == "hist_match":
64 | self.ae_output_masked = color_hist_match(ae_output_masked, roi)
65 | self.ae_output_bgr = color_hist_match(ae_output_bgr, roi)
66 | else:
67 | self.ae_output_masked = ae_output_masked
68 | self.ae_output_bgr = ae_output_bgr
69 |
70 | def _merge_img_and_mask(self, ae_output_bgr, ae_output_masked, input_size, roi, roi_coverage):
71 | blend_mask = self.get_feather_edges_mask(roi, roi_coverage)
72 | blended_img = blend_mask/255 * ae_output_masked + (1-blend_mask/255) * roi
73 | result = self.img_bgr.copy()
74 | roi_x, roi_y = int(input_size[0]*(1-roi_coverage)), int(input_size[1]*(1-roi_coverage))
75 | result[roi_x:-roi_x, roi_y:-roi_y,:] = blended_img
76 | result_rawRGB = self.img_bgr.copy()
77 | result_rawRGB[roi_x:-roi_x, roi_y:-roi_y,:] = ae_output_bgr
78 | result = cv2.cvtColor(result, cv2.COLOR_BGR2RGB)
79 | result_rawRGB = cv2.cvtColor(result_rawRGB, cv2.COLOR_BGR2RGB)
80 | result_alpha = np.zeros_like(self.img_bgr)
81 | result_alpha[roi_x:-roi_x, roi_y:-roi_y,:] = (blend_mask/255) * self.ae_output_a
82 | self.result = result
83 | self.result_rawRGB = result_rawRGB
84 | self.result_alpha = result_alpha
85 |
86 | @staticmethod
87 | def get_feather_edges_mask(img, roi_coverage):
88 | img_size = img.shape
89 | mask = np.zeros_like(img)
90 | roi_x, roi_y = int(img_size[0]*(1-roi_coverage)), int(img_size[1]*(1-roi_coverage))
91 | mask[roi_x:-roi_x, roi_y:-roi_y,:] = 255
92 | mask = cv2.GaussianBlur(mask,(15,15),10)
93 | return mask
94 |
95 | def transform(self, inp_img, direction, roi_coverage, color_correction, IMAGE_SHAPE):
96 | self.check_generator_model(self.model)
97 | self.check_roi_coverage(inp_img, roi_coverage)
98 |
99 | if direction == "AtoB":
100 | self.path_func = self.model.path_abgr_B
101 | elif direction == "BtoA":
102 | self.path_func = self.model.path_abgr_A
103 | else:
104 |             raise ValueError(f"direction should be either AtoB or BtoA, received {direction}.")
105 |
106 | self.inp_img = inp_img
107 |
108 | # pre-process input image
109 | # Set 5 members: self.img_bgr, self.input_size, self.roi, self.roi_size, self.ae_input
110 | self._preprocess_inp_img(self.inp_img, roi_coverage, IMAGE_SHAPE)
111 |
112 | # model inference
113 | # Set 1 member: self.ae_output
114 | self._ae_forward_pass(self.ae_input)
115 |
116 | # post-process transformed roi image
117 | # Set 3 members: self.ae_output_a, self.ae_output_masked, self.ae_output_bgr
118 | self._postprocess_roi_img(self.ae_output, self.roi, self.roi_size, color_correction)
119 |
120 | # merge transformed output back to input image
121 | # Set 3 members: self.result, self.result_rawRGB, self.result_alpha
122 | self._merge_img_and_mask(self.ae_output_bgr, self.ae_output_masked,
123 | self.input_size, self.roi, roi_coverage)
124 |
125 | return self.result, self.result_rawRGB, self.result_alpha
126 |
127 | @staticmethod
128 | def check_generator_model(model):
129 | if model is None:
130 |             raise ValueError("Generator model has not been set.")
131 |
132 | @staticmethod
133 | def check_roi_coverage(inp_img, roi_coverage):
134 | input_size = inp_img.shape
135 | roi_x, roi_y = int(input_size[0]*(1-roi_coverage)), int(input_size[1]*(1-roi_coverage))
136 | if roi_x == 0 or roi_y == 0:
137 |             raise ValueError("Error occurred when cropping the ROI image. \
138 | Consider increasing min_face_area or decreasing roi_coverage.")
139 |
--------------------------------------------------------------------------------
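A minimal usage sketch for FaceTransformer; the model configuration and option values follow the video-conversion notebook, and the random array is a stand-in for a real RGB face crop:

# Hypothetical usage sketch (not part of the repo).
import numpy as np
from converter.face_transformer import FaceTransformer
from networks.faceswap_gan_model import FaceswapGANModel

arch_config = {"IMAGE_SHAPE": (64, 64, 3), "use_self_attn": True,
               "norm": "instancenorm", "model_capacity": "standard"}
model = FaceswapGANModel(**arch_config)
model.load_weights(path="./models")

ftrans = FaceTransformer()
ftrans.set_model(model)

face_rgb = np.random.randint(0, 255, (128, 128, 3), dtype=np.uint8)  # stand-in RGB face crop
result, result_raw_rgb, result_alpha = ftrans.transform(
    face_rgb, direction="AtoB", roi_coverage=0.9,
    color_correction="hist_match", IMAGE_SHAPE=model.IMAGE_SHAPE)
--------------------------------------------------------------------------------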
/legacy/instance_normalization.py:
--------------------------------------------------------------------------------
1 | from keras.engine import Layer, InputSpec
2 | from keras import initializers, regularizers, constraints
3 | from keras import backend as K
4 | from keras.utils.generic_utils import get_custom_objects
5 |
6 | import numpy as np
7 |
8 |
9 | class InstanceNormalization(Layer):
10 | """Instance normalization layer (Lei Ba et al, 2016, Ulyanov et al., 2016).
11 | Normalize the activations of the previous layer at each step,
12 | i.e. applies a transformation that maintains the mean activation
13 | close to 0 and the activation standard deviation close to 1.
14 | # Arguments
15 | axis: Integer, the axis that should be normalized
16 | (typically the features axis).
17 | For instance, after a `Conv2D` layer with
18 | `data_format="channels_first"`,
19 | set `axis=1` in `InstanceNormalization`.
20 | Setting `axis=None` will normalize all values in each instance of the batch.
21 | Axis 0 is the batch dimension. `axis` cannot be set to 0 to avoid errors.
22 | epsilon: Small float added to variance to avoid dividing by zero.
23 | center: If True, add offset of `beta` to normalized tensor.
24 | If False, `beta` is ignored.
25 | scale: If True, multiply by `gamma`.
26 | If False, `gamma` is not used.
27 | When the next layer is linear (also e.g. `nn.relu`),
28 | this can be disabled since the scaling
29 | will be done by the next layer.
30 | beta_initializer: Initializer for the beta weight.
31 | gamma_initializer: Initializer for the gamma weight.
32 | beta_regularizer: Optional regularizer for the beta weight.
33 | gamma_regularizer: Optional regularizer for the gamma weight.
34 | beta_constraint: Optional constraint for the beta weight.
35 | gamma_constraint: Optional constraint for the gamma weight.
36 | # Input shape
37 | Arbitrary. Use the keyword argument `input_shape`
38 | (tuple of integers, does not include the samples axis)
39 | when using this layer as the first layer in a model.
40 | # Output shape
41 | Same shape as input.
42 | # References
43 | - [Layer Normalization](https://arxiv.org/abs/1607.06450)
44 | - [Instance Normalization: The Missing Ingredient for Fast Stylization](https://arxiv.org/abs/1607.08022)
45 | """
46 | def __init__(self,
47 | axis=None,
48 | epsilon=1e-3,
49 | center=True,
50 | scale=True,
51 | beta_initializer='zeros',
52 | gamma_initializer='ones',
53 | beta_regularizer=None,
54 | gamma_regularizer=None,
55 | beta_constraint=None,
56 | gamma_constraint=None,
57 | **kwargs):
58 | super(InstanceNormalization, self).__init__(**kwargs)
59 | self.supports_masking = True
60 | self.axis = axis
61 | self.epsilon = epsilon
62 | self.center = center
63 | self.scale = scale
64 | self.beta_initializer = initializers.get(beta_initializer)
65 | self.gamma_initializer = initializers.get(gamma_initializer)
66 | self.beta_regularizer = regularizers.get(beta_regularizer)
67 | self.gamma_regularizer = regularizers.get(gamma_regularizer)
68 | self.beta_constraint = constraints.get(beta_constraint)
69 | self.gamma_constraint = constraints.get(gamma_constraint)
70 |
71 | def build(self, input_shape):
72 | ndim = len(input_shape)
73 | if self.axis == 0:
74 | raise ValueError('Axis cannot be zero')
75 |
76 | if (self.axis is not None) and (ndim == 2):
77 | raise ValueError('Cannot specify axis for rank 1 tensor')
78 |
79 | self.input_spec = InputSpec(ndim=ndim)
80 |
81 | if self.axis is None:
82 | shape = (1,)
83 | else:
84 | shape = (input_shape[self.axis],)
85 |
86 | if self.scale:
87 | self.gamma = self.add_weight(shape=shape,
88 | name='gamma',
89 | initializer=self.gamma_initializer,
90 | regularizer=self.gamma_regularizer,
91 | constraint=self.gamma_constraint)
92 | else:
93 | self.gamma = None
94 | if self.center:
95 | self.beta = self.add_weight(shape=shape,
96 | name='beta',
97 | initializer=self.beta_initializer,
98 | regularizer=self.beta_regularizer,
99 | constraint=self.beta_constraint)
100 | else:
101 | self.beta = None
102 | self.built = True
103 |
104 | def call(self, inputs, training=None):
105 | input_shape = K.int_shape(inputs)
106 | reduction_axes = list(range(0, len(input_shape)))
107 |
108 | if (self.axis is not None):
109 | del reduction_axes[self.axis]
110 |
111 | del reduction_axes[0]
112 |
113 | mean = K.mean(inputs, reduction_axes, keepdims=True)
114 | stddev = K.std(inputs, reduction_axes, keepdims=True) + self.epsilon
115 | normed = (inputs - mean) / stddev
116 |
117 | broadcast_shape = [1] * len(input_shape)
118 | if self.axis is not None:
119 | broadcast_shape[self.axis] = input_shape[self.axis]
120 |
121 | if self.scale:
122 | broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
123 | normed = normed * broadcast_gamma
124 | if self.center:
125 | broadcast_beta = K.reshape(self.beta, broadcast_shape)
126 | normed = normed + broadcast_beta
127 | return normed
128 |
129 | def get_config(self):
130 | config = {
131 | 'axis': self.axis,
132 | 'epsilon': self.epsilon,
133 | 'center': self.center,
134 | 'scale': self.scale,
135 | 'beta_initializer': initializers.serialize(self.beta_initializer),
136 | 'gamma_initializer': initializers.serialize(self.gamma_initializer),
137 | 'beta_regularizer': regularizers.serialize(self.beta_regularizer),
138 | 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer),
139 | 'beta_constraint': constraints.serialize(self.beta_constraint),
140 | 'gamma_constraint': constraints.serialize(self.gamma_constraint)
141 | }
142 | base_config = super(InstanceNormalization, self).get_config()
143 | return dict(list(base_config.items()) + list(config.items()))
144 |
145 | get_custom_objects().update({'InstanceNormalization': InstanceNormalization})
146 |
--------------------------------------------------------------------------------
/networks/instance_normalization.py:
--------------------------------------------------------------------------------
1 | from keras.engine import Layer, InputSpec
2 | from keras import initializers, regularizers, constraints
3 | from keras import backend as K
4 | from keras.utils.generic_utils import get_custom_objects
5 |
6 | import numpy as np
7 |
8 |
9 | class InstanceNormalization(Layer):
10 | """Instance normalization layer (Lei Ba et al, 2016, Ulyanov et al., 2016).
11 | Normalize the activations of the previous layer at each step,
12 | i.e. applies a transformation that maintains the mean activation
13 | close to 0 and the activation standard deviation close to 1.
14 | # Arguments
15 | axis: Integer, the axis that should be normalized
16 | (typically the features axis).
17 | For instance, after a `Conv2D` layer with
18 | `data_format="channels_first"`,
19 | set `axis=1` in `InstanceNormalization`.
20 | Setting `axis=None` will normalize all values in each instance of the batch.
21 | Axis 0 is the batch dimension. `axis` cannot be set to 0 to avoid errors.
22 | epsilon: Small float added to variance to avoid dividing by zero.
23 | center: If True, add offset of `beta` to normalized tensor.
24 | If False, `beta` is ignored.
25 | scale: If True, multiply by `gamma`.
26 | If False, `gamma` is not used.
27 | When the next layer is linear (also e.g. `nn.relu`),
28 | this can be disabled since the scaling
29 | will be done by the next layer.
30 | beta_initializer: Initializer for the beta weight.
31 | gamma_initializer: Initializer for the gamma weight.
32 | beta_regularizer: Optional regularizer for the beta weight.
33 | gamma_regularizer: Optional regularizer for the gamma weight.
34 | beta_constraint: Optional constraint for the beta weight.
35 | gamma_constraint: Optional constraint for the gamma weight.
36 | # Input shape
37 | Arbitrary. Use the keyword argument `input_shape`
38 | (tuple of integers, does not include the samples axis)
39 | when using this layer as the first layer in a model.
40 | # Output shape
41 | Same shape as input.
42 | # References
43 | - [Layer Normalization](https://arxiv.org/abs/1607.06450)
44 | - [Instance Normalization: The Missing Ingredient for Fast Stylization](https://arxiv.org/abs/1607.08022)
45 | """
46 | def __init__(self,
47 | axis=None,
48 | epsilon=1e-3,
49 | center=True,
50 | scale=True,
51 | beta_initializer='zeros',
52 | gamma_initializer='ones',
53 | beta_regularizer=None,
54 | gamma_regularizer=None,
55 | beta_constraint=None,
56 | gamma_constraint=None,
57 | **kwargs):
58 | super(InstanceNormalization, self).__init__(**kwargs)
59 | self.supports_masking = True
60 | self.axis = axis
61 | self.epsilon = epsilon
62 | self.center = center
63 | self.scale = scale
64 | self.beta_initializer = initializers.get(beta_initializer)
65 | self.gamma_initializer = initializers.get(gamma_initializer)
66 | self.beta_regularizer = regularizers.get(beta_regularizer)
67 | self.gamma_regularizer = regularizers.get(gamma_regularizer)
68 | self.beta_constraint = constraints.get(beta_constraint)
69 | self.gamma_constraint = constraints.get(gamma_constraint)
70 |
71 | def build(self, input_shape):
72 | ndim = len(input_shape)
73 | if self.axis == 0:
74 | raise ValueError('Axis cannot be zero')
75 |
76 | if (self.axis is not None) and (ndim == 2):
77 | raise ValueError('Cannot specify axis for rank 1 tensor')
78 |
79 | self.input_spec = InputSpec(ndim=ndim)
80 |
81 | if self.axis is None:
82 | shape = (1,)
83 | else:
84 | shape = (input_shape[self.axis],)
85 |
86 | if self.scale:
87 | self.gamma = self.add_weight(shape=shape,
88 | name='gamma',
89 | initializer=self.gamma_initializer,
90 | regularizer=self.gamma_regularizer,
91 | constraint=self.gamma_constraint)
92 | else:
93 | self.gamma = None
94 | if self.center:
95 | self.beta = self.add_weight(shape=shape,
96 | name='beta',
97 | initializer=self.beta_initializer,
98 | regularizer=self.beta_regularizer,
99 | constraint=self.beta_constraint)
100 | else:
101 | self.beta = None
102 | self.built = True
103 |
104 | def call(self, inputs, training=None):
105 | input_shape = K.int_shape(inputs)
106 | reduction_axes = list(range(0, len(input_shape)))
107 |
108 | if (self.axis is not None):
109 | del reduction_axes[self.axis]
110 |
111 | del reduction_axes[0]
112 |
113 | mean = K.mean(inputs, reduction_axes, keepdims=True)
114 | stddev = K.std(inputs, reduction_axes, keepdims=True) + self.epsilon
115 | normed = (inputs - mean) / stddev
116 |
117 | broadcast_shape = [1] * len(input_shape)
118 | if self.axis is not None:
119 | broadcast_shape[self.axis] = input_shape[self.axis]
120 |
121 | if self.scale:
122 | broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
123 | normed = normed * broadcast_gamma
124 | if self.center:
125 | broadcast_beta = K.reshape(self.beta, broadcast_shape)
126 | normed = normed + broadcast_beta
127 | return normed
128 |
129 | def get_config(self):
130 | config = {
131 | 'axis': self.axis,
132 | 'epsilon': self.epsilon,
133 | 'center': self.center,
134 | 'scale': self.scale,
135 | 'beta_initializer': initializers.serialize(self.beta_initializer),
136 | 'gamma_initializer': initializers.serialize(self.gamma_initializer),
137 | 'beta_regularizer': regularizers.serialize(self.beta_regularizer),
138 | 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer),
139 | 'beta_constraint': constraints.serialize(self.beta_constraint),
140 | 'gamma_constraint': constraints.serialize(self.gamma_constraint)
141 | }
142 | base_config = super(InstanceNormalization, self).get_config()
143 | return dict(list(base_config.items()) + list(config.items()))
144 |
145 | get_custom_objects().update({'InstanceNormalization': InstanceNormalization})
146 |
--------------------------------------------------------------------------------
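A minimal usage sketch for the InstanceNormalization layer above; with the default axis=None each sample is normalized over all of its remaining dimensions, which is how nn_blocks.py applies it:

# Hypothetical usage sketch (not part of the repo).
from keras.layers import Input, Conv2D
from keras.models import Model
from networks.instance_normalization import InstanceNormalization

inp = Input(shape=(64, 64, 3))
x = Conv2D(32, kernel_size=3, padding="same")(inp)
x = InstanceNormalization()(x)   # axis=None: normalize each sample over H, W and channels
model = Model(inp, x)
--------------------------------------------------------------------------------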
/legacy/FCN8s_keras.py:
--------------------------------------------------------------------------------
1 | from keras.models import Sequential, Model
2 | from keras.layers import *
3 | from keras.layers.advanced_activations import LeakyReLU
4 | from keras.activations import relu
5 | from keras.initializers import RandomNormal
6 | from keras.applications import *
7 | import keras.backend as K
8 |
9 | def FCN(num_output=21, input_shape=(500, 500, 3)):
10 |     """Instantiate the FCN-8s architecture (VGG16 backbone) with Keras.
11 |     # Arguments
12 |         num_output: number of output classes
13 |         input_shape: input image shape, e.g. (500, 500, 3)
14 |     # Returns
15 |         A Keras model instance
16 |     # Note
17 |         The VGG16 convolutional base is built inline below; no pre-trained
18 |         weights are loaded and all layers are trainable.
19 |     """
20 | ROW_AXIS = 1
21 | COL_AXIS = 2
22 | CHANNEL_AXIS = 3
23 |
24 | def _crop(target_layer, offset=(None, None), name=None):
25 |         """Crop the bottom layer so that it has the same spatial shape as target_layer.
26 |         Uses _keras_shape to prevent an undefined output shape in Conv2DTranspose."""
27 | def f(x):
28 | width = x._keras_shape[ROW_AXIS]
29 | height = x._keras_shape[COL_AXIS]
30 | target_width = target_layer._keras_shape[ROW_AXIS]
31 | target_height = target_layer._keras_shape[COL_AXIS]
32 | cropped = Cropping2D(cropping=((offset[0], width - offset[0] - target_width), (offset[1], height - offset[1] - target_height)), name='{}'.format(name))(x)
33 | return cropped
34 | return f
35 |
36 | input_tensor = Input(shape=input_shape)
37 | pad1 = ZeroPadding2D(padding=(100, 100))(input_tensor)
38 | conv1_1 = Conv2D(filters=64, kernel_size=(3, 3), activation='relu',
39 | padding='valid', name='conv1_1')(pad1)
40 | conv1_2 = Conv2D(filters=64, kernel_size=(3, 3), activation='relu',
41 | padding='same', name='conv1_2')(conv1_1)
42 | pool1 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
43 | padding='same', name='pool1')(conv1_2)
44 | # Block 2
45 | conv2_1 = Conv2D(filters=128, kernel_size=(3, 3),
46 | activation='relu',
47 | padding='same', name='conv2_1')(pool1)
48 | conv2_2 = Conv2D(filters=128, kernel_size=(3, 3), activation='relu',
49 | padding='same', name='conv2_2')(conv2_1)
50 | pool2 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
51 | padding='same', name='pool2')(conv2_2)
52 | # Block 3
53 | conv3_1 = Conv2D(filters=256, kernel_size=(3, 3), activation='relu',
54 | padding='same', name='conv3_1')(pool2)
55 | conv3_2 = Conv2D(filters=256, kernel_size=(3, 3), activation='relu',
56 | padding='same', name='conv3_2')(conv3_1)
57 | conv3_3 = Conv2D(filters=256, kernel_size=(3, 3), activation='relu',
58 | padding='same', name='conv3_3')(conv3_2)
59 | pool3 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
60 | padding='same', name='pool3')(conv3_3)
61 | # Block 4
62 | conv4_1 = Conv2D(filters=512, kernel_size=(3, 3), activation='relu',
63 | padding='same', name='conv4_1')(pool3)
64 | conv4_2 = Conv2D(filters=512, kernel_size=(3, 3), activation='relu',
65 | padding='same', name='conv4_2')(conv4_1)
66 | conv4_3 = Conv2D(filters=512, kernel_size=(3, 3), activation='relu',
67 | padding='same', name='conv4_3')(conv4_2)
68 | pool4 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
69 | padding='same', name='pool4')(conv4_3)
70 | # Block 5
71 | conv5_1 = Conv2D(filters=512, kernel_size=(3, 3), activation='relu',
72 | padding='same', name='conv5_1')(pool4)
73 | conv5_2 = Conv2D(filters=512, kernel_size=(3, 3), activation='relu',
74 | padding='same', name='conv5_2')(conv5_1)
75 | conv5_3 = Conv2D(filters=512, kernel_size=(3, 3), activation='relu',
76 | padding='same', name='conv5_3')(conv5_2)
77 | pool5 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
78 | padding='same', name='pool5')(conv5_3)
79 | # fully conv
80 | fc6 = Conv2D(filters=4096, kernel_size=(7, 7),
81 | activation='relu', padding='valid',
82 | name='fc6')(pool5)
83 | drop6 = Dropout(0.5)(fc6)
84 | fc7 = Conv2D(filters=4096, kernel_size=(1, 1),
85 | activation='relu', padding='valid',
86 | name='fc7')(drop6)
87 | drop7 = Dropout(0.5)(fc7)
88 |
89 | #basenet = VGG16_basenet()
90 | # input
91 | #input_tensor = Input(shape=input_shape)
92 | # Get skip_layers=[drop7, pool4, pool3] from the base net: VGG16
93 | #skip_layers = VGG16_basenet(input_tensor)
94 |
95 | #drop7 = skip_layers[0]
96 | score_fr = Conv2D(filters=num_output, kernel_size=(1, 1), padding='valid', name='score_fr')(drop7)
97 | upscore2 = Conv2DTranspose(num_output, kernel_size=4, strides=2, use_bias=False, name='upscore2')(score_fr)
98 |
99 | # scale pool4 skip for compatibility
100 | #pool4 = skip_layers[1]
101 | scale_pool4 = Lambda(lambda x: x * 0.01, name='scale_pool4')(pool4)
102 | score_pool4 = Conv2D(filters=num_output, kernel_size=(1, 1),
103 | padding='valid', name='score_pool4')(scale_pool4)
104 | score_pool4c = _crop(upscore2, offset=(5, 5),
105 | name='score_pool4c')(score_pool4)
106 | fuse_pool4 = add([upscore2, score_pool4c])
107 | upscore_pool4 = Conv2DTranspose(filters=num_output, kernel_size=(4, 4),
108 | strides=(2, 2), padding='valid',
109 | use_bias=False,
110 | data_format=K.image_data_format(),
111 | name='upscore_pool4')(fuse_pool4)
112 | # scale pool3 skip for compatibility
113 | #pool3 = skip_layers[2]
114 | scale_pool3 = Lambda(lambda x: x * 0.0001, name='scale_pool3')(pool3)
115 | score_pool3 = Conv2D(filters=num_output, kernel_size=(1, 1),
116 | padding='valid', name='score_pool3')(scale_pool3)
117 | score_pool3c = _crop(upscore_pool4, offset=(9, 9),
118 | name='score_pool3c')(score_pool3)
119 | fuse_pool3 = add([upscore_pool4, score_pool3c])
120 | # score
121 | upscore8 = Conv2DTranspose(filters=num_output, kernel_size=(16, 16),
122 | strides=(8, 8), padding='valid',
123 | use_bias=False,
124 | data_format=K.image_data_format(),
125 | name='upscore8')(fuse_pool3)
126 | score = _crop(input_tensor, offset=(31, 31), name='score')(upscore8)
127 |
128 | # model
129 | model = Model(input_tensor, score, name='fcn_vgg16')
130 |
131 | return model
--------------------------------------------------------------------------------
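A minimal sketch that builds the FCN-8s graph above and runs a dummy forward pass; the import path assumes the repository root is on PYTHONPATH and the snippet is illustrative only:

# Hypothetical usage sketch (not part of the repo).
import numpy as np
from legacy.FCN8s_keras import FCN

model = FCN(num_output=21, input_shape=(500, 500, 3))
dummy = np.zeros((1, 500, 500, 3), dtype=np.float32)
scores = model.predict(dummy)   # per-pixel class scores, cropped back to the input size
print(scores.shape)             # expected: (1, 500, 500, 21)
--------------------------------------------------------------------------------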
/networks/losses.py:
--------------------------------------------------------------------------------
1 | from keras.layers import Lambda, concatenate
2 | from tensorflow.contrib.distributions import Beta
3 | from .instance_normalization import InstanceNormalization
4 | import keras.backend as K
5 | import tensorflow as tf
6 |
7 | def first_order(x, axis=1):
8 | img_nrows = x.shape[1]
9 | img_ncols = x.shape[2]
10 | if axis == 1:
11 | return K.abs(x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, 1:, :img_ncols - 1, :])
12 | elif axis == 2:
13 | return K.abs(x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, :img_nrows - 1, 1:, :])
14 | else:
15 | return None
16 |
17 | def calc_loss(pred, target, loss='l2'):
18 | if loss.lower() == "l2":
19 | return K.mean(K.square(pred - target))
20 | elif loss.lower() == "l1":
21 | return K.mean(K.abs(pred - target))
22 | elif loss.lower() == "cross_entropy":
23 | return -K.mean(K.log(pred + K.epsilon())*target + K.log(1 - pred + K.epsilon())*(1 - target))
24 | else:
25 |         raise ValueError(f'Received an unknown loss type: {loss}.')
26 |
27 | def cyclic_loss(netG1, netG2, real1):
28 | fake2 = netG2(real1)[-1] # fake2 ABGR
29 |     fake2_alpha = Lambda(lambda x: x[:,:,:, :1])(fake2) # fake2 alpha
30 | fake2 = Lambda(lambda x: x[:,:,:, 1:])(fake2) # fake2 BGR
31 | cyclic1 = netG1(fake2)[-1] # cyclic1 ABGR
32 |     cyclic1_alpha = Lambda(lambda x: x[:,:,:, :1])(cyclic1) # cyclic1 alpha
33 | cyclic1 = Lambda(lambda x: x[:,:,:, 1:])(cyclic1) # cyclic1 BGR
34 | loss = calc_loss(cyclic1, real1, loss='l1')
35 | loss += 0.1 * calc_loss(cyclic1_alpha, fake2_alpha, loss='l1')
36 | return loss
37 |
38 | def adversarial_loss(netD, real, fake_abgr, distorted, gan_training="mixup_LSGAN", **weights):
39 | alpha = Lambda(lambda x: x[:,:,:, :1])(fake_abgr)
40 | fake_bgr = Lambda(lambda x: x[:,:,:, 1:])(fake_abgr)
41 | fake = alpha * fake_bgr + (1-alpha) * distorted
42 |
43 | if gan_training == "mixup_LSGAN":
44 | dist = Beta(0.2, 0.2)
45 | lam = dist.sample()
46 | mixup = lam * concatenate([real, distorted]) + (1 - lam) * concatenate([fake, distorted])
47 | pred_fake = netD(concatenate([fake, distorted]))
48 | pred_mixup = netD(mixup)
49 | loss_D = calc_loss(pred_mixup, lam * K.ones_like(pred_mixup), "l2")
50 | loss_G = weights['w_D'] * calc_loss(pred_fake, K.ones_like(pred_fake), "l2")
51 | mixup2 = lam * concatenate([real, distorted]) + (1 - lam) * concatenate([fake_bgr, distorted])
52 | pred_fake_bgr = netD(concatenate([fake_bgr, distorted]))
53 | pred_mixup2 = netD(mixup2)
54 | loss_D += calc_loss(pred_mixup2, lam * K.ones_like(pred_mixup2), "l2")
55 | loss_G += weights['w_D'] * calc_loss(pred_fake_bgr, K.ones_like(pred_fake_bgr), "l2")
56 | elif gan_training == "relativistic_avg_LSGAN":
57 | real_pred = netD(concatenate([real, distorted]))
58 | fake_pred = netD(concatenate([fake, distorted]))
59 | loss_D = K.mean(K.square(real_pred - K.ones_like(fake_pred)))/2
60 | loss_D += K.mean(K.square(fake_pred - K.zeros_like(fake_pred)))/2
61 | loss_G = weights['w_D'] * K.mean(K.square(fake_pred - K.ones_like(fake_pred)))
62 |
63 | fake_pred2 = netD(concatenate([fake_bgr, distorted]))
64 | loss_D += K.mean(K.square(real_pred - K.mean(fake_pred2,axis=0) - K.ones_like(fake_pred2)))/2
65 | loss_D += K.mean(K.square(fake_pred2 - K.mean(real_pred,axis=0) - K.zeros_like(fake_pred2)))/2
66 | loss_G += weights['w_D'] * K.mean(K.square(real_pred - K.mean(fake_pred2,axis=0) - K.zeros_like(fake_pred2)))/2
67 | loss_G += weights['w_D'] * K.mean(K.square(fake_pred2 - K.mean(real_pred,axis=0) - K.ones_like(fake_pred2)))/2
68 | else:
69 |         raise ValueError(f"Received an unknown GAN training method: {gan_training}.")
70 | return loss_D, loss_G
71 |
72 | def reconstruction_loss(real, fake_abgr, mask_eyes, model_outputs, **weights):
73 | alpha = Lambda(lambda x: x[:,:,:, :1])(fake_abgr)
74 | fake_bgr = Lambda(lambda x: x[:,:,:, 1:])(fake_abgr)
75 |
76 | loss_G = 0
77 | loss_G += weights['w_recon'] * calc_loss(fake_bgr, real, "l1")
78 | loss_G += weights['w_eyes'] * K.mean(K.abs(mask_eyes*(fake_bgr - real)))
79 |
80 | for out in model_outputs[:-1]:
81 | out_size = out.get_shape().as_list()
82 | resized_real = tf.image.resize_images(real, out_size[1:3])
83 | loss_G += weights['w_recon'] * calc_loss(out, resized_real, "l1")
84 | return loss_G
85 |
86 | def edge_loss(real, fake_abgr, mask_eyes, **weights):
87 | alpha = Lambda(lambda x: x[:,:,:, :1])(fake_abgr)
88 | fake_bgr = Lambda(lambda x: x[:,:,:, 1:])(fake_abgr)
89 |
90 | loss_G = 0
91 | loss_G += weights['w_edge'] * calc_loss(first_order(fake_bgr, axis=1), first_order(real, axis=1), "l1")
92 | loss_G += weights['w_edge'] * calc_loss(first_order(fake_bgr, axis=2), first_order(real, axis=2), "l1")
93 | shape_mask_eyes = mask_eyes.get_shape().as_list()
94 | resized_mask_eyes = tf.image.resize_images(mask_eyes, [shape_mask_eyes[1]-1, shape_mask_eyes[2]-1])
95 | loss_G += weights['w_eyes'] * K.mean(K.abs(resized_mask_eyes * \
96 | (first_order(fake_bgr, axis=1) - first_order(real, axis=1))))
97 | loss_G += weights['w_eyes'] * K.mean(K.abs(resized_mask_eyes * \
98 | (first_order(fake_bgr, axis=2) - first_order(real, axis=2))))
99 | return loss_G
100 |
101 | def perceptual_loss(real, fake_abgr, distorted, mask_eyes, vggface_feats, **weights):
102 | alpha = Lambda(lambda x: x[:,:,:, :1])(fake_abgr)
103 | fake_bgr = Lambda(lambda x: x[:,:,:, 1:])(fake_abgr)
104 | fake = alpha * fake_bgr + (1-alpha) * distorted
105 |
106 | def preprocess_vggface(x):
107 | x = (x + 1)/2 * 255 # channel order: BGR
108 | x -= [91.4953, 103.8827, 131.0912]
109 | return x
110 |
111 | real_sz224 = tf.image.resize_images(real, [224, 224])
112 | real_sz224 = Lambda(preprocess_vggface)(real_sz224)
113 | dist = Beta(0.2, 0.2)
114 |     lam = dist.sample() # use the mixup trick here to reduce forward passes from 2 to 1.
115 | mixup = lam*fake_bgr + (1-lam)*fake
116 | fake_sz224 = tf.image.resize_images(mixup, [224, 224])
117 | fake_sz224 = Lambda(preprocess_vggface)(fake_sz224)
118 | real_feat112, real_feat55, real_feat28, real_feat7 = vggface_feats(real_sz224)
119 | fake_feat112, fake_feat55, fake_feat28, fake_feat7 = vggface_feats(fake_sz224)
120 |
121 | # Apply instance norm on VGG(ResNet) features
122 | # From MUNIT https://github.com/NVlabs/MUNIT
123 | loss_G = 0
124 | def instnorm(): return InstanceNormalization()
125 | loss_G += weights['w_pl'][0] * calc_loss(instnorm()(fake_feat7), instnorm()(real_feat7), "l2")
126 | loss_G += weights['w_pl'][1] * calc_loss(instnorm()(fake_feat28), instnorm()(real_feat28), "l2")
127 | loss_G += weights['w_pl'][2] * calc_loss(instnorm()(fake_feat55), instnorm()(real_feat55), "l2")
128 | loss_G += weights['w_pl'][3] * calc_loss(instnorm()(fake_feat112), instnorm()(real_feat112), "l2")
129 | return loss_G
130 |
--------------------------------------------------------------------------------
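The loss functions above pull their coefficients from a **weights dict; the sketch below shows its expected structure. The keys match the code above, but the numeric values are placeholders, not the repo's training defaults:

# Hypothetical weights sketch (values are placeholders, not the repo's defaults).
loss_weights = {
    "w_D": 0.1,                      # adversarial_loss
    "w_recon": 1.0,                  # reconstruction_loss (also applied to auxiliary outputs)
    "w_edge": 0.1,                   # edge_loss
    "w_eyes": 30.0,                  # eye-region terms in reconstruction_loss / edge_loss
    "w_pl": (0.01, 0.1, 0.3, 0.1),   # perceptual_loss: one weight per VGGFace feature map
}

# e.g. loss_G = reconstruction_loss(real, fake_abgr, mask_eyes, model_outputs, **loss_weights)
--------------------------------------------------------------------------------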
/data_loader/data_augmentation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | from umeyama import umeyama
4 | from scipy import ndimage
5 | from pathlib import PurePath, Path
6 |
7 | random_transform_args = {
8 | 'rotation_range': 10,
9 | 'zoom_range': 0.1,
10 | 'shift_range': 0.05,
11 | 'random_flip': 0.5,
12 | }
13 |
14 | # Motion blurs as data augmentation
15 | def get_motion_blur_kernel(sz=7):
16 | rot_angle = np.random.uniform(-180,180)
17 | kernel = np.zeros((sz,sz))
18 | kernel[int((sz-1)//2), :] = np.ones(sz)
19 | kernel = ndimage.interpolation.rotate(kernel, rot_angle, reshape=False)
20 | kernel = np.clip(kernel, 0, 1)
21 | normalize_factor = 1 / np.sum(kernel)
22 | kernel = kernel * normalize_factor
23 | return kernel
24 |
25 | def motion_blur(images, sz=7):
26 |     # images is a list [image1, image2, ...]
27 | blur_sz = np.random.choice([5, 7, 9, 11])
28 | kernel_motion_blur = get_motion_blur_kernel(blur_sz)
29 | for i, image in enumerate(images):
30 | images[i] = cv2.filter2D(image, -1, kernel_motion_blur).astype(np.float64)
31 | return images
32 |
33 | def random_transform(image, rotation_range, zoom_range, shift_range, random_flip):
34 | h,w = image.shape[0:2]
35 | rotation = np.random.uniform(-rotation_range, rotation_range)
36 | scale = np.random.uniform(1 - zoom_range, 1 + zoom_range)
37 | tx = np.random.uniform(-shift_range, shift_range) * w
38 | ty = np.random.uniform(-shift_range, shift_range) * h
39 | mat = cv2.getRotationMatrix2D((w//2,h//2), rotation, scale)
40 | mat[:,2] += (tx,ty)
41 | result = cv2.warpAffine(image, mat, (w,h), borderMode=cv2.BORDER_REPLICATE)
42 | if np.random.random() < random_flip:
43 | result = result[:,::-1]
44 | return result
45 |
46 | def random_warp_rev(image, res=64):
47 | assert image.shape == (256,256,6)
48 | res_scale = res//64
49 |     assert res_scale >= 1, f"Resolution should be >= 64. Received {res}."
50 | interp_param = 80 * res_scale
51 | interp_slice = slice(interp_param//10,9*interp_param//10)
52 | dst_pnts_slice = slice(0,65*res_scale,16*res_scale)
53 |
54 | rand_coverage = np.random.randint(20) + 78 # random warping coverage
55 | rand_scale = np.random.uniform(5., 6.2) # random warping scale
56 |
57 | range_ = np.linspace(128-rand_coverage, 128+rand_coverage, 5)
58 | mapx = np.broadcast_to(range_, (5,5))
59 | mapy = mapx.T
60 | mapx = mapx + np.random.normal(size=(5,5), scale=rand_scale)
61 | mapy = mapy + np.random.normal(size=(5,5), scale=rand_scale)
62 | interp_mapx = cv2.resize(mapx, (interp_param,interp_param))[interp_slice,interp_slice].astype('float32')
63 | interp_mapy = cv2.resize(mapy, (interp_param,interp_param))[interp_slice,interp_slice].astype('float32')
64 | warped_image = cv2.remap(image, interp_mapx, interp_mapy, cv2.INTER_LINEAR)
65 | src_points = np.stack([mapx.ravel(), mapy.ravel()], axis=-1)
66 | dst_points = np.mgrid[dst_pnts_slice,dst_pnts_slice].T.reshape(-1,2)
67 | mat = umeyama(src_points, dst_points, True)[0:2]
68 | target_image = cv2.warpAffine(image, mat, (res,res))
69 | return warped_image, target_image
70 |
71 | def random_color_match(image, fns_all_trn_data):
72 | rand_idx = np.random.randint(len(fns_all_trn_data))
73 | fn_match = fns_all_trn_data[rand_idx]
74 | tar_img = cv2.imread(fn_match)
75 | if tar_img is None:
76 | print(f"Failed reading image {fn_match} in random_color_match().")
77 | return image
78 | r = 60 # only take color information of the center area
79 | src_img = cv2.resize(image, (256,256))
80 | tar_img = cv2.resize(tar_img, (256,256))
81 |
82 | # randomly transform to XYZ color space
83 | rand_color_space_to_XYZ = np.random.choice([True, False])
84 | if rand_color_space_to_XYZ:
85 | src_img = cv2.cvtColor(src_img, cv2.COLOR_BGR2XYZ)
86 | tar_img = cv2.cvtColor(tar_img, cv2.COLOR_BGR2XYZ)
87 |
88 | # compute statistics
89 | mt = np.mean(tar_img[r:-r,r:-r,:], axis=(0,1))
90 | st = np.std(tar_img[r:-r,r:-r,:], axis=(0,1))
91 | ms = np.mean(src_img[r:-r,r:-r,:], axis=(0,1))
92 | ss = np.std(src_img[r:-r,r:-r,:], axis=(0,1))
93 |
94 | # randomly interpolate the statistics
95 | rand_ratio = np.random.uniform()
96 | mt = rand_ratio * mt + (1 - rand_ratio) * ms
97 | st = rand_ratio * st + (1 - rand_ratio) * ss
98 |
99 | # Apply color transfer from src to tar domain
100 |     if np.any(ss <= 1e-7): return src_img
101 | result = st * (src_img.astype(np.float32) - ms) / (ss+1e-7) + mt
102 | if result.min() < 0:
103 | result = result - result.min()
104 | if result.max() > 255:
105 | result = (255.0/result.max()*result).astype(np.float32)
106 |
107 | # transform back from XYZ to BGR color space if necessary
108 | if rand_color_space_to_XYZ:
109 | result = cv2.cvtColor(result.astype(np.uint8), cv2.COLOR_XYZ2BGR)
110 | return result
111 |
112 | def read_image(fn, fns_all_trn_data, dir_bm_eyes=None, res=64, prob_random_color_match=0.5,
113 | use_da_motion_blur=True, use_bm_eyes=True,
114 | random_transform_args=random_transform_args):
115 | if dir_bm_eyes is None:
116 |         raise ValueError("dir_bm_eyes is not set.")
117 |
118 | # https://github.com/tensorflow/tensorflow/issues/5552
119 | # TensorFlow converts str to bytes in most places, including sess.run().
120 | if type(fn) == type(b"bytes"):
121 | fn = fn.decode("utf-8")
122 | dir_bm_eyes = dir_bm_eyes.decode("utf-8")
123 | fns_all_trn_data = [fn_all.decode("utf-8") for fn_all in fns_all_trn_data]
124 |
125 | raw_fn = PurePath(fn).parts[-1]
126 | image = cv2.imread(fn)
127 | if image is None:
128 | print(f"Failed reading image {fn}.")
129 | raise IOError(f"Failed reading image {fn}.")
130 | if np.random.uniform() <= prob_random_color_match:
131 | image = random_color_match(image, fns_all_trn_data)
132 | image = cv2.resize(image, (256,256)) / 255 * 2 - 1
133 |
134 | if use_bm_eyes:
135 | bm_eyes = cv2.imread(f"{dir_bm_eyes}/{raw_fn}")
136 | if bm_eyes is None:
137 | print(f"Failed reading binary mask {dir_bm_eyes}/{raw_fn}. \
138 | If this message keeps showing, please check for existence of binary masks folder \
139 | or disable eye-aware training in the configuration.")
140 | bm_eyes = np.zeros_like(image)
141 | #raise IOError(f"Failed reading binary mask {dir_bm_eyes}/{raw_fn}.")
142 | bm_eyes = cv2.resize(bm_eyes, (256,256)) / 255.
143 | else:
144 | bm_eyes = np.zeros_like(image)
145 |
146 | image = np.concatenate([image, bm_eyes], axis=-1)
147 | image = random_transform(image, **random_transform_args)
148 | warped_img, target_img = random_warp_rev(image, res=res)
149 |
150 | bm_eyes = target_img[...,3:]
151 | warped_img = warped_img[...,:3]
152 | target_img = target_img[...,:3]
153 |
154 | # Motion blur data augmentation:
155 | # we want the model to learn to preserve motion blurs of input images
156 | if np.random.uniform() < 0.25 and use_da_motion_blur:
157 | warped_img, target_img = motion_blur([warped_img, target_img])
158 |
159 | warped_img, target_img, bm_eyes = \
160 | warped_img.astype(np.float32), target_img.astype(np.float32), bm_eyes.astype(np.float32)
161 |
162 | return warped_img, target_img, bm_eyes
--------------------------------------------------------------------------------
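A minimal usage sketch for read_image(); the folder paths are placeholders and the snippet assumes the repository root is on PYTHONPATH:

# Hypothetical usage sketch (not part of the repo; paths are placeholders).
from glob import glob
from data_loader.data_augmentation import read_image

fns_faceA = glob("./faceA/*.jpg")                # assumed training-image folder
dir_bm_eyes_A = "./binary_masks/faceA_eyes"      # assumed binary-mask folder

warped, target, bm_eyes = read_image(
    fns_faceA[0], fns_faceA, dir_bm_eyes=dir_bm_eyes_A, res=64,
    prob_random_color_match=0.5, use_da_motion_blur=True, use_bm_eyes=True)
print(warped.shape, target.shape, bm_eyes.shape)  # expected: (64, 64, 3) each
--------------------------------------------------------------------------------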
/FaceSwap_GAN_v2.2_video_conversion.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "\n",
8 | "# Import modules"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": null,
14 | "metadata": {},
15 | "outputs": [],
16 | "source": [
17 | "import keras.backend as K"
18 | ]
19 | },
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {},
23 | "source": [
24 | "\n",
25 | "# Model Configuration"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 2,
31 | "metadata": {},
32 | "outputs": [],
33 | "source": [
34 | "K.set_learning_phase(0)"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 3,
40 | "metadata": {},
41 | "outputs": [],
42 | "source": [
43 | "# Input/Output resolution\n",
44 | "RESOLUTION = 64 # 64x64, 128x128, 256x256\n",
45 | "assert (RESOLUTION % 64) == 0, \"RESOLUTION should be 64, 128, 256\""
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 4,
51 | "metadata": {},
52 | "outputs": [],
53 | "source": [
54 | "# Architecture configuration\n",
55 | "arch_config = {}\n",
56 | "arch_config['IMAGE_SHAPE'] = (RESOLUTION, RESOLUTION, 3)\n",
57 | "arch_config['use_self_attn'] = True\n",
58 | "arch_config['norm'] = \"instancenorm\" # instancenorm, batchnorm, layernorm, groupnorm, none\n",
59 | "arch_config['model_capacity'] = \"standard\" # standard, lite"
60 | ]
61 | },
62 | {
63 | "cell_type": "markdown",
64 | "metadata": {},
65 | "source": [
66 | "\n",
67 | "# Define models"
68 | ]
69 | },
70 | {
71 | "cell_type": "code",
72 | "execution_count": 5,
73 | "metadata": {},
74 | "outputs": [],
75 | "source": [
76 | "from networks.faceswap_gan_model import FaceswapGANModel"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": 6,
82 | "metadata": {
83 | "scrolled": true
84 | },
85 | "outputs": [],
86 | "source": [
87 | "model = FaceswapGANModel(**arch_config)"
88 | ]
89 | },
90 | {
91 | "cell_type": "markdown",
92 | "metadata": {},
93 | "source": [
94 | "\n",
95 | "# Load Model Weights"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 10,
101 | "metadata": {},
102 | "outputs": [
103 | {
104 | "name": "stdout",
105 | "output_type": "stream",
106 | "text": [
107 | "Model weights files are successfully loaded\n"
108 | ]
109 | }
110 | ],
111 | "source": [
112 | "model.load_weights(path=\"./models\")"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "\n",
120 | "# Video Conversion"
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": 10,
126 | "metadata": {},
127 | "outputs": [],
128 | "source": [
129 | "from converter.video_converter import VideoConverter\n",
130 | "from detector.face_detector import MTCNNFaceDetector"
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": null,
136 | "metadata": {},
137 | "outputs": [],
138 | "source": [
139 | "mtcnn_weights_dir = \"./mtcnn_weights/\"\n",
140 | "\n",
141 | "fd = MTCNNFaceDetector(sess=K.get_session(), model_path=mtcnn_weights_dir)\n",
142 | "vc = VideoConverter()"
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": 12,
148 | "metadata": {},
149 | "outputs": [],
150 | "source": [
151 | "vc.set_face_detector(fd)\n",
152 | "vc.set_gan_model(model)"
153 | ]
154 | },
155 | {
156 | "cell_type": "markdown",
157 | "metadata": {},
158 | "source": [
159 | "### Video conversion configuration\n",
160 | "\n",
161 | "\n",
162 | "- `use_smoothed_bbox`: \n",
163 | " - Boolean. Whether to enable smoothed bbox.\n",
164 | "- `use_kalman_filter`: \n",
165 | " - Boolean. Whether to enable Kalman filter.\n",
166 | "- `use_auto_downscaling`:\n",
167 | " - Boolean. Whether to enable auto-downscaling in face detection (to prevent OOM error).\n",
168 | "- `bbox_moving_avg_coef`: \n",
169 |     "  - Float between 0 and 1. Smoothing coefficient used when use_kalman_filter is set to False.\n",
170 | "- `min_face_area`:\n",
171 | " - int x int. Minimum size of face. Detected faces smaller than min_face_area will not be transformed.\n",
172 | "- `IMAGE_SHAPE`:\n",
173 | " - Input/Output resolution of the GAN model\n",
174 | "- `kf_noise_coef`:\n",
175 |     "  - Float. Increase by 10x if tracking is slow. Decrease by 1/10x if tracking works fine but jitter occurs.\n",
176 | "- `use_color_correction`: \n",
177 | " - String of \"adain\", \"adain_xyz\", \"hist_match\", or \"none\". The color correction method to be applied.\n",
178 | "- `detec_threshold`: \n",
179 |     "  - Float between 0 and 1. Decrease its value if faces are missed. Increase its value to reduce false positives.\n",
180 | "- `roi_coverage`: \n",
181 |     "  - Float between 0 and 1 (exclusive). Center area of input images to be cropped (suggested range: 0.85 ~ 0.95).\n",
182 | "- `enhance`: \n",
183 |     "  - Float. A coefficient for contrast enhancement in the region of the alpha mask (suggested range: 0. ~ 0.4).\n",
184 | "- `output_type`: \n",
185 | " - Layout format of output video: 1. [ result ], 2. [ source | result ], 3. [ source | result | mask ]\n",
186 | "- `direction`: \n",
187 | " - String of \"AtoB\" or \"BtoA\". Direction of face transformation."
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": 13,
193 | "metadata": {},
194 | "outputs": [],
195 | "source": [
196 | "options = {\n",
197 | " # ===== Fixed =====\n",
198 | " \"use_smoothed_bbox\": True,\n",
199 | " \"use_kalman_filter\": True,\n",
200 | " \"use_auto_downscaling\": False,\n",
201 | " \"bbox_moving_avg_coef\": 0.65,\n",
202 | " \"min_face_area\": 35 * 35,\n",
203 | " \"IMAGE_SHAPE\": model.IMAGE_SHAPE,\n",
204 | " # ===== Tunable =====\n",
205 | " \"kf_noise_coef\": 3e-3,\n",
206 | " \"use_color_correction\": \"hist_match\",\n",
207 | " \"detec_threshold\": 0.7,\n",
208 | " \"roi_coverage\": 0.9,\n",
209 | " \"enhance\": 0.,\n",
210 | " \"output_type\": 3,\n",
211 | " \"direction\": \"AtoB\",\n",
212 | "}"
213 | ]
214 | },
215 | {
216 | "cell_type": "markdown",
217 | "metadata": {},
218 | "source": [
219 | "# Start video conversion\n",
220 | "\n",
221 | "\n",
222 | "- `input_fn`: \n",
223 | " - String. Input video path.\n",
224 | "- `output_fn`: \n",
225 | " - String. Output video path.\n",
226 | "- `duration`: \n",
227 | " - None or a non-negative float tuple: (start_sec, end_sec). Duration of input video to be converted\n",
228 | " - e.g., setting `duration = (5, 7.5)` outputs a 2.5-sec-long video clip corresponding to 5s ~ 7.5s of the input video."
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": 14,
234 | "metadata": {},
235 | "outputs": [],
236 | "source": [
237 | "input_fn = \"INPUT_VIDEO.mp4\"\n",
238 | "output_fn = \"OUTPUT_VIDEO.mp4\"\n",
239 | "duration = None "
240 | ]
241 | },
242 | {
243 | "cell_type": "code",
244 | "execution_count": 15,
245 | "metadata": {},
246 | "outputs": [
247 | {
248 | "name": "stdout",
249 | "output_type": "stream",
250 | "text": [
251 | "[MoviePy] >>>> Building video OUTPUT_VIDEO.mp4\n",
252 | "[MoviePy] Writing video OUTPUT_VIDEO.mp4\n"
253 | ]
254 | },
255 | {
256 | "name": "stderr",
257 | "output_type": "stream",
258 | "text": [
259 | "100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:12<00:00, 1.48it/s]\n"
260 | ]
261 | },
262 | {
263 | "name": "stdout",
264 | "output_type": "stream",
265 | "text": [
266 | "[MoviePy] Done.\n",
267 | "[MoviePy] >>>> Video ready: OUTPUT_VIDEO.mp4 \n",
268 | "\n"
269 | ]
270 | }
271 | ],
272 | "source": [
273 | "vc.convert(input_fn=input_fn, output_fn=output_fn, options=options, duration=duration)"
274 | ]
275 | },
276 | {
277 | "cell_type": "code",
278 | "execution_count": null,
279 | "metadata": {},
280 | "outputs": [],
281 | "source": []
282 | },
283 | {
284 | "cell_type": "code",
285 | "execution_count": null,
286 | "metadata": {},
287 | "outputs": [],
288 | "source": []
289 | }
290 | ],
291 | "metadata": {
292 | "kernelspec": {
293 | "display_name": "Python 3",
294 | "language": "python",
295 | "name": "python3"
296 | },
297 | "language_info": {
298 | "codemirror_mode": {
299 | "name": "ipython",
300 | "version": 3
301 | },
302 | "file_extension": ".py",
303 | "mimetype": "text/x-python",
304 | "name": "python",
305 | "nbconvert_exporter": "python",
306 | "pygments_lexer": "ipython3",
307 | "version": "3.6.4"
308 | }
309 | },
310 | "nbformat": 4,
311 | "nbformat_minor": 2
312 | }
313 |
--------------------------------------------------------------------------------
/networks/nn_blocks.py:
--------------------------------------------------------------------------------
1 | from keras.layers import *
2 | from keras.layers.advanced_activations import LeakyReLU
3 | from .instance_normalization import InstanceNormalization
4 | from .GroupNormalization import GroupNormalization
5 | from .pixel_shuffler import PixelShuffler
6 | from .custom_layers.scale_layer import Scale
7 | from .custom_inits.icnr_initializer import icnr_keras
8 | import tensorflow as tf
9 | import keras.backend as K
10 |
11 | # initializers and weight decay regularization are fixed
12 | conv_init = 'he_normal'
13 | w_l2 = 1e-4
14 |
15 | def self_attn_block(inp, nc, squeeze_factor=8):
16 | '''
17 | Code borrows from https://github.com/taki0112/Self-Attention-GAN-Tensorflow
18 | '''
19 |     assert nc//squeeze_factor > 0, f"Input channels must be >= {squeeze_factor}, received nc={nc}"
20 | x = inp
21 | shape_x = x.get_shape().as_list()
22 |
23 | f = Conv2D(nc//squeeze_factor, 1, kernel_regularizer=regularizers.l2(w_l2))(x)
24 | g = Conv2D(nc//squeeze_factor, 1, kernel_regularizer=regularizers.l2(w_l2))(x)
25 | h = Conv2D(nc, 1, kernel_regularizer=regularizers.l2(w_l2))(x)
26 |
27 | shape_f = f.get_shape().as_list()
28 | shape_g = g.get_shape().as_list()
29 | shape_h = h.get_shape().as_list()
30 | flat_f = Reshape((-1, shape_f[-1]))(f)
31 | flat_g = Reshape((-1, shape_g[-1]))(g)
32 | flat_h = Reshape((-1, shape_h[-1]))(h)
33 |
34 | s = Lambda(lambda x: K.batch_dot(x[0], Permute((2,1))(x[1])))([flat_g, flat_f])
35 |
36 | beta = Softmax(axis=-1)(s)
37 | o = Lambda(lambda x: K.batch_dot(x[0], x[1]))([beta, flat_h])
38 | o = Reshape(shape_x[1:])(o)
39 | o = Scale()(o)
40 |
41 | out = add([o, inp])
42 | return out
43 |
44 | def dual_attn_block(inp, nc, squeeze_factor=8, norm='none'):
45 | '''
46 | https://github.com/junfu1115/DANet
47 | '''
48 |     assert nc//squeeze_factor > 0, f"Input channels must be >= {squeeze_factor}, received nc={nc}"
49 | x = inp
50 | shape_x = x.get_shape().as_list()
51 |
52 | # position attention module
53 | x_pam = Conv2D(nc, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2),
54 | kernel_initializer=conv_init, use_bias=False, padding="same")(x)
55 | x_pam = Activation("relu")(x_pam)
56 | x_pam = normalization(x_pam, norm, nc)
57 | f_pam = Conv2D(nc//squeeze_factor, 1, kernel_regularizer=regularizers.l2(w_l2))(x_pam)
58 | g_pam = Conv2D(nc//squeeze_factor, 1, kernel_regularizer=regularizers.l2(w_l2))(x_pam)
59 | h_pam = Conv2D(nc, 1, kernel_regularizer=regularizers.l2(w_l2))(x_pam)
60 | shape_f_pam = f_pam.get_shape().as_list()
61 | shape_g_pam = g_pam.get_shape().as_list()
62 | shape_h_pam = h_pam.get_shape().as_list()
63 | flat_f_pam = Reshape((-1, shape_f_pam[-1]))(f_pam)
64 | flat_g_pam = Reshape((-1, shape_g_pam[-1]))(g_pam)
65 | flat_h_pam = Reshape((-1, shape_h_pam[-1]))(h_pam)
66 | s_pam = Lambda(lambda x: K.batch_dot(x[0], Permute((2,1))(x[1])))([flat_g_pam, flat_f_pam])
67 | beta_pam = Softmax(axis=-1)(s_pam)
68 | o_pam = Lambda(lambda x: K.batch_dot(x[0], x[1]))([beta_pam, flat_h_pam])
69 | o_pam = Reshape(shape_x[1:])(o_pam)
70 | o_pam = Scale()(o_pam)
71 | out_pam = add([o_pam, x_pam])
72 | out_pam = Conv2D(nc, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2),
73 | kernel_initializer=conv_init, use_bias=False, padding="same")(out_pam)
74 | out_pam = Activation("relu")(out_pam)
75 | out_pam = normalization(out_pam, norm, nc)
76 |
77 | # channel attention module
78 | x_chn = Conv2D(nc, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2),
79 | kernel_initializer=conv_init, use_bias=False, padding="same")(x)
80 | x_chn = Activation("relu")(x_chn)
81 | x_chn = normalization(x_chn, norm, nc)
82 | shape_x_chn = x_chn.get_shape().as_list()
83 | flat_f_chn = Reshape((-1, shape_x_chn[-1]))(x_chn)
84 | flat_g_chn = Reshape((-1, shape_x_chn[-1]))(x_chn)
85 | flat_h_chn = Reshape((-1, shape_x_chn[-1]))(x_chn)
86 | s_chn = Lambda(lambda x: K.batch_dot(Permute((2,1))(x[0]), x[1]))([flat_g_chn, flat_f_chn])
87 | s_new_chn = Lambda(lambda x: K.repeat_elements(K.max(x, -1, keepdims=True), nc, -1))(s_chn)
88 | s_new_chn = Lambda(lambda x: x[0] - x[1])([s_new_chn, s_chn])
89 | beta_chn = Softmax(axis=-1)(s_new_chn)
90 | o_chn = Lambda(lambda x: K.batch_dot(x[0], Permute((2,1))(x[1])))([flat_h_chn, beta_chn])
91 | o_chn = Reshape(shape_x[1:])(o_chn)
92 | o_chn = Scale()(o_chn)
93 | out_chn = add([o_chn, x_chn])
94 | out_chn = Conv2D(nc, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2),
95 | kernel_initializer=conv_init, use_bias=False, padding="same")(out_chn)
96 | out_chn = Activation("relu")(out_chn)
97 | out_chn = normalization(out_chn, norm, nc)
98 |
99 | out = add([out_pam, out_chn])
100 | return out
101 |
102 | def normalization(inp, norm='none', group=16):
103 | x = inp
104 | if norm == 'layernorm':
105 | x = GroupNormalization(group=group)(x)
106 | elif norm == 'batchnorm':
107 | x = BatchNormalization()(x)
108 | elif norm == 'groupnorm':
109 | x = GroupNormalization(group=16)(x)
110 | elif norm == 'instancenorm':
111 | x = InstanceNormalization()(x)
112 | elif norm == 'hybrid':
113 | if group % 2 == 1:
114 | raise ValueError(f"Output channels must be an even number for hybrid norm, received {group}.")
115 | f = group
116 | x0 = Lambda(lambda x: x[...,:f//2])(x)
117 | x1 = Lambda(lambda x: x[...,f//2:])(x)
118 | x0 = Conv2D(f//2, kernel_size=1, kernel_regularizer=regularizers.l2(w_l2),
119 | kernel_initializer=conv_init)(x0)
120 | x1 = InstanceNormalization()(x1)
121 | x = concatenate([x0, x1], axis=-1)
122 | else:
123 | x = x
124 | return x
125 |
126 | def conv_block(input_tensor, f, use_norm=False, strides=2, w_l2=w_l2, norm='none'):
127 | x = input_tensor
128 | x = Conv2D(f, kernel_size=3, strides=strides, kernel_regularizer=regularizers.l2(w_l2),
129 | kernel_initializer=conv_init, use_bias=False, padding="same")(x)
130 | x = Activation("relu")(x)
131 | x = normalization(x, norm, f) if use_norm else x
132 | return x
133 |
134 | def conv_block_d(input_tensor, f, use_norm=False, w_l2=w_l2, norm='none'):
135 | x = input_tensor
136 | x = Conv2D(f, kernel_size=4, strides=2, kernel_regularizer=regularizers.l2(w_l2),
137 | kernel_initializer=conv_init, use_bias=False, padding="same")(x)
138 | x = LeakyReLU(alpha=0.2)(x)
139 | x = normalization(x, norm, f) if use_norm else x
140 | return x
141 |
142 | def res_block(input_tensor, f, use_norm=False, w_l2=w_l2, norm='none'):
143 | x = input_tensor
144 | x = Conv2D(f, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2),
145 | kernel_initializer=conv_init, use_bias=False, padding="same")(x)
146 | x = LeakyReLU(alpha=0.2)(x)
147 | x = normalization(x, norm, f) if use_norm else x
148 | x = Conv2D(f, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2),
149 | kernel_initializer=conv_init, use_bias=False, padding="same")(x)
150 | x = add([x, input_tensor])
151 | x = LeakyReLU(alpha=0.2)(x)
152 | x = normalization(x, norm, f) if use_norm else x
153 | return x
154 |
155 | def SPADE_res_block(input_tensor, cond_input_tensor, f, use_norm=True, norm='none'):
156 | """
157 | Semantic Image Synthesis with Spatially-Adaptive Normalization
158 | Taesung Park, Ming-Yu Liu, Ting-Chun Wang, Jun-Yan Zhu
159 | https://arxiv.org/abs/1903.07291
160 |
161 | Note:
162 | SPADE just works like a charm.
163 | It speeds up training a lot and is also a very promising approach for solving the profile face generation issue.
164 | *(This implementation may be wrong since I haven't finished reading the paper.
165 | The authors haven't released their code either (https://github.com/NVlabs/SPADE).)
166 | """
167 | def SPADE(input_tensor, cond_input_tensor, f, use_norm=True, norm='none'):
168 | x = input_tensor
169 | x = normalization(x, norm, f) if use_norm else x
170 | y = cond_input_tensor
171 | y = Conv2D(128, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2),
172 | kernel_initializer=conv_init, padding='same')(y)
173 | y = Activation('relu')(y)
174 | gamma = Conv2D(f, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2),
175 | kernel_initializer=conv_init, padding='same')(y)
176 | beta = Conv2D(f, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2),
177 | kernel_initializer=conv_init, padding='same')(y)
178 | x = multiply([x, gamma])
179 | x = add([x, beta])
180 | return x
181 |
182 | x = input_tensor
183 | x = SPADE(x, cond_input_tensor, f, use_norm, norm)
184 | x = Activation('relu')(x)
185 | x = ReflectPadding2D(x)
186 | x = Conv2D(f, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2),
187 | kernel_initializer=conv_init, use_bias=not use_norm)(x)
188 | x = SPADE(x, cond_input_tensor, f, use_norm, norm)
189 | x = Activation('relu')(x)
190 | x = ReflectPadding2D(x)
191 | x = Conv2D(f, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2),
192 | kernel_initializer=conv_init)(x)
193 | x = add([x, input_tensor])
194 | x = Activation('relu')(x)
195 | return x
196 |
197 | def upscale_ps(input_tensor, f, use_norm=False, w_l2=w_l2, norm='none'):
198 | x = input_tensor
199 | x = Conv2D(f*4, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2),
200 | kernel_initializer=icnr_keras, padding='same')(x)
201 | x = LeakyReLU(0.2)(x)
202 | x = normalization(x, norm, f) if use_norm else x
203 | x = PixelShuffler()(x)
204 | return x
205 |
206 | def ReflectPadding2D(x, pad=1):
207 | x = Lambda(lambda x: tf.pad(x, [[0, 0], [pad, pad], [pad, pad], [0, 0]], mode='REFLECT'))(x)
208 | return x
209 |
210 | def upscale_nn(input_tensor, f, use_norm=False, w_l2=w_l2, norm='none'):
211 | x = input_tensor
212 | x = UpSampling2D()(x)
213 | x = ReflectPadding2D(x, 1)
214 | x = Conv2D(f, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2),
215 | kernel_initializer=conv_init)(x)
216 | x = normalization(x, norm, f) if use_norm else x
217 | return x
218 |
--------------------------------------------------------------------------------
/MTCNN_video_face_detection_alignment.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Face detection for video\n",
8 | "Images of detected faces have format `frameXfaceY.jpg`, where `X` represents the Xth frame and `Y` the Yth face in Xth frame. "
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": null,
14 | "metadata": {},
15 | "outputs": [],
16 | "source": [
17 | "import os\n",
18 | "import cv2\n",
19 | "import numpy as np\n",
20 | "from matplotlib import pyplot as plt"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": null,
26 | "metadata": {},
27 | "outputs": [],
28 | "source": [
29 | "import tensorflow as tf\n",
30 | "from keras import backend as K\n",
31 | "from pathlib import PurePath, Path\n",
32 | "from moviepy.editor import VideoFileClip"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": 2,
38 | "metadata": {},
39 | "outputs": [],
40 | "source": [
41 | "from umeyama import umeyama\n",
42 | "import mtcnn_detect_face"
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "Create MTCNN and its forward pass functions"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 3,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": [
58 | "def create_mtcnn(sess, model_path):\n",
59 | " if not model_path:\n",
60 | " model_path,_ = os.path.split(os.path.realpath(__file__))\n",
61 | "\n",
62 | " with tf.variable_scope('pnet2'):\n",
63 | " data = tf.placeholder(tf.float32, (None,None,None,3), 'input')\n",
64 | " pnet = mtcnn_detect_face.PNet({'data':data})\n",
65 | " pnet.load(os.path.join(model_path, 'det1.npy'), sess)\n",
66 | " with tf.variable_scope('rnet2'):\n",
67 | " data = tf.placeholder(tf.float32, (None,24,24,3), 'input')\n",
68 | " rnet = mtcnn_detect_face.RNet({'data':data})\n",
69 | " rnet.load(os.path.join(model_path, 'det2.npy'), sess)\n",
70 | " with tf.variable_scope('onet2'):\n",
71 | " data = tf.placeholder(tf.float32, (None,48,48,3), 'input')\n",
72 | " onet = mtcnn_detect_face.ONet({'data':data})\n",
73 | " onet.load(os.path.join(model_path, 'det3.npy'), sess)\n",
74 | " return pnet, rnet, onet"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": null,
80 | "metadata": {},
81 | "outputs": [],
82 | "source": [
83 | "WEIGHTS_PATH = \"./mtcnn_weights/\"\n",
84 | "\n",
85 | "sess = K.get_session()\n",
86 | "with sess.as_default():\n",
87 | " global pnet, rnet, onet \n",
88 | " pnet, rnet, onet = create_mtcnn(sess, WEIGHTS_PATH)\n",
89 | "\n",
90 | "global pnet, rnet, onet\n",
91 | " \n",
92 | "pnet = K.function([pnet.layers['data']],[pnet.layers['conv4-2'], pnet.layers['prob1']])\n",
93 | "rnet = K.function([rnet.layers['data']],[rnet.layers['conv5-2'], rnet.layers['prob1']])\n",
94 | "onet = K.function([onet.layers['data']],[onet.layers['conv6-2'], onet.layers['conv6-3'], onet.layers['prob1']])"
95 | ]
96 | },
97 | {
98 | "cell_type": "markdown",
99 | "metadata": {},
100 | "source": [
101 | "Create folder where images will be saved to"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": 10,
107 | "metadata": {},
108 | "outputs": [],
109 | "source": [
110 | "Path(f\"faces/aligned_faces\").mkdir(parents=True, exist_ok=True)\n",
111 | "Path(f\"faces/raw_faces\").mkdir(parents=True, exist_ok=True)\n",
112 | "Path(f\"faces/binary_masks_eyes\").mkdir(parents=True, exist_ok=True)"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "Functions for video processing and face alignment"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": 8,
125 | "metadata": {},
126 | "outputs": [],
127 | "source": [
128 | "def get_src_landmarks(x0, x1, y0, y1, pnts):\n",
129 | " \"\"\"\n",
130 | " x0, x1, y0, y1: (smoothed) bbox coord.\n",
131 | " pnts: landmarks predicted by MTCNN\n",
132 | " \"\"\" \n",
133 | " src_landmarks = [(int(pnts[i+5][0]-x0), \n",
134 | " int(pnts[i][0]-y0)) for i in range(5)]\n",
135 | " return src_landmarks\n",
136 | "\n",
137 | "def get_tar_landmarks(img):\n",
138 | " \"\"\" \n",
139 | " img: detected face image\n",
140 | " \"\"\" \n",
141 | " ratio_landmarks = [\n",
142 | " (0.31339227236234224, 0.3259269274198092),\n",
143 | " (0.31075140146108776, 0.7228453709528997),\n",
144 | " (0.5523683107816256, 0.5187296867370605),\n",
145 | " (0.7752419985257663, 0.37262483743520886),\n",
146 | " (0.7759613623985877, 0.6772957581740159)\n",
147 | " ] \n",
148 | " \n",
149 | " img_size = img.shape\n",
150 | " tar_landmarks = [(int(xy[0]*img_size[0]), \n",
151 | " int(xy[1]*img_size[1])) for xy in ratio_landmarks]\n",
152 | " return tar_landmarks\n",
153 | "\n",
154 | "def landmarks_match_mtcnn(src_im, src_landmarks, tar_landmarks): \n",
155 | " \"\"\"\n",
156 | " umeyama(src, dst, estimate_scale)\n",
157 | " landmarks coord. for umeyama should be (width, height) or (y, x)\n",
158 | " \"\"\"\n",
159 | " src_size = src_im.shape\n",
160 | " src_tmp = [(int(xy[1]), int(xy[0])) for xy in src_landmarks]\n",
161 | " tar_tmp = [(int(xy[1]), int(xy[0])) for xy in tar_landmarks]\n",
162 | " M = umeyama(np.array(src_tmp), np.array(tar_tmp), True)[0:2]\n",
163 | " result = cv2.warpAffine(src_im, M, (src_size[1], src_size[0]), borderMode=cv2.BORDER_REPLICATE) \n",
164 | " return result\n",
165 | "\n",
166 | "def process_mtcnn_bbox(bboxes, im_shape):\n",
167 | " \"\"\"\n",
168 | " output bbox coordinate of MTCNN is (y0, x0, y1, x1)\n",
169 | " Here we process the bbox coord. to a square bbox with ordering (x0, y1, x1, y0)\n",
170 | " \"\"\"\n",
171 | " for i, bbox in enumerate(bboxes):\n",
172 | " y0, x0, y1, x1 = bboxes[i,0:4]\n",
173 | " w, h = int(y1 - y0), int(x1 - x0)\n",
174 | " length = (w + h)/2\n",
175 | " center = (int((x1+x0)/2),int((y1+y0)/2))\n",
176 | " new_x0 = np.max([0, (center[0]-length//2)])#.astype(np.int32)\n",
177 | " new_x1 = np.min([im_shape[0], (center[0]+length//2)])#.astype(np.int32)\n",
178 | " new_y0 = np.max([0, (center[1]-length//2)])#.astype(np.int32)\n",
179 | " new_y1 = np.min([im_shape[1], (center[1]+length//2)])#.astype(np.int32)\n",
180 | " bboxes[i,0:4] = new_x0, new_y1, new_x1, new_y0\n",
181 | " return bboxes\n",
182 | "\n",
183 | "def process_video(input_img): \n",
184 | " global frames, save_interval\n",
185 | " global pnet, rnet, onet\n",
186 | " minsize = 30 # minimum size of face\n",
187 | " detec_threshold = 0.7\n",
188 | " threshold = [0.6, 0.7, detec_threshold] # three steps's threshold\n",
189 | " factor = 0.709 # scale factor \n",
190 | " \n",
191 | " frames += 1 \n",
192 | " if frames % save_interval == 0:\n",
193 | " faces, pnts = mtcnn_detect_face.detect_face(\n",
194 | " input_img, minsize, pnet, rnet, onet, threshold, factor)\n",
195 | " faces = process_mtcnn_bbox(faces, input_img.shape)\n",
196 | " \n",
197 | " for idx, (x0, y1, x1, y0, conf_score) in enumerate(faces):\n",
198 | " det_face_im = input_img[int(x0):int(x1),int(y0):int(y1),:]\n",
199 | "\n",
200 | " # get src/tar landmarks\n",
201 | " src_landmarks = get_src_landmarks(x0, x1, y0, y1, pnts)\n",
202 | " tar_landmarks = get_tar_landmarks(det_face_im)\n",
203 | "\n",
204 | " # align detected face\n",
205 | " aligned_det_face_im = landmarks_match_mtcnn(\n",
206 | " det_face_im, src_landmarks, tar_landmarks)\n",
207 | "\n",
208 | " fname = f\"./faces/aligned_faces/frame{frames}face{str(idx)}.jpg\"\n",
209 | " plt.imsave(fname, aligned_det_face_im, format=\"jpg\")\n",
210 | " fname = f\"./faces/raw_faces/frame{frames}face{str(idx)}.jpg\"\n",
211 | " plt.imsave(fname, det_face_im, format=\"jpg\")\n",
212 | " \n",
213 | " bm = np.zeros_like(aligned_det_face_im)\n",
214 | " h, w = bm.shape[:2]\n",
215 | " bm[int(src_landmarks[0][0]-h/15):int(src_landmarks[0][0]+h/15),\n",
216 | " int(src_landmarks[0][1]-w/8):int(src_landmarks[0][1]+w/8),:] = 255\n",
217 | " bm[int(src_landmarks[1][0]-h/15):int(src_landmarks[1][0]+h/15),\n",
218 | " int(src_landmarks[1][1]-w/8):int(src_landmarks[1][1]+w/8),:] = 255\n",
219 | " bm = landmarks_match_mtcnn(bm, src_landmarks, tar_landmarks)\n",
220 | " fname = f\"./faces/binary_masks_eyes/frame{frames}face{str(idx)}.jpg\"\n",
221 | " plt.imsave(fname, bm, format=\"jpg\")\n",
222 | " \n",
223 | " return np.zeros((3,3,3))"
224 | ]
225 | },
226 | {
227 | "cell_type": "markdown",
228 | "metadata": {},
229 | "source": [
230 | "Start face detection\n",
231 | "\n",
232 | "Default input video filename: `INPUT_VIDEO.mp4`"
233 | ]
234 | },
235 | {
236 | "cell_type": "code",
237 | "execution_count": 12,
238 | "metadata": {},
239 | "outputs": [],
240 | "source": [
241 | "global frames\n",
242 | "frames = 0\n",
243 | "\n",
244 | "# configuration\n",
245 | "save_interval = 6 # perform face detection every {save_interval} frames\n",
246 | "fn_input_video = \"INPUT_VIDEO.mp4\"\n",
247 | "\n",
248 | "output = 'dummy.mp4'\n",
249 | "clip1 = VideoFileClip(fn_input_video)\n",
250 | "clip = clip1.fl_image(process_video)#.subclip(0,3) #NOTE: this function expects color images!!\n",
251 | "clip.write_videofile(output, audio=False)\n",
252 | "clip1.reader.close()"
253 | ]
254 | },
255 | {
256 | "cell_type": "markdown",
257 | "metadata": {},
258 | "source": [
259 | "## Saved images will be in folder `faces/raw_faces` and `faces/aligned_faces` respectively. Binary masks will be in `faces/binary_masks_eyes`."
260 | ]
261 | },
262 | {
263 | "cell_type": "code",
264 | "execution_count": null,
265 | "metadata": {},
266 | "outputs": [],
267 | "source": []
268 | }
269 | ],
270 | "metadata": {
271 | "kernelspec": {
272 | "display_name": "Python 3",
273 | "language": "python",
274 | "name": "python3"
275 | },
276 | "language_info": {
277 | "codemirror_mode": {
278 | "name": "ipython",
279 | "version": 3
280 | },
281 | "file_extension": ".py",
282 | "mimetype": "text/x-python",
283 | "name": "python",
284 | "nbconvert_exporter": "python",
285 | "pygments_lexer": "ipython3",
286 | "version": "3.6.4"
287 | }
288 | },
289 | "nbformat": 4,
290 | "nbformat_minor": 2
291 | }
292 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # faceswap-GAN
2 | Adding adversarial loss and perceptual loss (VGGFace) to deepfakes' (reddit user) auto-encoder architecture.
3 |
4 | ## Updates
5 | | Date | Update |
6 | | ------------- | ------------- |
7 | | 2018-08-27 | **Colab support:** A [colab notebook](https://colab.research.google.com/github/shaoanlu/faceswap-GAN/blob/master/colab_demo/faceswap-GAN_colab_demo.ipynb) for faceswap-GAN v2.2 is provided.|
8 | | 2018-07-25 | **Data preparation:** Add a [new notebook](https://github.com/shaoanlu/faceswap-GAN/blob/master/MTCNN_video_face_detection_alignment.ipynb) for video pre-processing in which MTCNN is used for face detection as well as face alignment.|
9 | | 2018-06-29 | **Model architecture**: faceswap-GAN v2.2 now supports different output resolutions: 64x64, 128x128, and 256x256. Default `RESOLUTION = 64` can be changed in the config cell of [v2.2 notebook](https://github.com/shaoanlu/faceswap-GAN/blob/master/FaceSwap_GAN_v2.2_train_test.ipynb).|
10 | | 2018-06-25 | **New version**: faceswap-GAN v2.2 has been released. The main improvements of v2.2 model are its capability of generating realistic and consistent eye movements (results are shown below, or Ctrl+F for eyes), as well as higher video quality with face alignment.|
11 | | 2018-06-06 | **Model architecture**: Add a self-attention mechanism proposed in [SAGAN](https://arxiv.org/abs/1805.08318) into the V2 GAN model. (Note: There is still no official code release for SAGAN, so the implementation in this repo could be wrong. We'll keep an eye on it.)|
12 |
13 | ## Google Colab support
14 | Here is a [playground notebook](https://colab.research.google.com/github/shaoanlu/faceswap-GAN/blob/master/colab_demo/faceswap-GAN_colab_demo.ipynb) for faceswap-GAN v2.2 on Google Colab. Users can train their own model in the browser.
15 |
16 | ## Descriptions
17 | ### faceswap-GAN v2.2
18 | * [FaceSwap_GAN_v2.2_train_test.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/FaceSwap_GAN_v2.2_train_test.ipynb)
19 | - Notebook for model training of faceswap-GAN model version 2.2.
20 | - This notebook also provides code for still image transformation at the bottom.
21 | - Requires additional training images generated through [prep_binary_masks.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/prep_binary_masks.ipynb).
22 |
23 | * [FaceSwap_GAN_v2.2_video_conversion.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/FaceSwap_GAN_v2.2_video_conversion.ipynb)
24 | - Notebook for video conversion of faceswap-GAN model version 2.2.
25 | - Face alignment using 5-points landmarks is introduced to video conversion.
26 |
27 | * [prep_binary_masks.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/prep_binary_masks.ipynb)
28 | - Notebook for training data preprocessing. Output binary masks are saved in the `./binary_masks/faceA_eyes` and `./binary_masks/faceB_eyes` folders.
29 | - Requires the [face_alignment](https://github.com/1adrianb/face-alignment) package. (An alternative method for generating binary masks (not requiring the `face_alignment` and `dlib` packages) can be found in [MTCNN_video_face_detection_alignment.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/MTCNN_video_face_detection_alignment.ipynb).)
30 |
31 | * [MTCNN_video_face_detection_alignment.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/MTCNN_video_face_detection_alignment.ipynb)
32 | - This notebook performs face detection/alignment on the input video.
33 | - Detected faces are saved in `./faces/raw_faces` and `./faces/aligned_faces` for non-aligned/aligned results respectively.
34 | - Crude eyes binary masks are also generated and saved in `./faces/binary_masks_eyes`. These binary masks can serve as a suboptimal alternative to masks generated through [prep_binary_masks.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/prep_binary_masks.ipynb).
35 |
36 | **Usage**
37 | 1. Run [MTCNN_video_face_detection_alignment.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/MTCNN_video_face_detection_alignment.ipynb) to extract faces from videos. Manually move/rename the aligned face images into `./faceA/` or `./faceB/` folders.
38 | 2. Run [prep_binary_masks.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/prep_binary_masks.ipynb) to generate binary masks of training images.
39 | - You can skip this pre-processing step by (1) setting `use_bm_eyes=False` in the config cell of the train_test notebook, or (2) using the low-quality binary masks generated in step 1.
40 | 3. Run [FaceSwap_GAN_v2.2_train_test.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/FaceSwap_GAN_v2.2_train_test.ipynb) to train models.
41 | 4. Run [FaceSwap_GAN_v2.2_video_conversion.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/FaceSwap_GAN_v2.2_video_conversion.ipynb) to create videos using the trained models in step 3.
42 |
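43 | As a quick reference for the steps above, here is a minimal sketch of the kind of settings involved (the directory variable names are illustrative assumptions; the authoritative names and defaults are in the notebooks' config cells):
44 | 
45 | ```python
46 | # Hypothetical summary of a training configuration -- not the notebooks' exact config cell.
47 | RESOLUTION = 64                      # v2.2 supports 64, 128, or 256
48 | use_bm_eyes = True                   # set False to skip the binary masks of step 2
49 | dir_faceA = "./faceA"                # aligned faces moved here after step 1 (assumed name)
50 | dir_faceB = "./faceB"                # aligned faces moved here after step 1 (assumed name)
51 | dir_bm_faceA_eyes = "./binary_masks/faceA_eyes"   # eye masks from step 2 (assumed name)
52 | dir_bm_faceB_eyes = "./binary_masks/faceB_eyes"   # eye masks from step 2 (assumed name)
53 | 
54 | arch_config = {
55 |     "IMAGE_SHAPE": (RESOLUTION, RESOLUTION, 3),
56 |     "use_self_attn": True,
57 |     "norm": "none",                  # or "batchnorm", "groupnorm", "instancenorm", "layernorm", "hybrid"
58 |     "model_capacity": "standard",    # or "lite"
59 | }
60 | ```
61 | 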
43 | ### Miscellaneous
44 | * [faceswap-GAN_colab_demo.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/colab_demo/faceswap-GAN_colab_demo.ipynb)
45 | - An all-in-one notebook for demonstration purposes that can be run on Google Colab.
46 |
47 | ### Training data format
48 | - Face images are supposed to be in the `./faceA/` or `./faceB/` folder for each target respectively.
49 | - Images will be resized to 256x256 during training.
50 |
51 | ## Generative adversarial networks for face swapping
52 | ### 1. Architecture
53 | 
54 |
55 | 
56 |
57 | 
58 |
59 | ### 2. Results
60 | - **Improved output quality:** Adversarial loss improves reconstruction quality of generated images.
61 | 
62 |
63 | - **Additional results:** [This image](https://www.dropbox.com/s/2nc5guogqk7nwdd/rand_160_2.jpg?raw=1) shows 160 random results generated by v2 GAN with self-attention mechanism (image format: source -> mask -> transformed).
64 |
65 | - **Consistent eye movements (v2.2 model):** Results of the v2.2 model, which specializes in eye directions, are presented below. The v2.2 model generates more realistic eyes within fewer training iterations. (Input gifs are created using [DeepWarp](http://163.172.78.19/).)
66 | - Top row: v2 model; bottom row: v2.2 model. Columns 1, 3, and 5 show the input gifs.
67 | - 
68 | - 
69 |
70 | - **Evaluations:** Evaluations of the output quality on Trump/Cage dataset can be found [here](https://github.com/shaoanlu/faceswap-GAN/blob/master/notes/README.md#13-model-evaluation-for-trumpcage-dataset).
71 |
72 | ###### The Trump/Cage images are obtained from the reddit user [deepfakes' project](https://pastebin.com/hYaLNg1T) on pastebin.com.
73 |
74 | ### 3. Features
75 | - **[VGGFace](https://github.com/rcmalli/keras-vggface) perceptual loss:** Perceptual loss makes the direction of the eyeballs more realistic and consistent with the input face. It also smooths out artifacts in the segmentation mask, resulting in higher output quality.
76 |
77 | - **Attention mask:** The model predicts an attention mask that helps with handling occlusion, eliminating artifacts, and producing a natural skin tone. Below are results of transforming Hinako Sano ([佐野ひなこ](https://ja.wikipedia.org/wiki/%E4%BD%90%E9%87%8E%E3%81%B2%E3%81%AA%E3%81%93)) to Emi Takei ([武井咲](https://ja.wikipedia.org/wiki/%E6%AD%A6%E4%BA%95%E5%92%B2)).
78 |
79 |  
80 | - From left to right: source face, swapped face (before masking), swapped face (after masking).
81 |
82 | 
83 | - From left to right: source face, swapped face (after masking), mask heatmap.
84 | ###### Source video: [佐野ひなことすごくどうでもいい話?(遊戯王)](https://www.youtube.com/watch?v=tzlD1CQvkwU)
85 |
86 | - **Configurable input/output resolution (v2.2)**: The model supports 64x64, 128x128, and 256x256 output resolutions.
87 |
88 | - **Face tracking/alignment using MTCNN and Kalman filter during video conversion**:
89 | - MTCNN is introduced for more stable detections and reliable face alignment (FA).
90 | - A Kalman filter smooths the bounding box positions over frames and eliminates jitter on the swapped face (a sketch of the simpler moving-average fallback is given below the gifs).
91 |
92 | 
93 | 
94 |
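95 | A minimal sketch of the moving-average fallback used when the Kalman filter is disabled (cf. `converter/video_converter.py`; the function name here is illustrative):
96 | 
97 | ```python
98 | def smooth_bbox(prev_box, cur_box, ratio=0.65):
99 |     """Exponential moving average of bounding-box corners (x0, x1, y0, y1);
100 |     `ratio` is the weight given to the previous frame's box."""
101 |     return tuple(int(ratio * p + (1.0 - ratio) * c) for p, c in zip(prev_box, cur_box))
102 | 
103 | print(smooth_bbox((100, 200, 50, 150), (110, 210, 60, 160)))  # -> (103, 203, 53, 153)
104 | ```
105 | 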
95 | - **Training schedule**: Notebooks for training provide a predefined training schedule. The above Trump/Cage face-swapping results were generated by a model trained for 21k iterations using the predefined `TOTAL_ITERS = 30000` training schedule.
96 | - Training tricks: Swapping the decoders in the late stage of training reduces artifacts caused by extreme facial expressions. E.g., some of the failure cases above with a wide-open mouth are transformed better using this trick.
97 |
98 | 
99 |
100 | - **Eyes-aware training:** Introduce a high reconstruction loss and an edge loss around the eye area, which guides the model to generate realistic eyes.
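101 | 
102 | A minimal sketch of the reconstruction part of this idea (the NumPy formulation and the weight value are illustrative assumptions, not the repo's `networks/losses.py`; the edge loss applies the same masking to image gradients):
103 | 
104 | ```python
105 | import numpy as np
106 | 
107 | def eyes_aware_l1(real, fake, eye_mask, w_eyes=3.0):
108 |     """L1 reconstruction loss with extra weight inside the eyes binary mask.
109 |     real, fake: HxWx3 float arrays; eye_mask: HxWx3 mask (1 = eye region)."""
110 |     abs_err = np.abs(real - fake)
111 |     return float(np.mean(abs_err) + w_eyes * np.mean(eye_mask * abs_err))
112 | ```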
101 |
102 | ## Frequently asked questions and troubleshooting
103 |
104 | #### 1. How does it work?
105 | - The following illustration shows a very high-level and abstract (but not exact) flowchart of the denoising autoencoder algorithm. The objective functions look like [this](https://www.dropbox.com/s/e5j5rl7o3tmw6q0/faceswap_GAN_arch4.jpg?raw=1).
106 | 
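107 | 
108 | In code, the objective behind the flowchart can be sketched roughly as follows (the warping and the identity "generator" are trivial stand-ins for illustration, not the repo's implementation):
109 | 
110 | ```python
111 | import numpy as np
112 | 
113 | def autoencoder_step(generator, target_img, rng):
114 |     # Randomly distort the target face; a horizontal shift stands in for the
115 |     # notebooks' random warping/augmentation (an assumption).
116 |     distorted = np.roll(target_img, shift=int(rng.integers(-3, 4)), axis=1)
117 |     reconstructed = generator(distorted)
118 |     # Denoising-autoencoder objective: reconstruct the clean, undistorted face.
119 |     return float(np.mean(np.abs(reconstructed - target_img)))
120 | 
121 | rng = np.random.default_rng(0)
122 | face = rng.random((64, 64, 3), dtype=np.float32)
123 | print(autoencoder_step(lambda x: x, face, rng))  # identity generator as a placeholder
124 | ```
125 | 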
107 | #### 2. Previews look good, but the face does not transform in the output videos?
108 | - The model performs at its full potential when the input images are preprocessed with face alignment methods.
109 | - 
110 |
111 | ## Requirements
112 |
113 | * Keras 2.1.5
114 | * TensorFlow 1.6.0
115 | * Python 3.6.4
116 | * OpenCV
117 | * [keras-vggface](https://github.com/rcmalli/keras-vggface)
118 | * [moviepy](http://zulko.github.io/moviepy/)
119 | * [prefetch_generator](https://github.com/justheuristic/prefetch_generator) (required for v2.2 model)
120 | * [face-alignment](https://github.com/1adrianb/face-alignment) (required as preprocessing for v2.2 model)
121 |
122 | ## Acknowledgments
123 | Code borrows from [tjwei](https://github.com/tjwei/GANotebooks), [eriklindernoren](https://github.com/eriklindernoren/Keras-GAN/blob/master/aae/adversarial_autoencoder.py), [fchollet](https://github.com/fchollet/deep-learning-with-python-notebooks/blob/master/8.5-introduction-to-gans.ipynb), [keras-contrib](https://github.com/keras-team/keras-contrib/blob/master/examples/improved_wgan.py) and [reddit user deepfakes' project](https://pastebin.com/hYaLNg1T). The generative network is adopted from [CycleGAN](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix). Weights and scripts of MTCNN are from [FaceNet](https://github.com/davidsandberg/facenet). Illustrations are from [irasutoya](http://www.irasutoya.com/).
124 |
--------------------------------------------------------------------------------
/converter/video_converter.py:
--------------------------------------------------------------------------------
1 | from .kalman_filter import KalmanFilter
2 | from .landmarks_alignment import *
3 | from .face_transformer import FaceTransformer
4 | from .vc_utils import *
5 | import numpy as np
6 | from moviepy.editor import VideoFileClip
7 |
8 |
9 | class VideoConverter(object):
10 | """
11 | This class is for video conversion
12 |
13 | Attributes:
14 | ftrans: FaceTransformer instance
15 | fdetect: MTCNNFaceDetector instance
16 | prev_x0, prev_x1, prev_y0, prev_y1, frames: Variables for smoothing bounding box
17 | kf0, kf1: KalmanFilter instances for smoothing bounding box
18 | """
19 | def __init__(self):
20 | # Variables for smoothing bounding box
21 | self.prev_x0 = 0
22 | self.prev_x1 = 0
23 | self.prev_y0 = 0
24 | self.prev_y1 = 0
25 | self.frames = 0
26 |
27 | # face transformer
28 | self.ftrans = FaceTransformer()
29 |
30 | # MTCNN face detector
31 | self.fdetect = None
32 |
33 | # Kalman filters
34 | self.kf0 = None
35 | self.kf1 = None
36 |
37 | def set_gan_model(self, model):
38 | self.ftrans.set_model(model)
39 |
40 | def set_face_detector(self, fdetect):
41 | self.fdetect = fdetect
42 |
43 | def _get_smoothed_coord(self, x0, x1, y0, y1, img_shape, use_kalman_filter=True, ratio=0.65):
44 | if not use_kalman_filter:
45 | x0 = int(ratio * self.prev_x0 + (1-ratio) * x0)
46 | x1 = int(ratio * self.prev_x1 + (1-ratio) * x1)
47 | y1 = int(ratio * self.prev_y1 + (1-ratio) * y1)
48 | y0 = int(ratio * self.prev_y0 + (1-ratio) * y0)
49 | else:
50 | x0y0 = np.array([x0, y0]).astype(np.float32)
51 | x1y1 = np.array([x1, y1]).astype(np.float32)
52 | self.kf0.correct(x0y0)
53 | pred_x0y0 = self.kf0.predict()
54 | self.kf1.correct(x1y1)
55 | pred_x1y1 = self.kf1.predict()
56 | x0 = np.max([0, pred_x0y0[0][0]]).astype(int)  # np.int is deprecated in recent NumPy
57 | x1 = np.min([img_shape[0], pred_x1y1[0][0]]).astype(int)
58 | y0 = np.max([0, pred_x0y0[1][0]]).astype(int)
59 | y1 = np.min([img_shape[1], pred_x1y1[1][0]]).astype(int)
60 | if x0 == x1 or y0 == y1:
61 | x0, y0, x1, y1 = self.prev_x0, self.prev_y0, self.prev_x1, self.prev_y1
62 | return x0, x1, y0, y1
63 |
64 | def _set_prev_coord(self, x0, x1, y0, y1):
65 | self.prev_x0 = x0
66 | self.prev_x1 = x1
67 | self.prev_y1 = y1
68 | self.prev_y0 = y0
69 |
70 | def _init_kalman_filters(self, noise_coef):
71 | self.kf0 = KalmanFilter(noise_coef=noise_coef)
72 | self.kf1 = KalmanFilter(noise_coef=noise_coef)
73 |
74 | def convert(self, input_fn, output_fn, options, duration=None):
75 | self.check_options(options)
76 |
77 | if options['use_kalman_filter']:
78 | self._init_kalman_filters(options["kf_noise_coef"])
79 |
80 | self.frames = 0
81 | self.prev_x0 = self.prev_x1 = self.prev_y0 = self.prev_y1 = 0
82 |
83 | if self.fdetect is None:
84 | raise Exception(f"face detector has not been set through VideoConverter.set_face_detector() yet.")
85 |
86 | clip1 = VideoFileClip(input_fn)
87 | if type(duration) is tuple:
88 | clip = clip1.fl_image(lambda img: self.process_video(img, options)).subclip(duration[0], duration[1])
89 | else:
90 | clip = clip1.fl_image(lambda img: self.process_video(img, options))
91 | clip.write_videofile(output_fn, audio=True)
92 | clip1.reader.close()
93 | try:
94 | clip1.audio.reader.close_proc()
95 | except:
96 | pass
97 |
98 | def process_video(self, input_img, options):
99 | """Transform detected faces in single input frame."""
100 | image = input_img
101 |
102 | # detect face using MTCNN (faces: face bbox coord, pnts: landmarks coord.)
103 | faces, pnts = self.fdetect.detect_face(image, minsize=20,
104 | threshold=options["detec_threshold"],
105 | factor=0.709,
106 | use_auto_downscaling=options["use_auto_downscaling"],
107 | min_face_area=options["min_face_area"]
108 | )
109 |
110 | # check if any face detected
111 | if len(faces) == 0:
112 | comb_img = get_init_comb_img(input_img)
113 | triple_img = get_init_triple_img(input_img, no_face=True)
114 |
115 | # init. output image
116 | mask_map = get_init_mask_map(image)
117 | comb_img = get_init_comb_img(input_img)
118 | best_conf_score = 0
119 |
120 | # loop through all detected faces
121 | for i, (x0, y1, x1, y0, conf_score) in enumerate(faces):
122 | lms = pnts[:,i:i+1]
123 | # smooth the bounding box
124 | if options["use_smoothed_bbox"]:
125 | if self.frames != 0 and conf_score >= best_conf_score:
126 | x0, x1, y0, y1 = self._get_smoothed_coord(
127 | x0, x1, y0, y1,
128 | img_shape=image.shape,
129 | use_kalman_filter=options["use_kalman_filter"],
130 | ratio=options["bbox_moving_avg_coef"],
131 | )
132 | self._set_prev_coord(x0, x1, y0, y1)
133 | best_conf_score = conf_score
134 | self.frames += 1
135 | elif conf_score <= best_conf_score:
136 | self.frames += 1
137 | else:
138 | if conf_score >= best_conf_score:
139 | self._set_prev_coord(x0, x1, y0, y1)
140 | best_conf_score = conf_score
141 | if options["use_kalman_filter"]:
142 | for _ in range(200):  # use a throwaway variable to avoid clobbering the enumerate index i
143 | self.kf0.predict()
144 | self.kf1.predict()
145 | self.frames += 1
146 |
147 | # transform face
148 | try:
149 | # get detected face
150 | det_face_im = input_img[int(x0):int(x1),int(y0):int(y1),:]
151 |
152 | # get src/tar landmarks
153 | src_landmarks = get_src_landmarks(x0, x1, y0, y1, lms)
154 | tar_landmarks = get_tar_landmarks(det_face_im)
155 |
156 | # align detected face
157 | aligned_det_face_im = landmarks_match_mtcnn(det_face_im, src_landmarks, tar_landmarks)
158 |
159 | # face transform
160 | r_im, r_rgb, r_a = self.ftrans.transform(
161 | aligned_det_face_im,
162 | direction=options["direction"],
163 | roi_coverage=options["roi_coverage"],
164 | color_correction=options["use_color_correction"],
165 | IMAGE_SHAPE=options["IMAGE_SHAPE"]
166 | )
167 |
168 | # reverse alignment
169 | rev_aligned_det_face_im = landmarks_match_mtcnn(r_im, tar_landmarks, src_landmarks)
170 | rev_aligned_det_face_im_rgb = landmarks_match_mtcnn(r_rgb, tar_landmarks, src_landmarks)
171 | rev_aligned_mask = landmarks_match_mtcnn(r_a, tar_landmarks, src_landmarks)
172 |
173 | # merge source face and transformed face
174 | result = np.zeros_like(det_face_im)
175 | result = rev_aligned_mask/255*rev_aligned_det_face_im_rgb + (1-rev_aligned_mask/255)*det_face_im
176 | result_a = rev_aligned_mask
177 | except:
178 | # catch exceptions for landmarks alignment errors (if any)
179 | print(f"Face alignment error occurs at frame {self.frames}.")
180 | # get detected face
181 | det_face_im = input_img[int(x0):int(x1),int(y0):int(y1),:]
182 |
183 | result, _, result_a = self.ftrans.transform(
184 | det_face_im,
185 | direction=options["direction"],
186 | roi_coverage=options["roi_coverage"],
187 | color_correction=options["use_color_correction"],
188 | IMAGE_SHAPE=options["IMAGE_SHAPE"]
189 | )
190 |
191 | comb_img[int(x0):int(x1),input_img.shape[1]+int(y0):input_img.shape[1]+int(y1),:] = result
192 |
193 | # Enhance output
194 | if options["enhance"] != 0:
195 | comb_img = -1*options["enhance"] * get_init_comb_img(input_img) + (1+options["enhance"]) * comb_img
196 | comb_img = np.clip(comb_img, 0, 255)
197 |
198 | if conf_score >= best_conf_score:
199 | mask_map[int(x0):int(x1),int(y0):int(y1),:] = result_a
200 | mask_map = np.clip(mask_map + .15 * input_img, 0, 255)
201 | # Possible bug: when small faces are detected before the most confident face,
202 | # the mask_map will show brighter input_img
203 | else:
204 | mask_map[int(x0):int(x1),int(y0):int(y1),:] += result_a
205 | mask_map = np.clip(mask_map, 0, 255)
206 |
207 | triple_img = get_init_triple_img(input_img)
208 | triple_img[:, :input_img.shape[1]*2, :] = comb_img
209 | triple_img[:, input_img.shape[1]*2:, :] = mask_map
210 |
211 | if options["output_type"] == 1:
212 | return comb_img[:, input_img.shape[1]:, :] # return only result image
213 | elif options["output_type"] == 2:
214 | return comb_img # return input and result image combined as one
215 | elif options["output_type"] == 3:
216 | return triple_img #return input,result and mask heatmap image combined as one
217 |
218 | @staticmethod
219 | def check_options(options):
220 | if options["roi_coverage"] <= 0 or options["roi_coverage"] >= 1:
221 | raise ValueError(f"roi_coverage should be between 0 and 1 (exclusive).")
222 | if options["bbox_moving_avg_coef"] < 0 or options["bbox_moving_avg_coef"] > 1:
223 | raise ValueError(f"bbox_moving_avg_coef should be between 0 and 1 (inclusive).")
224 | if options["detec_threshold"] < 0 or options["detec_threshold"] > 1:
225 | raise ValueError(f"detec_threshold should be between 0 and 1 (inclusive).")
226 | if options["use_smoothed_bbox"] not in [True, False]:
227 | raise ValueError(f"use_smoothed_bbox should be a boolean.")
228 | if options["use_kalman_filter"] not in [True, False]:
229 | raise ValueError(f"use_kalman_filter should be a boolean.")
230 | if options["use_auto_downscaling"] not in [True, False]:
231 | raise ValueError(f"use_auto_downscaling should be a boolean.")
232 | if options["output_type"] not in range(1,4):
233 | ot = options["output_type"]
234 | raise ValueError(f"Received an unknown output_type option: {ot}.")
235 |
--------------------------------------------------------------------------------
/notes/README.md:
--------------------------------------------------------------------------------
1 | # Notes:
2 | ## This page contains notes on my ongoing experiments and failed attempts.
3 | ### 1. BatchNorm/InstanceNorm:
4 | Caused input/output skin color inconsistency when the two training datasets had different skin color distributions (lighting conditions, shadows, etc.). But I wonder if this would be solved by training the model further.
5 |
6 | ### 2. Perceptual loss
7 | Increasing the perceptual loss weighting factor (to 1) made training unstable. But the weighting [.01, .1, .1] I used is not optimal either; a sketch of how these weights enter the loss is given below.
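8 | 
9 | A minimal sketch of how such weights enter the loss (the layer choice and the L1 distance are assumptions for illustration, not the exact formulation in `networks/losses.py`):
10 | 
11 | ```python
12 | import numpy as np
13 | 
14 | def weighted_perceptual_loss(feats_real, feats_fake, weights=(0.01, 0.1, 0.1)):
15 |     # feats_real / feats_fake: feature maps of the real and generated faces,
16 |     # extracted from three layers of a (VGGFace) feature network.
17 |     return sum(w * float(np.mean(np.abs(fr - ff)))
18 |                for w, fr, ff in zip(weights, feats_real, feats_fake))
19 | 
20 | rng = np.random.default_rng(0)
21 | feats_a = [rng.random((28, 28, 256)) for _ in range(3)]
22 | feats_b = [rng.random((28, 28, 256)) for _ in range(3)]
23 | print(weighted_perceptual_loss(feats_a, feats_b))
24 | ```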
8 |
9 | ### 3. Bottleneck layers
10 | ~~In the encoder architecture, flattening the Conv2D output and shrinking it to Dense(1024) is crucial for the model to learn semantic features, or a face representation. If we used Conv layers only (which means a larger dimension), would it learn features like visual descriptors? ([source paper](https://arxiv.org/abs/1706.02932v2), last paragraph of sec 3.1)~~ Similar results can be achieved by replacing the Dense layer with stride-2 Conv2D layers (shrinking the feature map to 1x1).
11 |
12 | ### 4. Transforming Emi Takei to Hinako Sano
13 | Transforming Emi Takei to Hinako Sano gave suboptimal results, due to imbalanced training data: over 65% of the Hinako Sano images came from the same video series.
14 |
15 | ### 5. About mixup and LSGAN
16 | The **mixup** technique ([arXiv](https://arxiv.org/abs/1710.09412)) and the **least squares loss** function ([arXiv](https://arxiv.org/abs/1712.06391)) are adopted for GAN training. However, I did not do any ablation experiments on them, so I don't know how much impact they had on the outputs. A minimal sketch of the combination is given below.
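17 | 
18 | A minimal sketch of combining the two for discriminator training (the Beta parameter and the toy discriminator are illustrative assumptions, not the repo's implementation):
19 | 
20 | ```python
21 | import numpy as np
22 | 
23 | def mixup_batch(real_batch, fake_batch, alpha=0.2, rng=None):
24 |     """Mix real and fake images sample-wise; return the mixed batch and soft labels."""
25 |     rng = rng or np.random.default_rng()
26 |     lam = rng.beta(alpha, alpha, size=(len(real_batch), 1, 1, 1))
27 |     mixed = lam * real_batch + (1.0 - lam) * fake_batch
28 |     labels = lam.reshape(-1, 1)    # 1 = real, 0 = fake
29 |     return mixed, labels
30 | 
31 | def toy_discriminator(x):          # stand-in for the real discriminator network
32 |     return x.mean(axis=(1, 2, 3)).reshape(-1, 1)
33 | 
34 | rng = np.random.default_rng(0)
35 | real, fake = rng.random((4, 64, 64, 3)), rng.random((4, 64, 64, 3))
36 | mixed, labels = mixup_batch(real, fake, rng=rng)
37 | lsgan_d_loss = float(np.mean((toy_discriminator(mixed) - labels) ** 2))  # least-squares (MSE) loss
38 | print(lsgan_d_loss)
39 | ```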
17 |
18 | ### 6. Adding landmarks as input feature
19 | Adding face landmarks as the fourth input channel during training (w/ dropout_chance=0.3) forces the model to learn (overfit) these face features. However, it didn't give me any discernible improvement. The following gif is the resulting clip; it should be mentioned that the landmarks information was not provided during video conversion, but the model was still able to produce accurate landmarks because similar [face, landmarks] pairs had already been shown to the model during training.
20 | - 
21 |
22 | ### 7. **Recursive loop:** Feed model's output image as its input, **repeat N times**.
23 | - Idea: Since our model is able to transform a source face into a target face, if we feed the generated fake target face back in as its input, will the model refine the fake face to be more like a real target face?
24 | - **Version 1 result (w/o alpha mask)** (left to right: source, N=0, N=2, N=10, N=50)
25 | - 
26 | - The model seems to refine the fake face (to be more similar to the target face), but its shape and color go awry. Furthermore, in certain frames of N=50, **there are blue colors that only appear in the target face training data but not in the source face.** Does this mean that the model is trying to pull out training images it has memorized, or is it trying to transform the input image into a particular training sample?
27 | - **Version 2 result (w/ alpha mask)** (left to right: source, N=0, N=50, N=150, N=500)
28 | - 
29 | - The V2 model is more robust. It generates almost the same result before/after applying the recursive loop, except for some artifacts on the bangs.
30 |
31 | ### 8. **Code manipulation and interpolation**:
32 | - 
33 | - Idea: Refine the output face by adding information from training images that look like the input image.
34 | - kNN takes features extracted from a ResNet50 model as its input.
35 | - Similar results can be achieved by simply taking a weighted average of the input image and the images retrieved by kNN (instead of manipulating the code).
36 | - TODO: Implement **alphaGAN**, which integrates a VAE that has a more representative latent space.
37 |
38 | ### 9. **CycleGAN experiment**:
39 | - 
40 | - Top row: input images; bottom row: output images.
41 | - CycleGAN produces artifacts on the output faces. Also, features are not consistent before/after transformation, e.g., bangs and skin tone.
42 | - ~~**CycleGAN with masking**: To be updated.~~
43 |
44 | ### 10. **(Towards) One Model to Swap Them All**
45 | - Objective: Train a model that is capable of swapping any given face to Emma Watson.
46 | - `faceA` folder contains ~2k images of Emma Watson.
47 | - `faceB` folder contains ~200k images from celebA dataset.
48 | - Hacks: Add a **domain adversarial loss** on the embeddings (from [XGAN](https://arxiv.org/abs/1711.05139) and [this ICCV GAN tutorial](https://youtu.be/uUUvieVxCMs?t=18m59s)). It encourages the encoder to generate embeddings from the two different domains that lie in the same subspace (assuming the celebA dataset almost covers the true face image distribution). Also, heavy data augmentation (random channel shifting, random downsampling, etc.) is applied on face A to prevent overfitting.
49 | - Result: The model performed poorly on hard samples, e.g., a man with a beard.
50 |
51 | ### 11. **Face parts swapping as data augmentation**
52 | - 
53 | - Swap only part of the source face (mouth/nose/eyes) onto the target face, treating the swapped face as augmented training data for the source face.
54 | - For each source face image, a look-alike target face is retrieved using kNN (taking an averaged feature map as input) for face part swapping.
55 | - Result: Unfortunately, the model also learns to generate the artifacts that appear in the augmented data, e.g., sharp edges around the eyes/nose and weirdly warped faces. The artifacts in the augmented data are caused by imperfect blending (due to false landmarks and bad perspective warping).
56 |
57 | ### 12. Neural style transfer as output refinement
58 | - Problem: The 64x64 output resolution is blurry and sometimes the skin tone does not match the target face.
59 | - Question: Is there any other way to refine the 64x64 output face so that it looks natural in, say, a 256x256 input image, other than increasing the output resolution (which leads to much longer training time) or training a super-resolution model?
60 | - Attempts: **Applied neural style transfer techniques as output refinement**, hoping it can improve output quality and fix the color mismatch without training an additional super-resolution model or increasing the model resolution.
61 | - Method: We used the neural style transfer implementations from [titu1994/Neural-Style-Transfer](https://github.com/titu1994/Neural-Style-Transfer), [eridgd/WCT-TF](https://github.com/eridgd/WCT-TF), and [jonrei/tf-AdaIN](https://github.com/jonrei/tf-AdaIN). All repos provide pre-trained models. We fed the swapped face (i.e., the output image of the GAN model) as the content image and the input face as the style image.
62 | - Results: The style transfer of Gatys et al. gave decent results but requires a long execution time (~1.5 min per 256x256 image on a K80), thus it is not applicable for video conversion. The "Universal Style Transfer via Feature Transforms" (WCT) and "Arbitrary Style Transfer in Real-time with Adaptive Instance Normalization" (AdaIN) approaches somehow failed to preserve the content information (perhaps I did not tune the params well).
63 | - Conclusion: **Using neural style transfer to improve output quality seems promising**, but we are not sure if it will benefit video quality without introducing jitter. Also, the execution time is a problem; we should experiment with more arbitrary style transfer networks to see if any model can do a good job of face refinement within one (or several) forward pass(es).
64 | - 
65 |
66 | ### 13. Model evaluation on Trump/Cage dataset
67 | - Problem: GANs are hard to evaluate. Generally, Inception Score (IS) and Fréchet Inception Distance (FID) are the most common metrics for evaluating the output "reality" (i.e., how close the outputs are to real samples). However, in the face-swapping task, we care more about the "quality" of the outputs, such as how similar the transformed output face is to its target face. Thus we want to find an objective approach to evaluate the model performance as a counterpart to subjective judgment from output visualization.
68 | - **Evaluation method 1: Compare the predicted identities of VGGFace-ResNet50.**
69 | - We look at the predictions of ResNet50 and check if it spits out similar predictions on real/fake images.
70 | - There are 8631 identities in VGGFace (but unfortunately neither Donald Trump nor Nicolas Cage is in this dataset).
71 | - Top 3 most look-alike identities of "real Trump" are: Alan_Mulally, Jon_Voight, and Tom_Berenger
72 | - Top 3 most look-alike identities of "fake Trump" are: Alan_Mulally, Franjo_Pooth, and Jon_Voight
73 | -
74 | - Top 3 most look-alike identities of "real Cage" are: Jimmy_Stewart, Nick_Grimshaw, and Sylvester_Stallone
75 | - Top 3 most look-alike identities of "fake Cage" are: Franjo_Pooth, Jimmy_Stewart, and Bob_Beckel
76 | -
77 | - **Observation:** Overall, the top-1 look-alike identity of the real Trump/Cage also appears in the top-3 of the fake one. (Notice that face-swapping only changes the facial attributes, not the chin and face shape. Thus the fake faces will not look exactly the same as their target faces.)
78 | - **Evaluation method 2: Compare the cosine similarity of extracted VGGFace-ResNet50 features.**
79 | - Features (embeddings) are extracted from the global average pooling layer (the last layer before the fully-connected layer) of ResNet50, which have a dimension of 2048.
80 | -
81 | -
82 | - The definition of cosine distance can be found [here](https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cosine.html). The cosine similarity is just the cosine distance without the "one minus" part (a minimal sketch is given at the end of this section).
83 | - The following 2 heatmaps depict the within-class cosine similarity of real Trump images and real Cage images.
84 | -
85 | - The following 2 heatmaps illustrate the cosine similarity between real/fake Trump images and between real/fake Cage images. It is obvious that the similarity is not as high as between real samples, but it is still close enough. (Note that the low similarity between real and fake Cage is caused by profile faces and heavily occluded faces in the real Trump samples, which are hard for the faceswap model to transform.)
86 | -
87 | - We also checked the cosine similarity between real Trump and real Cage. The result was not surprising: it shows low similarity between the two identities. This also supports the above observation that the swapped face looks much like its target face.
88 | -
89 | - **Observation:** Evaluation using ResNet50 features gives a clear indication that the swapped faces closely resemble their target faces.
90 | - **Conclusion:** Cosine similarity seems to be a good way to compare performance among different models on the same dataset. Hopefully this can accelerate our iterations when searching for optimal hyper-parameters and exploring model architectures.
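91 | 
92 | A minimal sketch of the metric itself (the embeddings below are random placeholders, not actual ResNet50 features):
93 | 
94 | ```python
95 | import numpy as np
96 | 
97 | def cosine_similarity(u, v):
98 |     """Cosine similarity of two embedding vectors, e.g. the 2048-d
99 |     global-average-pooling features mentioned above."""
100 |     return float(np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v)))
101 | 
102 | # scipy.spatial.distance.cosine(u, v) equals 1 - cosine_similarity(u, v)
103 | rng = np.random.default_rng(0)
104 | emb_real, emb_fake = rng.random(2048), rng.random(2048)
105 | print(cosine_similarity(emb_real, emb_fake))
106 | ```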
91 |
92 | ### 14. 3D face reconstruction for output refinement
93 | - Using [PRNet](https://github.com/YadiraF/PRNet) and its accompanying [face-swapping script](https://github.com/YadiraF/PRNet/blob/master/demo_texture.py) to refine the output image.
94 | - **Result:** For extreme facial expressions, the mouth shape becomes more consistent after face texture editing. (The missing details can be restored through style transfer as shown in exp. 12 above.)
95 | - Left to right: Input, output, refined output
96 | - 
97 | - 
98 | - For occluded faces, the pose might not be correctly estimated, thus the refined outputs are likely to be distorted, e.g., the displaced microphone in the figure below.
99 | - 
100 |
--------------------------------------------------------------------------------
/networks/faceswap_gan_model.py:
--------------------------------------------------------------------------------
1 | from keras.models import Model
2 | from keras.layers import *
3 | from keras.optimizers import Adam
4 | from .nn_blocks import *
5 | from .losses import *
6 |
7 | class FaceswapGANModel():
8 | """
9 | faceswap-GAN v2.2 model
10 |
11 | Attributes:
12 | arch_config: A dictionary that contains architecture configurations (details are described in train notebook).
13 | nc_G_inp: int, number of generator input channels
14 | nc_D_inp: int, number of discriminator input channels
15 | lrG: float, learning rate of the generator
16 | lrD: float, learning rate of the discriminator
17 | """
18 | def __init__(self, **arch_config):
19 | self.nc_G_inp = 3
20 | self.nc_D_inp = 6
21 | self.IMAGE_SHAPE = arch_config['IMAGE_SHAPE']
22 | self.lrD = 2e-4
23 | self.lrG = 1e-4
24 | self.use_self_attn = arch_config['use_self_attn']
25 | self.norm = arch_config['norm']
26 | self.model_capacity = arch_config['model_capacity']
27 | self.enc_nc_out = 256 if self.model_capacity == "lite" else 512
28 |
29 | # define networks
30 | self.encoder = self.build_encoder(nc_in=self.nc_G_inp,
31 | input_size=self.IMAGE_SHAPE[0],
32 | use_self_attn=self.use_self_attn,
33 | norm=self.norm,
34 | model_capacity=self.model_capacity
35 | )
36 | self.decoder_A = self.build_decoder(nc_in=self.enc_nc_out,
37 | input_size=8,
38 | output_size=self.IMAGE_SHAPE[0],
39 | use_self_attn=self.use_self_attn,
40 | norm=self.norm,
41 | model_capacity=self.model_capacity
42 | )
43 | self.decoder_B = self.build_decoder(nc_in=self.enc_nc_out,
44 | input_size=8,
45 | output_size=self.IMAGE_SHAPE[0],
46 | use_self_attn=self.use_self_attn,
47 | norm=self.norm,
48 | model_capacity=self.model_capacity
49 | )
50 | self.netDA = self.build_discriminator(nc_in=self.nc_D_inp,
51 | input_size=self.IMAGE_SHAPE[0],
52 | use_self_attn=self.use_self_attn,
53 | norm=self.norm
54 | )
55 | self.netDB = self.build_discriminator(nc_in=self.nc_D_inp,
56 | input_size=self.IMAGE_SHAPE[0],
57 | use_self_attn=self.use_self_attn,
58 | norm=self.norm
59 | )
60 | x = Input(shape=self.IMAGE_SHAPE) # dummy input tensor
61 | self.netGA = Model(x, self.decoder_A(self.encoder(x)))
62 | self.netGB = Model(x, self.decoder_B(self.encoder(x)))
63 |
64 | # define variables
65 | self.distorted_A, self.fake_A, self.mask_A, \
66 | self.path_A, self.path_mask_A, self.path_abgr_A, self.path_bgr_A = self.define_variables(netG=self.netGA)
67 | self.distorted_B, self.fake_B, self.mask_B, \
68 | self.path_B, self.path_mask_B, self.path_abgr_B, self.path_bgr_B = self.define_variables(netG=self.netGB)
69 | self.real_A = Input(shape=self.IMAGE_SHAPE)
70 | self.real_B = Input(shape=self.IMAGE_SHAPE)
71 | self.mask_eyes_A = Input(shape=self.IMAGE_SHAPE)
72 | self.mask_eyes_B = Input(shape=self.IMAGE_SHAPE)
73 |
74 | @staticmethod
75 | def build_encoder(nc_in=3,
76 | input_size=64,
77 | use_self_attn=True,
78 | norm='none',
79 | model_capacity='standard'):
80 | coef = 2 if model_capacity == "lite" else 1
81 | latent_dim = 2048 if (model_capacity == "lite" and input_size > 64) else 1024
82 | upscale_block = upscale_nn if model_capacity == "lite" else upscale_ps
83 | activ_map_size = input_size
84 | use_norm = False if (norm == 'none') else True
85 |
86 | inp = Input(shape=(input_size, input_size, nc_in))
87 | x = Conv2D(64//coef, kernel_size=5, use_bias=False, padding="same")(inp) # use_bias should be True
88 | x = conv_block(x, 128//coef)
89 | x = conv_block(x, 256//coef, use_norm, norm=norm)
90 | x = self_attn_block(x, 256//coef) if use_self_attn else x
91 | x = conv_block(x, 512//coef, use_norm, norm=norm)
92 | x = self_attn_block(x, 512//coef) if use_self_attn else x
93 | x = conv_block(x, 1024//(coef**2), use_norm, norm=norm)
94 |
95 | activ_map_size = activ_map_size//16
96 | while (activ_map_size > 4):
97 | x = conv_block(x, 1024//(coef**2), use_norm, norm=norm)
98 | activ_map_size = activ_map_size//2
99 |
100 | x = Dense(latent_dim)(Flatten()(x))
101 | x = Dense(4*4*1024//(coef**2))(x)
102 | x = Reshape((4, 4, 1024//(coef**2)))(x)
103 | out = upscale_block(x, 512//coef, use_norm, norm=norm)
104 | return Model(inputs=inp, outputs=out)
105 |
106 | @staticmethod
107 | def build_decoder(nc_in=512,
108 | input_size=8,
109 | output_size=64,
110 | use_self_attn=True,
111 | norm='none',
112 | model_capacity='standard'):
113 | coef = 2 if model_capacity == "lite" else 1
114 | upscale_block = upscale_nn if model_capacity == "lite" else upscale_ps
115 | activ_map_size = input_size
116 | use_norm = False if (norm == 'none') else True
117 |
118 | inp = Input(shape=(input_size, input_size, nc_in))
119 | x = inp
120 | x = upscale_block(x, 256//coef, use_norm, norm=norm)
121 | x = upscale_block(x, 128//coef, use_norm, norm=norm)
122 | x = self_attn_block(x, 128//coef) if use_self_attn else x
123 | x = upscale_block(x, 64//coef, use_norm, norm=norm)
124 | x = res_block(x, 64//coef, norm=norm)
125 | x = self_attn_block(x, 64//coef) if use_self_attn else conv_block(x, 64//coef, strides=1)
126 |
127 | outputs = []
128 | activ_map_size = activ_map_size * 8
129 | while (activ_map_size < output_size):
130 | outputs.append(Conv2D(3, kernel_size=5, padding='same', activation="tanh")(x))
131 | x = upscale_block(x, 64//coef, use_norm, norm=norm)
132 | x = conv_block(x, 64//coef, strides=1)
133 | activ_map_size *= 2
134 |
135 | alpha = Conv2D(1, kernel_size=5, padding='same', activation="sigmoid")(x)
136 | bgr = Conv2D(3, kernel_size=5, padding='same', activation="tanh")(x)
137 | out = concatenate([alpha, bgr])
138 | outputs.append(out)
139 | return Model(inp, outputs)
140 |
141 | @staticmethod
142 | def build_discriminator(nc_in,
143 | input_size=64,
144 | use_self_attn=True,
145 | norm='none'):
146 | activ_map_size = input_size
147 | use_norm = False if (norm == 'none') else True
148 |
149 | inp = Input(shape=(input_size, input_size, nc_in))
150 | x = conv_block_d(inp, 64, False)
151 | x = conv_block_d(x, 128, use_norm, norm=norm)
152 | x = conv_block_d(x, 256, use_norm, norm=norm)
153 | x = self_attn_block(x, 256) if use_self_attn else x
154 |
155 | activ_map_size = activ_map_size//8
156 | while (activ_map_size > 8):
157 | x = conv_block_d(x, 256, use_norm, norm=norm)
158 | x = self_attn_block(x, 256) if use_self_attn else x
159 | activ_map_size = activ_map_size//2
160 |
161 | out = Conv2D(1, kernel_size=4, use_bias=False, padding="same")(x) # use_bias should be True
162 | return Model(inputs=[inp], outputs=out)
163 |
164 | @staticmethod
165 | def define_variables(netG):
166 | distorted_input = netG.inputs[0]
167 | fake_output = netG.outputs[-1]
168 | alpha = Lambda(lambda x: x[:,:,:, :1])(fake_output)
169 | bgr = Lambda(lambda x: x[:,:,:, 1:])(fake_output)
170 |
171 | masked_fake_output = alpha * bgr + (1-alpha) * distorted_input
172 |
173 | fn_generate = K.function([distorted_input], [masked_fake_output])
174 | fn_mask = K.function([distorted_input], [concatenate([alpha, alpha, alpha])])
175 | fn_abgr = K.function([distorted_input], [concatenate([alpha, bgr])])
176 | fn_bgr = K.function([distorted_input], [bgr])
177 | return distorted_input, fake_output, alpha, fn_generate, fn_mask, fn_abgr, fn_bgr
178 |
179 | def build_train_functions(self, loss_weights=None, **loss_config):
180 | assert loss_weights is not None, "loss weights are not provided."
181 | # Adversarial loss
182 | loss_DA, loss_adv_GA = adversarial_loss(self.netDA, self.real_A, self.fake_A,
183 | self.distorted_A,
184 | loss_config["gan_training"],
185 | **loss_weights)
186 | loss_DB, loss_adv_GB = adversarial_loss(self.netDB, self.real_B, self.fake_B,
187 | self.distorted_B,
188 | loss_config["gan_training"],
189 | **loss_weights)
190 |
191 | # Reconstruction loss
192 | loss_recon_GA = reconstruction_loss(self.real_A, self.fake_A,
193 | self.mask_eyes_A, self.netGA.outputs,
194 | **loss_weights)
195 | loss_recon_GB = reconstruction_loss(self.real_B, self.fake_B,
196 | self.mask_eyes_B, self.netGB.outputs,
197 | **loss_weights)
198 |
199 | # Edge loss
200 | loss_edge_GA = edge_loss(self.real_A, self.fake_A, self.mask_eyes_A, **loss_weights)
201 | loss_edge_GB = edge_loss(self.real_B, self.fake_B, self.mask_eyes_B, **loss_weights)
202 |
203 | if loss_config['use_PL']:
204 | loss_pl_GA = perceptual_loss(self.real_A, self.fake_A, self.distorted_A,
205 | self.mask_eyes_A, self.vggface_feats, **loss_weights)
206 | loss_pl_GB = perceptual_loss(self.real_B, self.fake_B, self.distorted_B,
207 | self.mask_eyes_B, self.vggface_feats, **loss_weights)
208 | else:
209 | loss_pl_GA = loss_pl_GB = K.zeros(1)
210 |
211 | loss_GA = loss_adv_GA + loss_recon_GA + loss_edge_GA + loss_pl_GA
212 | loss_GB = loss_adv_GB + loss_recon_GB + loss_edge_GB + loss_pl_GB
213 |
214 | # The following losses are rather trivial, thus their weights are fixed.
215 | # Cycle consistency loss
216 | if loss_config['use_cyclic_loss']:
217 | loss_GA += 10 * cyclic_loss(self.netGA, self.netGB, self.real_A)
218 | loss_GB += 10 * cyclic_loss(self.netGB, self.netGA, self.real_B)
219 |
220 | # Alpha mask loss
221 | if not loss_config['use_mask_hinge_loss']:
222 | loss_GA += 1e-2 * K.mean(K.abs(self.mask_A))
223 | loss_GB += 1e-2 * K.mean(K.abs(self.mask_B))
224 | else:
225 | loss_GA += 0.1 * K.mean(K.maximum(0., loss_config['m_mask'] - self.mask_A))
226 | loss_GB += 0.1 * K.mean(K.maximum(0., loss_config['m_mask'] - self.mask_B))
227 |
228 | # Alpha mask total variation loss
229 | loss_GA += 0.1 * K.mean(first_order(self.mask_A, axis=1))
230 | loss_GA += 0.1 * K.mean(first_order(self.mask_A, axis=2))
231 | loss_GB += 0.1 * K.mean(first_order(self.mask_B, axis=1))
232 | loss_GB += 0.1 * K.mean(first_order(self.mask_B, axis=2))
233 |
234 | # L2 weight decay
235 | # https://github.com/keras-team/keras/issues/2662
236 | for loss_tensor in self.netGA.losses:
237 | loss_GA += loss_tensor
238 | for loss_tensor in self.netGB.losses:
239 | loss_GB += loss_tensor
240 | for loss_tensor in self.netDA.losses:
241 | loss_DA += loss_tensor
242 | for loss_tensor in self.netDB.losses:
243 | loss_DB += loss_tensor
244 |
245 | weightsDA = self.netDA.trainable_weights
246 | weightsGA = self.netGA.trainable_weights
247 | weightsDB = self.netDB.trainable_weights
248 | weightsGB = self.netGB.trainable_weights
249 |
250 | # Define training functions
251 | # Adam(...).get_updates(...)
252 | training_updates = Adam(lr=self.lrD*loss_config['lr_factor'], beta_1=0.5).get_updates(weightsDA,[],loss_DA)
253 | self.netDA_train = K.function([self.distorted_A, self.real_A],[loss_DA], training_updates)
254 | training_updates = Adam(lr=self.lrG*loss_config['lr_factor'], beta_1=0.5).get_updates(weightsGA,[], loss_GA)
255 | self.netGA_train = K.function([self.distorted_A, self.real_A, self.mask_eyes_A],
256 | [loss_GA, loss_adv_GA, loss_recon_GA, loss_edge_GA, loss_pl_GA],
257 | training_updates)
258 |
259 | training_updates = Adam(lr=self.lrD*loss_config['lr_factor'], beta_1=0.5).get_updates(weightsDB,[],loss_DB)
260 | self.netDB_train = K.function([self.distorted_B, self.real_B],[loss_DB], training_updates)
261 | training_updates = Adam(lr=self.lrG*loss_config['lr_factor'], beta_1=0.5).get_updates(weightsGB,[], loss_GB)
262 | self.netGB_train = K.function([self.distorted_B, self.real_B, self.mask_eyes_B],
263 | [loss_GB, loss_adv_GB, loss_recon_GB, loss_edge_GB, loss_pl_GB],
264 | training_updates)
265 |
266 | def build_pl_model(self, vggface_model, before_activ=False):
267 | # Define Perceptual Loss Model
268 | vggface_model.trainable = False
269 | if before_activ == False:
270 | out_size112 = vggface_model.layers[1].output
271 | out_size55 = vggface_model.layers[36].output
272 | out_size28 = vggface_model.layers[78].output
273 | out_size7 = vggface_model.layers[-2].output
274 | else:
275 | out_size112 = vggface_model.layers[15].output # misnamed: the output size is 55
276 | out_size55 = vggface_model.layers[35].output
277 | out_size28 = vggface_model.layers[77].output
278 | out_size7 = vggface_model.layers[-3].output
279 | self.vggface_feats = Model(vggface_model.input, [out_size112, out_size55, out_size28, out_size7])
280 | self.vggface_feats.trainable = False
281 |
282 | def load_weights(self, path="./models"):
283 | try:
284 | self.encoder.load_weights(f"{path}/encoder.h5")
285 | self.decoder_A.load_weights(f"{path}/decoder_A.h5")
286 | self.decoder_B.load_weights(f"{path}/decoder_B.h5")
287 | self.netDA.load_weights(f"{path}/netDA.h5")
288 | self.netDB.load_weights(f"{path}/netDB.h5")
289 |             print ("Model weights files loaded successfully.")
290 |         except Exception as e:
291 |             print (f"An error occurred while loading the weights files: {e}")
292 |             pass
293 |
294 | def save_weights(self, path="./models"):
295 | try:
296 | self.encoder.save_weights(f"{path}/encoder.h5")
297 | self.decoder_A.save_weights(f"{path}/decoder_A.h5")
298 | self.decoder_B.save_weights(f"{path}/decoder_B.h5")
299 | self.netDA.save_weights(f"{path}/netDA.h5")
300 | self.netDB.save_weights(f"{path}/netDB.h5")
301 | print (f"Model weights files have been saved to {path}.")
302 |         except Exception as e:
303 |             print (f"An error occurred while saving the weights files: {e}")
304 |             pass
305 |
306 | def train_one_batch_G(self, data_A, data_B):
307 | if len(data_A) == 4 and len(data_B) == 4:
308 | _, warped_A, target_A, bm_eyes_A = data_A
309 | _, warped_B, target_B, bm_eyes_B = data_B
310 | elif len(data_A) == 3 and len(data_B) == 3:
311 | warped_A, target_A, bm_eyes_A = data_A
312 | warped_B, target_B, bm_eyes_B = data_B
313 | else:
314 |             raise ValueError("Expected 3 or 4 items per batch from the input data generator.")
315 | errGA = self.netGA_train([warped_A, target_A, bm_eyes_A])
316 | errGB = self.netGB_train([warped_B, target_B, bm_eyes_B])
317 | return errGA, errGB
318 |
319 | def train_one_batch_D(self, data_A, data_B):
320 | if len(data_A) == 4 and len(data_B) == 4:
321 | _, warped_A, target_A, _ = data_A
322 | _, warped_B, target_B, _ = data_B
323 | elif len(data_A) == 3 and len(data_B) == 3:
324 | warped_A, target_A, _ = data_A
325 | warped_B, target_B, _ = data_B
326 | else:
327 |             raise ValueError("Expected 3 or 4 items per batch from the input data generator.")
328 | errDA = self.netDA_train([warped_A, target_A])
329 | errDB = self.netDB_train([warped_B, target_B])
330 | return errDA, errDB
331 |
332 | def transform_A2B(self, img):
333 | return self.path_abgr_B([[img]])
334 |
335 | def transform_B2A(self, img):
336 | return self.path_abgr_A([[img]])
--------------------------------------------------------------------------------
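
For orientation, the sketch below shows how the training methods defined above (`build_pl_model`, `load_weights`, `train_one_batch_D`, `train_one_batch_G`, `save_weights`) would typically be wired together. It is a minimal illustration only: the class name `FaceswapGANModel`, the import path, the use of `keras_vggface` for the perceptual-loss backbone, the dummy batch generator, and all shapes and constants are assumptions, not part of the file above.

```python
# Minimal training-loop sketch (illustration only; names and shapes are assumptions).
import numpy as np
from keras_vggface.vggface import VGGFace                  # assumed perceptual-loss backbone
from networks.faceswap_gan_model import FaceswapGANModel   # assumed class name / import path

def dummy_batches(batch_size=8, res=64):
    """Stand-in for the project's data loader; yields (warped, target, eye_mask)."""
    while True:
        warped = np.random.uniform(-1, 1, (batch_size, res, res, 3)).astype(np.float32)
        target = np.random.uniform(-1, 1, (batch_size, res, res, 3)).astype(np.float32)
        eye_mask = np.zeros((batch_size, res, res, 3), dtype=np.float32)  # shape assumed
        yield warped, target, eye_mask

model = FaceswapGANModel()                       # constructor arguments omitted (not shown above)
model.load_weights(path="./models")              # resumes from encoder/decoder/netD .h5 files

# build_pl_model expects a frozen VGGFace feature extractor for the perceptual loss.
vggface = VGGFace(include_top=False, model="resnet50", input_shape=(224, 224, 3))
model.build_pl_model(vggface_model=vggface, before_activ=False)

gen_A, gen_B = dummy_batches(), dummy_batches()
for it in range(10000):                          # iteration count is illustrative
    data_A, data_B = next(gen_A), next(gen_B)
    errDA, errDB = model.train_one_batch_D(data_A, data_B)   # discriminator step
    errGA, errGB = model.train_one_batch_G(data_A, data_B)   # generator step
    if it % 1000 == 0:
        model.save_weights(path="./models")
```

In the actual training notebooks the batches come from the project's data loader and include the eye masks produced by prep_binary_masks.ipynb (shown later in this dump) rather than the placeholder arrays used here.
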
/legacy/FaceSwap_GAN_v2_test_img.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "\n",
8 | "# 1. Import packages"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "metadata": {},
15 | "outputs": [
16 | {
17 | "name": "stderr",
18 | "output_type": "stream",
19 | "text": [
20 | "Using TensorFlow backend.\n"
21 | ]
22 | }
23 | ],
24 | "source": [
25 | "from keras.models import Sequential, Model\n",
26 | "from keras.layers import *\n",
27 | "from keras.layers.advanced_activations import LeakyReLU\n",
28 | "from keras.activations import relu\n",
29 | "from keras.initializers import RandomNormal\n",
30 | "from keras.applications import *\n",
31 | "import keras.backend as K\n",
32 | "from tensorflow.contrib.distributions import Beta\n",
33 | "import tensorflow as tf\n",
34 | "from keras.optimizers import Adam"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 2,
40 | "metadata": {},
41 | "outputs": [],
42 | "source": [
43 | "from image_augmentation import random_transform\n",
44 | "from image_augmentation import random_warp\n",
45 | "from utils import get_image_paths, load_images, stack_images\n",
46 | "from pixel_shuffler import PixelShuffler"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 3,
52 | "metadata": {},
53 | "outputs": [],
54 | "source": [
55 | "import time\n",
56 | "import numpy as np\n",
57 | "from PIL import Image\n",
58 | "import cv2\n",
59 | "import glob\n",
60 | "from random import randint, shuffle\n",
61 | "from IPython.display import clear_output\n",
62 | "from IPython.display import display\n",
63 | "import matplotlib.pyplot as plt\n",
64 | "%matplotlib inline"
65 | ]
66 | },
67 | {
68 | "cell_type": "markdown",
69 | "metadata": {},
70 | "source": [
71 | "\n",
72 | "# 4. Config\n",
73 | "\n",
74 | "mixup paper: https://arxiv.org/abs/1710.09412\n",
75 | "\n",
76 | "Default training data directories: `./faceA/` and `./faceB/`"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": 4,
82 | "metadata": {},
83 | "outputs": [],
84 | "source": [
85 | "K.set_learning_phase(0)"
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": 5,
91 | "metadata": {},
92 | "outputs": [],
93 | "source": [
94 | "channel_axis=-1\n",
95 | "channel_first = False"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 6,
101 | "metadata": {},
102 | "outputs": [],
103 | "source": [
104 | "IMAGE_SHAPE = (64, 64, 3)\n",
105 | "nc_in = 3 # number of input channels of generators\n",
106 | "nc_D_inp = 6 # number of input channels of discriminators\n",
107 | "\n",
108 | "use_self_attn = False\n",
109 | "w_l2 = 1e-4 # weight decay\n",
110 | "conv_init = RandomNormal(0, 0.02) # conv-kernel initializer; required by the model blocks below\n",
111 | "batchSize = 8\n",
112 | "\n",
113 | "# Path of training images\n",
114 | "img_dirA = './faceA/*.*'\n",
115 | "img_dirB = './faceB/*.*'"
116 | ]
117 | },
118 | {
119 | "cell_type": "markdown",
120 | "metadata": {},
121 | "source": [
122 | "\n",
123 | "# 5. Define models"
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": 7,
129 | "metadata": {},
130 | "outputs": [],
131 | "source": [
132 | "class Scale(Layer):\n",
133 | " '''\n",
134 | " Code borrows from https://github.com/flyyufelix/cnn_finetune\n",
135 | " '''\n",
136 | " def __init__(self, weights=None, axis=-1, gamma_init='zero', **kwargs):\n",
137 | " self.axis = axis\n",
138 | " self.gamma_init = initializers.get(gamma_init)\n",
139 | " self.initial_weights = weights\n",
140 | " super(Scale, self).__init__(**kwargs)\n",
141 | "\n",
142 | " def build(self, input_shape):\n",
143 | " self.input_spec = [InputSpec(shape=input_shape)]\n",
144 | "\n",
145 | " # Compatibility with TensorFlow >= 1.0.0\n",
146 | " self.gamma = K.variable(self.gamma_init((1,)), name='{}_gamma'.format(self.name))\n",
147 | " self.trainable_weights = [self.gamma]\n",
148 | "\n",
149 | " if self.initial_weights is not None:\n",
150 | " self.set_weights(self.initial_weights)\n",
151 | " del self.initial_weights\n",
152 | "\n",
153 | " def call(self, x, mask=None):\n",
154 | " return self.gamma * x\n",
155 | "\n",
156 | " def get_config(self):\n",
157 | " config = {\"axis\": self.axis}\n",
158 | " base_config = super(Scale, self).get_config()\n",
159 | " return dict(list(base_config.items()) + list(config.items()))\n",
160 | "\n",
161 | "\n",
162 | "def self_attn_block(inp, nc):\n",
163 | " '''\n",
164 | " Code borrows from https://github.com/taki0112/Self-Attention-GAN-Tensorflow\n",
165 | " '''\n",
166 | " assert nc//8 > 0, f\"Input channels must be >= 8, but got nc={nc}\"\n",
167 | " x = inp\n",
168 | " shape_x = x.get_shape().as_list()\n",
169 | " \n",
170 | " f = Conv2D(nc//8, 1, kernel_initializer=conv_init)(x)\n",
171 | " g = Conv2D(nc//8, 1, kernel_initializer=conv_init)(x)\n",
172 | " h = Conv2D(nc, 1, kernel_initializer=conv_init)(x)\n",
173 | " \n",
174 | " shape_f = f.get_shape().as_list()\n",
175 | " shape_g = g.get_shape().as_list()\n",
176 | " shape_h = h.get_shape().as_list()\n",
177 | " flat_f = Reshape((-1, shape_f[-1]))(f)\n",
178 | " flat_g = Reshape((-1, shape_g[-1]))(g)\n",
179 | " flat_h = Reshape((-1, shape_h[-1]))(h) \n",
180 | " \n",
181 | " s = Lambda(lambda x: tf.matmul(x[0], x[1], transpose_b=True))([flat_g, flat_f])\n",
182 | "\n",
183 | " beta = Softmax(axis=-1)(s)\n",
184 | " o = Lambda(lambda x: tf.matmul(x[0], x[1]))([beta, flat_h])\n",
185 | " o = Reshape(shape_x[1:])(o)\n",
186 | " o = Scale()(o)\n",
187 | " \n",
188 | " out = add([o, inp])\n",
189 | " return out\n",
190 | "\n",
191 | "def conv_block(input_tensor, f):\n",
192 | " x = input_tensor\n",
193 | " x = Conv2D(f, kernel_size=3, strides=2, kernel_regularizer=regularizers.l2(w_l2), \n",
194 | " kernel_initializer=conv_init, use_bias=False, padding=\"same\")(x)\n",
195 | " x = Activation(\"relu\")(x)\n",
196 | " return x\n",
197 | "\n",
198 | "def conv_block_d(input_tensor, f, use_instance_norm=False):\n",
199 | " x = input_tensor\n",
200 | " x = Conv2D(f, kernel_size=4, strides=2, kernel_regularizer=regularizers.l2(w_l2), \n",
201 | " kernel_initializer=conv_init, use_bias=False, padding=\"same\")(x)\n",
202 | " x = LeakyReLU(alpha=0.2)(x)\n",
203 | " return x\n",
204 | "\n",
205 | "def res_block(input_tensor, f):\n",
206 | " x = input_tensor\n",
207 | " x = Conv2D(f, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2), \n",
208 | " kernel_initializer=conv_init, use_bias=False, padding=\"same\")(x)\n",
209 | " x = LeakyReLU(alpha=0.2)(x)\n",
210 | " x = Conv2D(f, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2), \n",
211 | " kernel_initializer=conv_init, use_bias=False, padding=\"same\")(x)\n",
212 | " x = add([x, input_tensor])\n",
213 | " x = LeakyReLU(alpha=0.2)(x)\n",
214 | " return x\n",
215 | "\n",
216 | "def upscale_ps(filters, use_norm=True):\n",
217 | " def block(x):\n",
218 | " x = Conv2D(filters*4, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2), \n",
219 | " kernel_initializer=RandomNormal(0, 0.02), padding='same')(x)\n",
220 | " x = LeakyReLU(0.2)(x)\n",
221 | " x = PixelShuffler()(x)\n",
222 | " return x\n",
223 | " return block\n",
224 | "\n",
225 | "def Discriminator(nc_in, input_size=64):\n",
226 | " inp = Input(shape=(input_size, input_size, nc_in))\n",
227 | " #x = GaussianNoise(0.05)(inp)\n",
228 | " x = conv_block_d(inp, 64, False)\n",
229 | " x = conv_block_d(x, 128, False)\n",
230 | " x = self_attn_block(x, 128) if use_self_attn else x\n",
231 | " x = conv_block_d(x, 256, False)\n",
232 | " x = self_attn_block(x, 256) if use_self_attn else x\n",
233 | " out = Conv2D(1, kernel_size=4, kernel_initializer=conv_init, use_bias=False, padding=\"same\")(x) \n",
234 | " return Model(inputs=[inp], outputs=out)\n",
235 | "\n",
236 | "def Encoder(nc_in=3, input_size=64):\n",
237 | " inp = Input(shape=(input_size, input_size, nc_in))\n",
238 | " x = Conv2D(64, kernel_size=5, kernel_initializer=conv_init, use_bias=False, padding=\"same\")(inp)\n",
239 | " x = conv_block(x,128)\n",
240 | " x = conv_block(x,256)\n",
241 | " x = self_attn_block(x, 256) if use_self_attn else x\n",
242 | " x = conv_block(x,512) \n",
243 | " x = self_attn_block(x, 512) if use_self_attn else x\n",
244 | " x = conv_block(x,1024)\n",
245 | " x = Dense(1024)(Flatten()(x))\n",
246 | " x = Dense(4*4*1024)(x)\n",
247 | " x = Reshape((4, 4, 1024))(x)\n",
248 | " out = upscale_ps(512)(x)\n",
249 | " return Model(inputs=inp, outputs=out)\n",
250 | "\n",
251 | "def Decoder_ps(nc_in=512, input_size=8):\n",
252 | " input_ = Input(shape=(input_size, input_size, nc_in))\n",
253 | " x = input_\n",
254 | " x = upscale_ps(256)(x)\n",
255 | " x = upscale_ps(128)(x)\n",
256 | " x = self_attn_block(x, 128) if use_self_attn else x\n",
257 | " x = upscale_ps(64)(x)\n",
258 | " x = res_block(x, 64)\n",
259 | " x = self_attn_block(x, 64) if use_self_attn else x\n",
260 | " #x = Conv2D(4, kernel_size=5, padding='same')(x) \n",
261 | " alpha = Conv2D(1, kernel_size=5, padding='same', activation=\"sigmoid\")(x)\n",
262 | " rgb = Conv2D(3, kernel_size=5, padding='same', activation=\"tanh\")(x)\n",
263 | " out = concatenate([alpha, rgb])\n",
264 | " return Model(input_, out) "
265 | ]
266 | },
267 | {
268 | "cell_type": "code",
269 | "execution_count": 8,
270 | "metadata": {
271 | "scrolled": true
272 | },
273 | "outputs": [],
274 | "source": [
275 | "encoder = Encoder()\n",
276 | "decoder_A = Decoder_ps()\n",
277 | "decoder_B = Decoder_ps()\n",
278 | "\n",
279 | "x = Input(shape=IMAGE_SHAPE)\n",
280 | "\n",
281 | "netGA = Model(x, decoder_A(encoder(x)))\n",
282 | "netGB = Model(x, decoder_B(encoder(x)))"
283 | ]
284 | },
285 | {
286 | "cell_type": "code",
287 | "execution_count": 9,
288 | "metadata": {},
289 | "outputs": [],
290 | "source": [
291 | "netDA = Discriminator(nc_D_inp)\n",
292 | "netDB = Discriminator(nc_D_inp)"
293 | ]
294 | },
295 | {
296 | "cell_type": "markdown",
297 | "metadata": {},
298 | "source": [
299 | "\n",
300 | "# 6. Load Models"
301 | ]
302 | },
303 | {
304 | "cell_type": "code",
305 | "execution_count": 10,
306 | "metadata": {},
307 | "outputs": [
308 | {
309 | "name": "stdout",
310 | "output_type": "stream",
311 | "text": [
312 | "model loaded.\n"
313 | ]
314 | }
315 | ],
316 | "source": [
317 | "try:\n",
318 | " encoder.load_weights(\"models/encoder.h5\")\n",
319 | " decoder_A.load_weights(\"models/decoder_A.h5\")\n",
320 | " decoder_B.load_weights(\"models/decoder_B.h5\")\n",
321 | " #netDA.load_weights(\"models/netDA.h5\") \n",
322 | " #netDB.load_weights(\"models/netDB.h5\") \n",
323 | " print (\"model loaded.\")\n",
324 | "except:\n",
325 | "    print (\"Failed to load the weights files.\")\n",
326 | " pass"
327 | ]
328 | },
329 | {
330 | "cell_type": "markdown",
331 | "metadata": {},
332 | "source": [
333 | "\n",
334 | "# 7. Define Inputs/Outputs Variables\n",
335 | "\n",
336 | " distorted_A: A (batch_size, 64, 64, 3) tensor, input of generator_A (netGA).\n",
337 | " distorted_B: A (batch_size, 64, 64, 3) tensor, input of generator_B (netGB).\n",
338 | " fake_A: (batch_size, 64, 64, 3) tensor, output of generator_A (netGA).\n",
339 | " fake_B: (batch_size, 64, 64, 3) tensor, output of generator_B (netGB).\n",
340 | " mask_A: (batch_size, 64, 64, 1) tensor, mask output of generator_A (netGA).\n",
341 | " mask_B: (batch_size, 64, 64, 1) tensor, mask output of generator_B (netGB).\n",
342 | " path_A: A function that takes distorted_A as input and outputs fake_A.\n",
343 | " path_B: A function that takes distorted_B as input and outputs fake_B.\n",
344 | " path_mask_A: A function that takes distorted_A as input and outputs mask_A.\n",
345 | " path_mask_B: A function that takes distorted_B as input and outputs mask_B.\n",
346 | " path_abgr_A: A function that takes distorted_A as input and outputs concat([mask_A, fake_A]).\n",
347 | " path_abgr_B: A function that takes distorted_B as input and outputs concat([mask_B, fake_B]).\n",
348 | " real_A: A (batch_size, 64, 64, 3) tensor, target images for generator_A given input distorted_A.\n",
349 | " real_B: A (batch_size, 64, 64, 3) tensor, target images for generator_B given input distorted_B."
350 | ]
351 | },
352 | {
353 | "cell_type": "code",
354 | "execution_count": 11,
355 | "metadata": {},
356 | "outputs": [],
357 | "source": [
358 | "def cycle_variables(netG):\n",
359 | " distorted_input = netG.inputs[0]\n",
360 | " fake_output = netG.outputs[0]\n",
361 | " alpha = Lambda(lambda x: x[:,:,:, :1])(fake_output)\n",
362 | " rgb = Lambda(lambda x: x[:,:,:, 1:])(fake_output)\n",
363 | " \n",
364 | " masked_fake_output = alpha * rgb + (1-alpha) * distorted_input \n",
365 | "\n",
366 | " fn_generate = K.function([distorted_input], [masked_fake_output])\n",
367 | " fn_mask = K.function([distorted_input], [concatenate([alpha, alpha, alpha])])\n",
368 | " fn_abgr = K.function([distorted_input], [concatenate([alpha, rgb])])\n",
369 | " return distorted_input, fake_output, alpha, fn_generate, fn_mask, fn_abgr"
370 | ]
371 | },
372 | {
373 | "cell_type": "code",
374 | "execution_count": 12,
375 | "metadata": {},
376 | "outputs": [],
377 | "source": [
378 | "distorted_A, fake_A, mask_A, path_A, path_mask_A, path_abgr_A = cycle_variables(netGA)\n",
379 | "distorted_B, fake_B, mask_B, path_B, path_mask_B, path_abgr_B = cycle_variables(netGB)\n",
380 | "real_A = Input(shape=IMAGE_SHAPE)\n",
381 | "real_B = Input(shape=IMAGE_SHAPE)"
382 | ]
383 | },
384 | {
385 | "cell_type": "code",
386 | "execution_count": null,
387 | "metadata": {},
388 | "outputs": [],
389 | "source": []
390 | },
391 | {
392 | "cell_type": "markdown",
393 | "metadata": {},
394 | "source": [
395 | "\n",
396 | "# 11. Helper Function: swap_face()\n",
397 | "This function is provided for those who don't have enough VRAM to run dlib's CNN and GAN model at the same time.\n",
398 | "\n",
399 | " INPUTS:\n",
400 | " img: A RGB face image of any size.\n",
401 | " path_func: a function that is either path_abgr_A or path_abgr_B.\n",
402 | "    OUTPUTS:\n",
403 | " result_img: A RGB swapped face image after masking.\n",
404 | " result_mask: A single channel uint8 mask image."
405 | ]
406 | },
407 | {
408 | "cell_type": "code",
409 | "execution_count": 33,
410 | "metadata": {},
411 | "outputs": [],
412 | "source": [
413 | "def swap_face(img, path_func):\n",
414 | " input_size = img.shape\n",
415 | " img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) # generator expects BGR input \n",
416 | " ae_input = cv2.resize(img, (64,64))/255. * 2 - 1 \n",
417 | " \n",
418 | " result = np.squeeze(np.array([path_func([[ae_input]])]))\n",
419 | " result_a = result[:,:,0] * 255\n",
420 | " result_a = cv2.resize(result_a, (input_size[1],input_size[0]))[...,np.newaxis]\n",
421 | " result_bgr = np.clip( (result[:,:,1:] + 1) * 255 / 2, 0, 255)\n",
422 | " result_bgr = cv2.resize(result_bgr, (input_size[1],input_size[0]))\n",
423 | " result = (result_a/255 * result_bgr + (1 - result_a/255) * img).astype('uint8')\n",
424 | " \n",
425 | " result = cv2.cvtColor(result, cv2.COLOR_BGR2RGB) \n",
426 | " result = cv2.resize(result, (input_size[1],input_size[0]))\n",
427 | " result_a = np.expand_dims(cv2.resize(result_a, (input_size[1],input_size[0])), axis=2)\n",
428 | " return result, result_a"
429 | ]
430 | },
431 | {
432 | "cell_type": "code",
433 | "execution_count": 34,
434 | "metadata": {},
435 | "outputs": [],
436 | "source": [
437 | "whom2whom = \"BtoA\" # default: transforming faceB to faceA\n",
438 | "\n",
439 | "if whom2whom == \"AtoB\":\n",
440 | "    path_func = path_abgr_B\n",
441 | "elif whom2whom == \"BtoA\":\n",
442 | " path_func = path_abgr_A\n",
443 | "else:\n",
444 | " print (\"whom2whom should be either AtoB or BtoA\")"
445 | ]
446 | },
447 | {
448 | "cell_type": "code",
449 | "execution_count": 35,
450 | "metadata": {},
451 | "outputs": [],
452 | "source": [
453 | "input_img = plt.imread(\"./IMAGE_FILENAME.jpg\")"
454 | ]
455 | },
456 | {
457 | "cell_type": "code",
458 | "execution_count": null,
459 | "metadata": {},
460 | "outputs": [],
461 | "source": [
462 | "plt.imshow(input_img)"
463 | ]
464 | },
465 | {
466 | "cell_type": "code",
467 | "execution_count": 37,
468 | "metadata": {},
469 | "outputs": [],
470 | "source": [
471 | "result_img, result_mask = swap_face(input_img, path_func)"
472 | ]
473 | },
474 | {
475 | "cell_type": "code",
476 | "execution_count": null,
477 | "metadata": {},
478 | "outputs": [],
479 | "source": [
480 | "plt.imshow(result_img)"
481 | ]
482 | },
483 | {
484 | "cell_type": "code",
485 | "execution_count": null,
486 | "metadata": {},
487 | "outputs": [],
488 | "source": [
489 | "plt.imshow(result_mask[:, :, 0]) # cmap='gray'"
490 | ]
491 | },
492 | {
493 | "cell_type": "code",
494 | "execution_count": null,
495 | "metadata": {},
496 | "outputs": [],
497 | "source": []
498 | },
499 | {
500 | "cell_type": "code",
501 | "execution_count": null,
502 | "metadata": {},
503 | "outputs": [],
504 | "source": []
505 | }
506 | ],
507 | "metadata": {
508 | "kernelspec": {
509 | "display_name": "Python 3",
510 | "language": "python",
511 | "name": "python3"
512 | },
513 | "language_info": {
514 | "codemirror_mode": {
515 | "name": "ipython",
516 | "version": 3
517 | },
518 | "file_extension": ".py",
519 | "mimetype": "text/x-python",
520 | "name": "python",
521 | "nbconvert_exporter": "python",
522 | "pygments_lexer": "ipython3",
523 | "version": "3.6.4"
524 | }
525 | },
526 | "nbformat": 4,
527 | "nbformat_minor": 2
528 | }
529 |
--------------------------------------------------------------------------------
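
A quick note on the ABGR convention used in the notebook above: `path_abgr_A`/`path_abgr_B` return a 4-channel map whose first channel is the alpha mask (roughly in [0, 1]) and whose last three channels are the BGR output of the tanh head (in [-1, 1]). The NumPy sketch below mirrors the compositing arithmetic of `swap_face`; the helper name and the fixed 64x64 resolution are illustrative assumptions.

```python
import cv2
import numpy as np

def composite_abgr(abgr_out, original_bgr):
    """Blend a (64, 64, 4) ABGR generator output onto the original BGR frame.

    Mirrors the arithmetic in swap_face above; value ranges are assumptions:
    alpha in [0, 1], BGR channels in [-1, 1] (tanh output).
    """
    h, w = original_bgr.shape[:2]
    alpha = abgr_out[:, :, 0].astype(np.float32)                 # predicted alpha mask
    bgr = np.clip((abgr_out[:, :, 1:] + 1) * 127.5, 0, 255)      # map [-1, 1] back to [0, 255]

    alpha = np.clip(cv2.resize(alpha, (w, h)), 0, 1)[..., np.newaxis]  # upscale to frame size
    bgr = cv2.resize(bgr.astype(np.float32), (w, h))

    blended = alpha * bgr + (1.0 - alpha) * original_bgr.astype(np.float32)
    return blended.astype(np.uint8), (alpha[..., 0] * 255).astype(np.uint8)

# Example with random stand-in data (in practice abgr_out comes from path_abgr_A([[face_crop]])):
abgr_out = np.random.uniform(-1, 1, (64, 64, 4)).astype(np.float32)
frame = np.random.randint(0, 256, (256, 256, 3), dtype=np.uint8)
swapped, mask = composite_abgr(abgr_out, frame)
```
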
/prep_binary_masks.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Install face-alignment package\n",
8 | "Environment: Google Colab"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "metadata": {
15 | "colab": {
16 | "autoexec": {
17 | "startup": false,
18 | "wait_interval": 0
19 | },
20 | "base_uri": "https://localhost:8080/",
21 | "height": 139
22 | },
23 | "colab_type": "code",
24 | "executionInfo": {
25 | "elapsed": 46673,
26 | "status": "ok",
27 | "timestamp": 1529151506883,
28 | "user": {
29 | "displayName": "Lu SA",
30 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128",
31 | "userId": "109275333798683015269"
32 | },
33 | "user_tz": -480
34 | },
35 | "id": "NK7_yFjNV-wY",
36 | "outputId": "091fda26-6e40-4c9d-fd39-2d48cdd7d14f"
37 | },
38 | "outputs": [
39 | {
40 | "name": "stdout",
41 | "output_type": "stream",
42 | "text": [
43 | "Collecting torch\n",
44 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/69/43/380514bd9663f1bf708abeb359b8b48d3fabb1c8e95bb3427a980a064c57/torch-0.4.0-cp36-cp36m-manylinux1_x86_64.whl (484.0MB)\n",
45 | "\u001b[K 100% |████████████████████████████████| 484.0MB 24kB/s \n",
46 | "tcmalloc: large alloc 1073750016 bytes == 0x5c3f8000 @ 0x7fe09f1ac1c4 0x46d6a4 0x5fcbcc 0x4c494d 0x54f3c4 0x553aaf 0x54e4c8 0x54f4f6 0x553aaf 0x54efc1 0x54f24d 0x553aaf 0x54efc1 0x54f24d 0x553aaf 0x54efc1 0x54f24d 0x551ee0 0x54e4c8 0x54f4f6 0x553aaf 0x54efc1 0x54f24d 0x551ee0 0x54efc1 0x54f24d 0x551ee0 0x54e4c8 0x54f4f6 0x553aaf 0x54e4c8\n",
47 | "\u001b[?25hInstalling collected packages: torch\n",
48 | "Successfully installed torch-0.4.0\n"
49 | ]
50 | }
51 | ],
52 | "source": [
53 | "# Update PyTorch to 0.4\n",
54 | "#!pip install --upgrade torch"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 2,
60 | "metadata": {
61 | "colab": {
62 | "autoexec": {
63 | "startup": false,
64 | "wait_interval": 0
65 | },
66 | "base_uri": "https://localhost:8080/",
67 | "height": 204
68 | },
69 | "colab_type": "code",
70 | "executionInfo": {
71 | "elapsed": 6783,
72 | "status": "ok",
73 | "timestamp": 1529151513699,
74 | "user": {
75 | "displayName": "Lu SA",
76 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128",
77 | "userId": "109275333798683015269"
78 | },
79 | "user_tz": -480
80 | },
81 | "id": "n79ip5KbWH0s",
82 | "outputId": "34fd0b04-52d3-45a9-a4e8-feb48013d5b2"
83 | },
84 | "outputs": [
85 | {
86 | "name": "stdout",
87 | "output_type": "stream",
88 | "text": [
89 | "Initialized empty Git repository in /content/.git/\n",
90 | "remote: Counting objects: 277, done.\u001b[K\n",
91 | "remote: Compressing objects: 100% (14/14), done.\u001b[K\n",
92 | "remote: Total 277 (delta 1), reused 3 (delta 0), pack-reused 263\u001b[K\n",
93 | "Receiving objects: 100% (277/277), 3.46 MiB | 13.68 MiB/s, done.\n",
94 | "Resolving deltas: 100% (156/156), done.\n",
95 | "From https://github.com/1adrianb/face-alignment\n",
96 | " * [new branch] master -> origin/master\n",
97 | " * [new tag] v1.0.0 -> v1.0.0\n",
98 | "Branch master set up to track remote branch master from origin.\n",
99 | "Already on 'master'\n"
100 | ]
101 | }
102 | ],
103 | "source": [
104 | "#!git init .\n",
105 | "#!git remote add origin https://github.com/1adrianb/face-alignment.git\n",
106 | "#!git fetch origin\n",
107 | "#!git checkout master"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": 0,
113 | "metadata": {
114 | "colab": {
115 | "autoexec": {
116 | "startup": false,
117 | "wait_interval": 0
118 | }
119 | },
120 | "colab_type": "code",
121 | "id": "48vu2IoAXNcU"
122 | },
123 | "outputs": [],
124 | "source": [
125 | "#%%capture\n",
126 | "#!apt update\n",
127 | "#!apt install -y cmake"
128 | ]
129 | },
130 | {
131 | "cell_type": "markdown",
132 | "metadata": {},
133 | "source": [
134 | "**Install dlib (CUDA enabled or CPU version)**\n",
135 | "\n",
136 | "CUDA enabled dlib"
137 | ]
138 | },
139 | {
140 | "cell_type": "code",
141 | "execution_count": null,
142 | "metadata": {},
143 | "outputs": [],
144 | "source": [
145 | "#%%capture\n",
146 | "#!git clone https://github.com/davisking/dlib.git dlib/\n",
147 | "#%cd dlib/\n",
148 | "#!python setup.py install --yes USE_AVX_INSTRUCTIONS --yes DLIB_USE_CUDA"
149 | ]
150 | },
151 | {
152 | "cell_type": "markdown",
153 | "metadata": {},
154 | "source": [
155 | "dlib w/o CUDA"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": null,
161 | "metadata": {},
162 | "outputs": [],
163 | "source": [
164 | "# CPU dlib\n",
165 | "#!pip install dlib"
166 | ]
167 | },
168 | {
169 | "cell_type": "code",
170 | "execution_count": 13,
171 | "metadata": {
172 | "colab": {
173 | "autoexec": {
174 | "startup": false,
175 | "wait_interval": 0
176 | },
177 | "base_uri": "https://localhost:8080/",
178 | "height": 71
179 | },
180 | "colab_type": "code",
181 | "executionInfo": {
182 | "elapsed": 1784,
183 | "status": "ok",
184 | "timestamp": 1529131895182,
185 | "user": {
186 | "displayName": "Lu SA",
187 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128",
188 | "userId": "109275333798683015269"
189 | },
190 | "user_tz": -480
191 | },
192 | "id": "c8b776O8WH5_",
193 | "outputId": "1adbfb1a-1399-4396-c05c-8685573ee25a"
194 | },
195 | "outputs": [
196 | {
197 | "name": "stdout",
198 | "output_type": "stream",
199 | "text": [
200 | "datalab examples\t README.md\t setup.cfg tox.ini\r\n",
201 | "Dockerfile face_alignment README.rst\t setup.py\r\n",
202 | "docs\t LICENSE\t requirements.txt test\r\n"
203 | ]
204 | }
205 | ],
206 | "source": [
207 | "#!ls"
208 | ]
209 | },
210 | {
211 | "cell_type": "code",
212 | "execution_count": 3,
213 | "metadata": {
214 | "colab": {
215 | "autoexec": {
216 | "startup": false,
217 | "wait_interval": 0
218 | },
219 | "base_uri": "https://localhost:8080/",
220 | "height": 35
221 | },
222 | "colab_type": "code",
223 | "executionInfo": {
224 | "elapsed": 7848,
225 | "status": "ok",
226 | "timestamp": 1529131778373,
227 | "user": {
228 | "displayName": "Lu SA",
229 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128",
230 | "userId": "109275333798683015269"
231 | },
232 | "user_tz": -480
233 | },
234 | "id": "HLqaUVyQWH3V",
235 | "outputId": "3c814038-afca-4934-bd40-c2e7b6a344e5"
236 | },
237 | "outputs": [
238 | {
239 | "data": {
240 | "text/plain": [
241 | "'0.4.0'"
242 | ]
243 | },
244 | "execution_count": 3,
245 | "metadata": {
246 | "tags": []
247 | },
248 | "output_type": "execute_result"
249 | }
250 | ],
251 | "source": [
252 | "#import torch\n",
253 | "#torch.__version__"
254 | ]
255 | },
256 | {
257 | "cell_type": "code",
258 | "execution_count": 0,
259 | "metadata": {
260 | "colab": {
261 | "autoexec": {
262 | "startup": false,
263 | "wait_interval": 0
264 | }
265 | },
266 | "colab_type": "code",
267 | "id": "QNdDVGxkblKX"
268 | },
269 | "outputs": [],
270 | "source": []
271 | },
272 | {
273 | "cell_type": "markdown",
274 | "metadata": {
275 | "colab_type": "text",
276 | "id": "RT_PlazfbldI"
277 | },
278 | "source": [
279 | "# Generate binary masks\n",
280 | "\n",
281 | "```bash\n",
282 | " Inputs:\n",
283 | " Images from ./faceA and ./faceB\n",
284 | " Outputs:\n",
285 | " Eyes binary masks, which are saved to ./binary_masks/faceA_eyes and ./binary_masks/faceB_eyes respectively\n",
286 | "```"
287 | ]
288 | },
289 | {
290 | "cell_type": "code",
291 | "execution_count": 0,
292 | "metadata": {
293 | "colab": {
294 | "autoexec": {
295 | "startup": false,
296 | "wait_interval": 0
297 | }
298 | },
299 | "colab_type": "code",
300 | "id": "zD5XgAEJbkjS"
301 | },
302 | "outputs": [],
303 | "source": [
304 | "import face_alignment"
305 | ]
306 | },
307 | {
308 | "cell_type": "code",
309 | "execution_count": 0,
310 | "metadata": {
311 | "colab": {
312 | "autoexec": {
313 | "startup": false,
314 | "wait_interval": 0
315 | }
316 | },
317 | "colab_type": "code",
318 | "id": "hmSj4zaXXz5W"
319 | },
320 | "outputs": [],
321 | "source": [
322 | "import cv2\n",
323 | "import numpy as np\n",
324 | "from glob import glob\n",
325 | "from pathlib import PurePath, Path\n",
326 | "from matplotlib import pyplot as plt\n",
327 | "%matplotlib inline"
328 | ]
329 | },
330 | {
331 | "cell_type": "code",
332 | "execution_count": 0,
333 | "metadata": {
334 | "colab": {
335 | "autoexec": {
336 | "startup": false,
337 | "wait_interval": 0
338 | }
339 | },
340 | "colab_type": "code",
341 | "id": "34x3mp7wb2bR"
342 | },
343 | "outputs": [],
344 | "source": [
345 | "dir_faceA = \"./faceA\"\n",
346 | "dir_faceB = \"./faceB\"\n",
347 | "dir_bm_faceA_eyes = \"./binary_masks/faceA_eyes\"\n",
348 | "dir_bm_faceB_eyes = \"./binary_masks/faceB_eyes\""
349 | ]
350 | },
351 | {
352 | "cell_type": "code",
353 | "execution_count": 0,
354 | "metadata": {
355 | "colab": {
356 | "autoexec": {
357 | "startup": false,
358 | "wait_interval": 0
359 | }
360 | },
361 | "colab_type": "code",
362 | "id": "17GYV20FXz78"
363 | },
364 | "outputs": [],
365 | "source": [
366 | "fns_faceA = glob(f\"{dir_faceA}/*.*\")\n",
367 | "fns_faceB = glob(f\"{dir_faceB}/*.*\")"
368 | ]
369 | },
370 | {
371 | "cell_type": "code",
372 | "execution_count": 11,
373 | "metadata": {
374 | "colab": {
375 | "autoexec": {
376 | "startup": false,
377 | "wait_interval": 0
378 | },
379 | "base_uri": "https://localhost:8080/",
380 | "height": 34
381 | },
382 | "colab_type": "code",
383 | "executionInfo": {
384 | "elapsed": 11767,
385 | "status": "ok",
386 | "timestamp": 1529152245581,
387 | "user": {
388 | "displayName": "Lu SA",
389 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128",
390 | "userId": "109275333798683015269"
391 | },
392 | "user_tz": -480
393 | },
394 | "id": "fwD2DVvQcEMw",
395 | "outputId": "698867a7-2b85-4703-cdd2-fa7531bc6b52"
396 | },
397 | "outputs": [
398 | {
399 | "name": "stdout",
400 | "output_type": "stream",
401 | "text": [
402 | "Downloading the Face Alignment Network(FAN). Please wait...\n"
403 | ]
404 | }
405 | ],
406 | "source": [
407 | "fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, enable_cuda=True, flip_input=False)"
408 | ]
409 | },
410 | {
411 | "cell_type": "code",
412 | "execution_count": 0,
413 | "metadata": {
414 | "colab": {
415 | "autoexec": {
416 | "startup": false,
417 | "wait_interval": 0
418 | }
419 | },
420 | "colab_type": "code",
421 | "id": "cYCCJK1ehpNi"
422 | },
423 | "outputs": [],
424 | "source": [
425 | "# !mkdir -p binary_masks/faceA_eyes\n",
426 | "Path(f\"binary_masks/faceA_eyes\").mkdir(parents=True, exist_ok=True)\n",
427 | "# !mkdir -p binary_masks/faceB_eyes\n",
428 | "Path(f\"binary_masks/faceB_eyes\").mkdir(parents=True, exist_ok=True)"
429 | ]
430 | },
431 | {
432 | "cell_type": "code",
433 | "execution_count": null,
434 | "metadata": {
435 | "colab": {
436 | "autoexec": {
437 | "startup": false,
438 | "wait_interval": 0
439 | },
440 | "base_uri": "https://localhost:8080/",
441 | "height": 2397
442 | },
443 | "colab_type": "code",
444 | "executionInfo": {
445 | "elapsed": 121564,
446 | "status": "ok",
447 | "timestamp": 1529152370522,
448 | "user": {
449 | "displayName": "Lu SA",
450 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128",
451 | "userId": "109275333798683015269"
452 | },
453 | "user_tz": -480
454 | },
455 | "id": "iMZHXBmzcEUY",
456 | "outputId": "2dc0737f-6990-40db-adb9-c5e2baf49ae9"
457 | },
458 | "outputs": [],
459 | "source": [
460 | "fns_face_not_detected = []\n",
461 | "\n",
462 | "for idx, fns in enumerate([fns_faceA, fns_faceB]):\n",
463 | " if idx == 0:\n",
464 | " save_path = dir_bm_faceA_eyes\n",
465 | " elif idx == 1:\n",
466 | " save_path = dir_bm_faceB_eyes \n",
467 | " \n",
468 | " # create binary mask for each training image\n",
469 | " for fn in fns:\n",
470 | " raw_fn = PurePath(fn).parts[-1]\n",
471 | "\n",
472 | " x = plt.imread(fn)\n",
473 | " x = cv2.resize(x, (256,256))\n",
474 | " preds = fa.get_landmarks(x)\n",
475 | " \n",
476 | " if preds is not None:\n",
477 | " preds = preds[0]\n",
478 | " mask = np.zeros_like(x)\n",
479 | " \n",
480 | " # Draw right eye binary mask\n",
481 | " pnts_right = [(preds[i,0],preds[i,1]) for i in range(36,42)]\n",
482 | " hull = cv2.convexHull(np.array(pnts_right)).astype(np.int32)\n",
483 | " mask = cv2.drawContours(mask,[hull],0,(255,255,255),-1)\n",
484 | "\n",
485 | " # Draw left eye binary mask\n",
486 | " pnts_left = [(preds[i,0],preds[i,1]) for i in range(42,48)]\n",
487 | " hull = cv2.convexHull(np.array(pnts_left)).astype(np.int32)\n",
488 | " mask = cv2.drawContours(mask,[hull],0,(255,255,255),-1)\n",
489 | "\n",
490 | " # Draw mouth binary mask\n",
491 | " #pnts_mouth = [(preds[i,0],preds[i,1]) for i in range(48,60)]\n",
492 | " #hull = cv2.convexHull(np.array(pnts_mouth)).astype(np.int32)\n",
493 | " #mask = cv2.drawContours(mask,[hull],0,(255,255,255),-1)\n",
494 | " \n",
495 | " mask = cv2.dilate(mask, np.ones((13,13), np.uint8), iterations=1)\n",
496 | " mask = cv2.GaussianBlur(mask, (7,7), 0)\n",
497 | " \n",
498 | " else:\n",
499 | " mask = np.zeros_like(x)\n",
500 | "            print(f\"No faces were detected in image '{fn}'\")\n",
501 | " fns_face_not_detected.append(fn)\n",
502 | " \n",
503 | " plt.imsave(fname=f\"{save_path}/{raw_fn}\", arr=mask, format=\"jpg\")"
504 | ]
505 | },
506 | {
507 | "cell_type": "code",
508 | "execution_count": 14,
509 | "metadata": {
510 | "colab": {
511 | "autoexec": {
512 | "startup": false,
513 | "wait_interval": 0
514 | },
515 | "base_uri": "https://localhost:8080/",
516 | "height": 51
517 | },
518 | "colab_type": "code",
519 | "executionInfo": {
520 | "elapsed": 559,
521 | "status": "ok",
522 | "timestamp": 1529152371122,
523 | "user": {
524 | "displayName": "Lu SA",
525 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128",
526 | "userId": "109275333798683015269"
527 | },
528 | "user_tz": -480
529 | },
530 | "id": "uXgooPybq7PG",
531 | "outputId": "362246e2-9b58-487c-cb4e-99ce69136eda"
532 | },
533 | "outputs": [
534 | {
535 | "name": "stdout",
536 | "output_type": "stream",
537 | "text": [
538 | "Number of processed images: 694\n",
539 | "Number of image(s) with no face detected: 70\n"
540 | ]
541 | }
542 | ],
543 | "source": [
544 | "num_faceA = len(glob(dir_faceA+\"/*.*\"))\n",
545 | "num_faceB = len(glob(dir_faceB+\"/*.*\"))\n",
546 | "\n",
547 | "print(\"Number of processed images: \" + str(num_faceA + num_faceB))\n",
548 | "print(\"Number of image(s) with no face detected: \" + str(len(fns_face_not_detected)))"
549 | ]
550 | },
551 | {
552 | "cell_type": "markdown",
553 | "metadata": {
554 | "colab_type": "text",
555 | "id": "uRKzF42Wy0Ba"
556 | },
557 | "source": [
558 | "# Randomly display a face image and its resulting binary mask"
559 | ]
560 | },
561 | {
562 | "cell_type": "code",
563 | "execution_count": null,
564 | "metadata": {
565 | "colab": {
566 | "autoexec": {
567 | "startup": false,
568 | "wait_interval": 0
569 | },
570 | "base_uri": "https://localhost:8080/",
571 | "height": 316
572 | },
573 | "colab_type": "code",
574 | "executionInfo": {
575 | "elapsed": 1080,
576 | "status": "ok",
577 | "timestamp": 1529139911130,
578 | "user": {
579 | "displayName": "Lu SA",
580 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128",
581 | "userId": "109275333798683015269"
582 | },
583 | "user_tz": -480
584 | },
585 | "id": "Ja4aOHNay0HH",
586 | "outputId": "c6e1b3d4-d427-4267-a33b-efc8882373e3"
587 | },
588 | "outputs": [],
589 | "source": [
590 | "face = np.random.choice([\"A\",\"B\"])\n",
591 | "\n",
592 | "dir_face = dir_faceA if face == \"A\" else dir_faceB\n",
593 | "fns_face = fns_faceA if face == \"A\" else fns_faceB\n",
594 | "num_face = len(glob(dir_face+\"/*.*\"))\n",
595 | "rand_idx = np.random.randint(num_face)\n",
596 | "rand_fn = fns_face[rand_idx]\n",
597 | "raw_fn = PurePath(rand_fn).parts[-1]\n",
598 | "mask_fn = f\"{dir_bm_faceA_eyes}/{raw_fn}\" if face == \"A\" else f\"{dir_bm_faceB_eyes}/{raw_fn}\"\n",
599 | "im = plt.imread(rand_fn)\n",
600 | "mask = plt.imread(mask_fn)\n",
601 | "\n",
602 | "if rand_fn in fns_face_not_detected:\n",
603 | " print(\"========== No faces were detected in this image! ==========\")\n",
604 | "\n",
605 | "fig = plt.figure(figsize=(15,6))\n",
606 | "plt.subplot(1,3,1)\n",
607 | "plt.grid('off')\n",
608 | "plt.imshow(im)\n",
609 | "plt.subplot(1,3,2)\n",
610 | "plt.grid('off')\n",
611 | "plt.imshow(mask)\n",
612 | "plt.subplot(1,3,3)\n",
613 | "plt.grid('off')\n",
614 | "plt.imshow((mask/255*im).astype(np.uint8))\n",
615 | "\n",
616 | "#fa.get_landmarks(x)"
617 | ]
618 | },
619 | {
620 | "cell_type": "markdown",
621 | "metadata": {
622 | "colab_type": "text",
623 | "id": "X6c22xxGsVQR"
624 | },
625 | "source": [
626 | "# Randomly display an image in which no face was detected"
627 | ]
628 | },
629 | {
630 | "cell_type": "code",
631 | "execution_count": null,
632 | "metadata": {
633 | "colab": {
634 | "autoexec": {
635 | "startup": false,
636 | "wait_interval": 0
637 | },
638 | "base_uri": "https://localhost:8080/",
639 | "height": 287
640 | },
641 | "colab_type": "code",
642 | "executionInfo": {
643 | "elapsed": 697,
644 | "status": "ok",
645 | "timestamp": 1529139782680,
646 | "user": {
647 | "displayName": "Lu SA",
648 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128",
649 | "userId": "109275333798683015269"
650 | },
651 | "user_tz": -480
652 | },
653 | "id": "aAgOLJ_WowlK",
654 | "outputId": "00138071-ab9e-4c91-aeb6-67d253c81cf5"
655 | },
656 | "outputs": [],
657 | "source": [
658 | "num_no_face_img = len(fns_face_not_detected)\n",
659 | "rand_idx = np.random.randint(num_no_face_img)\n",
660 | "x = plt.imread(fns_face_not_detected[rand_idx])\n",
661 | "#x = cv2.resize(x, (256,256))\n",
662 | "\n",
663 | "plt.grid('off')\n",
664 | "plt.imshow(x)\n",
665 | "\n",
666 | "#fa.get_landmarks(x)"
667 | ]
668 | },
669 | {
670 | "cell_type": "code",
671 | "execution_count": null,
672 | "metadata": {},
673 | "outputs": [],
674 | "source": []
675 | }
676 | ],
677 | "metadata": {
678 | "accelerator": "GPU",
679 | "colab": {
680 | "collapsed_sections": [],
681 | "default_view": {},
682 | "name": "make_binary_masks.ipynb",
683 | "provenance": [],
684 | "version": "0.3.2",
685 | "views": {}
686 | },
687 | "kernelspec": {
688 | "display_name": "Python 3",
689 | "language": "python",
690 | "name": "python3"
691 | },
692 | "language_info": {
693 | "codemirror_mode": {
694 | "name": "ipython",
695 | "version": 3
696 | },
697 | "file_extension": ".py",
698 | "mimetype": "text/x-python",
699 | "name": "python",
700 | "nbconvert_exporter": "python",
701 | "pygments_lexer": "ipython3",
702 | "version": "3.6.4"
703 | }
704 | },
705 | "nbformat": 4,
706 | "nbformat_minor": 1
707 | }
708 |
--------------------------------------------------------------------------------
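
The eye masks generated above are consumed during training: `train_one_batch_G` in faceswap_gan_model.py (earlier in this dump) expects each batch item to be a `(warped, target, eye_mask)` tuple. The sketch below pairs a face crop with its mask by filename, matching how the notebook saves them; the normalization to [-1, 1], the identity "warp", and the 64x64 resolution are simplifying assumptions (the project's data loader applies real augmentation and random warping).

```python
import cv2
import numpy as np
from pathlib import PurePath

def load_training_triplet(image_fn, masks_dir="./binary_masks/faceA_eyes", res=64):
    """Pair a face crop with the eye mask written by prep_binary_masks.ipynb.

    Masks are looked up by filename, mirroring how the notebook saves them.
    The identity warp and the [-1, 1] scaling are placeholder assumptions.
    """
    raw_fn = PurePath(image_fn).parts[-1]

    img = cv2.imread(image_fn)                                   # BGR uint8
    img = cv2.resize(img, (res, res)).astype(np.float32) / 255 * 2 - 1

    mask = cv2.imread(f"{masks_dir}/{raw_fn}")                   # white-on-black eye mask
    if mask is None:                                             # e.g. no mask was written
        mask = np.zeros((res, res, 3), np.uint8)
    mask = cv2.resize(mask, (res, res)).astype(np.float32) / 255

    warped = img.copy()   # placeholder: training uses a randomly warped version of img
    return warped, img, mask

# e.g. triplets = [load_training_triplet(fn) for fn in fns_faceA[:8]]  (then stacked into arrays)
```
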