├── mtcnn_weights ├── det1.npy ├── det2.npy ├── det3.npy └── README.md ├── converter ├── kalman_filter.py ├── vc_utils.py ├── landmarks_alignment.py ├── color_correction.py ├── face_transformer.py └── video_converter.py ├── networks ├── custom_inits │ └── icnr_initializer.py ├── custom_layers │ └── scale_layer.py ├── pixel_shuffler.py ├── GroupNormalization.py ├── instance_normalization.py ├── losses.py ├── nn_blocks.py └── faceswap_gan_model.py ├── legacy ├── training_data.py ├── README.md ├── model_GAN_v2.py ├── pixel_shuffler.py ├── utils.py ├── image_augmentation.py ├── instance_normalization.py ├── FCN8s_keras.py └── FaceSwap_GAN_v2_test_img.ipynb ├── preprocess.py ├── data_loader ├── data_loader.py └── data_augmentation.py ├── umeyama.py ├── image_augmentation.py ├── utils.py ├── detector └── face_detector.py ├── FaceSwap_GAN_v2.2_video_conversion.ipynb ├── MTCNN_video_face_detection_alignment.ipynb ├── README.md ├── notes └── README.md └── prep_binary_masks.ipynb /mtcnn_weights/det1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/product/faceswap-GAN/master/mtcnn_weights/det1.npy -------------------------------------------------------------------------------- /mtcnn_weights/det2.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/product/faceswap-GAN/master/mtcnn_weights/det2.npy -------------------------------------------------------------------------------- /mtcnn_weights/det3.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/product/faceswap-GAN/master/mtcnn_weights/det3.npy -------------------------------------------------------------------------------- /mtcnn_weights/README.md: -------------------------------------------------------------------------------- 1 | Weights files are from https://github.com/davidsandberg/facenet/tree/master/src/align 2 | -------------------------------------------------------------------------------- /converter/kalman_filter.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | class KalmanFilter(): 5 | def __init__(self, noise_coef): 6 | self.noise_coef = noise_coef 7 | self.kf = self.init_kalman_filter(noise_coef) 8 | 9 | @staticmethod 10 | def init_kalman_filter(noise_coef): 11 | kf = cv2.KalmanFilter(4,2) 12 | kf.measurementMatrix = np.array([[1,0,0,0],[0,1,0,0]], np.float32) 13 | kf.transitionMatrix = np.array([[1,0,1,0],[0,1,0,1],[0,0,1,0],[0,0,0,1]], np.float32) 14 | kf.processNoiseCov = noise_coef * np.array([[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]], np.float32) 15 | return kf 16 | 17 | def correct(self, xy): 18 | return self.kf.correct(xy) 19 | 20 | def predict(self): 21 | return self.kf.predict() -------------------------------------------------------------------------------- /networks/custom_inits/icnr_initializer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | def icnr_keras(shape, dtype=None): 4 | """ 5 | From https://github.com/kostyaev/ICNR 6 | Custom initializer for subpix upscaling 7 | Note: upscale factor is fixed to 2, and the base initializer is fixed to random normal.
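    Example (illustrative sketch, not part of the original file): this function can be passed as the
    kernel_initializer of the Conv2D layer that feeds a PixelShuffler / sub-pixel block, e.g.
        Conv2D(filters * 4, kernel_size=3, padding="same", kernel_initializer=icnr_keras)
    so that, right after initialization, the sub-pixel upscaling behaves like nearest-neighbour
    upsampling, which is the ICNR trick for reducing checkerboard artifacts.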
8 | """ 9 | shape = list(shape) 10 | 11 | scale = 2 12 | initializer = tf.keras.initializers.RandomNormal(0, 0.02) 13 | 14 | new_shape = shape[:3] + [int(shape[3] / (scale ** 2))] 15 | x = initializer(new_shape, dtype) 16 | x = tf.transpose(x, perm=[2, 0, 1, 3]) 17 | x = tf.image.resize_nearest_neighbor(x, size=(shape[0] * scale, shape[1] * scale)) 18 | x = tf.space_to_depth(x, block_size=scale) 19 | x = tf.transpose(x, perm=[1, 2, 0, 3]) 20 | return x 21 | -------------------------------------------------------------------------------- /legacy/training_data.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from image_augmentation import random_transform 3 | from image_augmentation import random_warp 4 | 5 | random_transform_args = { 6 | 'rotation_range': 15, 7 | 'zoom_range': 0.1, 8 | 'shift_range': 0.05, 9 | 'random_flip': 0.5, 10 | } 11 | 12 | def get_training_data( images, batch_size ): 13 | indices = numpy.random.randint( len(images), size=batch_size ) 14 | for i,index in enumerate(indices): 15 | image = images[index] 16 | image = random_transform( image, **random_transform_args ) 17 | warped_img, target_img = random_warp( image ) 18 | 19 | if i == 0: 20 | warped_images = numpy.empty( (batch_size,) + warped_img.shape, warped_img.dtype ) 21 | target_images = numpy.empty( (batch_size,) + target_img.shape, warped_img.dtype ) 22 | 23 | warped_images[i] = warped_img 24 | target_images[i] = target_img 25 | 26 | return warped_images, target_images 27 | -------------------------------------------------------------------------------- /networks/custom_layers/scale_layer.py: -------------------------------------------------------------------------------- 1 | from keras.layers.core import Layer 2 | from keras.engine import InputSpec 3 | from keras import backend as K 4 | from keras import initializers 5 | 6 | class Scale(Layer): 7 | ''' 8 | Code borrows from https://github.com/flyyufelix/cnn_finetune 9 | ''' 10 | def __init__(self, weights=None, axis=-1, gamma_init='zero', **kwargs): 11 | self.axis = axis 12 | self.gamma_init = initializers.get(gamma_init) 13 | self.initial_weights = weights 14 | super(Scale, self).__init__(**kwargs) 15 | 16 | def build(self, input_shape): 17 | self.input_spec = [InputSpec(shape=input_shape)] 18 | 19 | # Compatibility with TensorFlow >= 1.0.0 20 | self.gamma = K.variable(self.gamma_init((1,)), name='{}_gamma'.format(self.name)) 21 | self.trainable_weights = [self.gamma] 22 | 23 | if self.initial_weights is not None: 24 | self.set_weights(self.initial_weights) 25 | del self.initial_weights 26 | 27 | def call(self, x, mask=None): 28 | return self.gamma * x 29 | 30 | def get_config(self): 31 | config = {"axis": self.axis} 32 | base_config = super(Scale, self).get_config() 33 | return dict(list(base_config.items()) + list(config.items())) 34 | -------------------------------------------------------------------------------- /converter/vc_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | # ================================================== 5 | # Output image initialization functions 6 | # ================================================== 7 | def get_init_mask_map(image): 8 | return np.zeros_like(image) 9 | 10 | def get_init_comb_img(input_img): 11 | comb_img = np.zeros([input_img.shape[0], input_img.shape[1]*2,input_img.shape[2]]) 12 | comb_img[:, :input_img.shape[1], :] = input_img 13 | comb_img[:, input_img.shape[1]:, :] = 
input_img 14 | return comb_img 15 | 16 | def get_init_triple_img(input_img, no_face=False): 17 | if no_face: 18 | triple_img = np.zeros([input_img.shape[0], input_img.shape[1]*3,input_img.shape[2]]) 19 | triple_img[:, :input_img.shape[1], :] = input_img 20 | triple_img[:, input_img.shape[1]:input_img.shape[1]*2, :] = input_img 21 | triple_img[:, input_img.shape[1]*2:, :] = (input_img * .15).astype('uint8') 22 | return triple_img 23 | else: 24 | triple_img = np.zeros([input_img.shape[0], input_img.shape[1]*3,input_img.shape[2]]) 25 | return triple_img 26 | 27 | def get_mask(roi_image, h, w): 28 | mask = np.zeros_like(roi_image) 29 | mask[h//15:-h//15,w//15:-w//15,:] = 255 30 | mask = cv2.GaussianBlur(mask,(15,15),10) 31 | return mask -------------------------------------------------------------------------------- /converter/landmarks_alignment.py: -------------------------------------------------------------------------------- 1 | from umeyama import umeyama 2 | import numpy as np 3 | import cv2 4 | 5 | def get_src_landmarks(x0, x1, y0, y1, pnts): 6 | """ 7 | x0, x1, y0, y1: (smoothed) bbox coord. 8 | pnts: landmarks predicted by MTCNN 9 | """ 10 | src_landmarks = [(int(pnts[i+5][0]-x0), int(pnts[i][0]-y0)) for i in range(5)] 11 | return src_landmarks 12 | 13 | def get_tar_landmarks(img): 14 | """ 15 | img: detected face image 16 | """ 17 | avg_landmarks = [ 18 | (0.31339227236234224, 0.3259269274198092), 19 | (0.31075140146108776, 0.7228453709528997), 20 | (0.5523683107816256, 0.5187296867370605), 21 | (0.7752419985257663, 0.37262483743520886), 22 | (0.7759613623985877, 0.6772957581740159) 23 | ] 24 | img_sz = img.shape 25 | tar_landmarks = [(int(xy[0]*img_sz[0]), int(xy[1]*img_sz[1])) for xy in avg_landmarks] 26 | return tar_landmarks 27 | 28 | def landmarks_match_mtcnn(src_im, src_landmarks, tar_landmarks): 29 | """ 30 | umeyama(src, dst, estimate_scale), 31 | src/dst landmarks coord. should be (y, x) 32 | """ 33 | src_size = src_im.shape 34 | src_tmp = [(int(xy[1]), int(xy[0])) for xy in src_landmarks] 35 | dst_tmp = [(int(xy[1]), int(xy[0])) for xy in tar_landmarks] 36 | M = umeyama(np.array(src_tmp), np.array(dst_tmp), True)[0:2] 37 | result = cv2.warpAffine(src_im, M, (src_size[1], src_size[0]), borderMode=cv2.BORDER_REPLICATE) 38 | return result -------------------------------------------------------------------------------- /legacy/README.md: -------------------------------------------------------------------------------- 1 | ## Notebooks that are no longer maintained are in this folder. 2 | 3 | ### faceswap-GAN v2.1 4 | * [FaceSwap_GAN_v2.1_train.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/legacy/FaceSwap_GAN_v2.1_train.ipynb) 5 | - An experimental model that provides architectures such as VAE and [XGAN](https://arxiv.org/abs/1711.05139). 6 | - In video conversion, it utilizes FCN for face segmentation to generate a hybrid alpha mask. 7 | 8 | The V2.1 model is an improved architecture intended to stabilize training. The architecture is greatly inspired by [XGAN](https://arxiv.org/abs/1711.05139) ~~and [MS-D neural network](http://www.pnas.org/content/115/2/254)~~. (Note: the V2.1 script is experimental and not well maintained.) 9 | - The V2.1 model provides three base architectures: (i) XGAN, (ii) VAE-GAN, and (iii) a variant of the v2 GAN (default `base_model="GAN"`). 10 | - FCN8s for face segmentation is introduced to improve masking in video conversion (default `use_FCN_mask = True`).
11 | - To enable this feature, the Keras weights file should be generated with the Jupyter notebook provided in [this repo](https://github.com/shaoanlu/face_segmentation_keras). 12 | 13 | ### faceswap-GAN v2 14 | * [FaceSwap_GAN_v2_train.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/legacy/FaceSwap_GAN_v2_train.ipynb) 15 | - Notebook for training the version 2 GAN model. 16 | - Video conversion functions are also included. 17 | 18 | * [FaceSwap_GAN_v2_test_video_MTCNN.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/legacy/FaceSwap_GAN_v2_test_video_MTCNN.ipynb) 19 | - Notebook for generating videos; uses MTCNN for face detection. 20 | 21 | * [faceswap_WGAN-GP_keras_github.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/lefacy/faceswap_WGAN-GP_keras_github.ipynb) 22 | - This notebook is an independent training script for a [WGAN-GP](https://arxiv.org/abs/1704.00028)-based GAN model in which the perceptual loss is discarded for simplicity. 23 | - Training can be started as follows: 24 | ```python 25 | gan = FaceSwapGAN() # instantiate the class 26 | gan.train(max_iters=10e4, save_interval=500) # start training 27 | ``` 28 | * [FaceSwap_GAN_v2_sz128_train.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/FaceSwap_GAN_v2_sz128_train.ipynb) 29 | - This notebook is an independent script for a model with 128x128 input/output resolution. 30 | 31 | ### faceswap-GAN v1 32 | * [FaceSwap_GAN_github.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/legacy/FaceSwap_GAN_github.ipynb) 33 | - The V1 model directly predicts color output images without masking. 34 | - Video conversion functions are also included. 35 | -------------------------------------------------------------------------------- /preprocess.py: -------------------------------------------------------------------------------- 1 | import keras.backend as K 2 | from moviepy.editor import VideoFileClip 3 | from matplotlib import pyplot as plt 4 | from pathlib import Path 5 | import os 6 | 7 | from converter.landmarks_alignment import * 8 | 9 | class VideoInfo: 10 | def __init__(self): 11 | self.frame = 0 12 | 13 | def process_image(input_img, info, detector, save_interval, save_path): 14 | minsize = 30 # minimum size of face 15 | detec_threshold = 0.9 16 | threshold = [0.7, 0.8, detec_threshold] # three steps' threshold 17 | factor = 0.709 # scale factor 18 | 19 | info.frame += 1 20 | frame = info.frame 21 | if frame % save_interval == 0: 22 | faces, pnts = detector.detect_face(input_img, threshold=detec_threshold, use_auto_downscaling=False) 23 | for idx, (x0, y1, x1, y0, conf_score) in enumerate(faces): 24 | det_face_im = input_img[int(x0):int(x1),int(y0):int(y1),:] 25 | 26 | # get src/tar landmarks 27 | src_landmarks = get_src_landmarks(x0, x1, y0, y1, pnts) 28 | tar_landmarks = get_tar_landmarks(det_face_im) 29 | 30 | # align detected face 31 | aligned_det_face_im = landmarks_match_mtcnn(det_face_im, src_landmarks, tar_landmarks) 32 | 33 | Path(os.path.join(f"{save_path}", "rgb")).mkdir(parents=True, exist_ok=True) 34 | fname = os.path.join(f"{save_path}", "rgb", f"frame{frame}face{str(idx)}.jpg") 35 | plt.imsave(fname, aligned_det_face_im, format="jpg") 36 | #fname = f"./faces/raw_faces/frame{frames}face{str(idx)}.jpg" 37 | #plt.imsave(fname, det_face_im, format="jpg") 38 | 39 | bm = np.zeros_like(aligned_det_face_im) 40 | h, w = bm.shape[:2] 41 | bm[int(src_landmarks[0][0]-h/15):int(src_landmarks[0][0]+h/15), 42 | int(src_landmarks[0][1]-w/8):int(src_landmarks[0][1]+w/8),:] = 255 43 |
bm[int(src_landmarks[1][0]-h/15):int(src_landmarks[1][0]+h/15), 44 | int(src_landmarks[1][1]-w/8):int(src_landmarks[1][1]+w/8),:] = 255 45 | bm = landmarks_match_mtcnn(bm, src_landmarks, tar_landmarks) 46 | Path(os.path.join(f"{save_path}", "binary_mask")).mkdir(parents=True, exist_ok=True) 47 | fname = os.path.join(f"{save_path}", "binary_mask", f"frame{frame}face{str(idx)}.jpg") 48 | plt.imsave(fname, bm, format="jpg") 49 | 50 | return np.zeros((3,3,3)) 51 | 52 | def preprocess_video(fn_input_video, fd, save_interval, save_path): 53 | info = VideoInfo() 54 | output = 'dummy.mp4' 55 | clip1 = VideoFileClip(fn_input_video) 56 | clip = clip1.fl_image(lambda img: process_image(img, info, fd, save_interval, save_path)) 57 | clip.write_videofile(output, audio=False, verbose=False) 58 | clip1.reader.close() 59 | 60 | 61 | -------------------------------------------------------------------------------- /data_loader/data_loader.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .data_augmentation import * 3 | 4 | 5 | class DataLoader(object): 6 | def __init__(self, filenames, all_filenames, batch_size, dir_bm_eyes, 7 | resolution, num_cpus, sess, **da_config): 8 | self.filenames = filenames 9 | self.all_filenames = all_filenames 10 | self.batch_size = batch_size 11 | self.dir_bm_eyes = dir_bm_eyes 12 | self.resolution = resolution 13 | self.num_cpus = num_cpus 14 | self.sess = sess 15 | 16 | self.set_data_augm_config( 17 | da_config["prob_random_color_match"], 18 | da_config["use_da_motion_blur"], 19 | da_config["use_bm_eyes"]) 20 | 21 | self.data_iter_next = self.create_tfdata_iter( 22 | self.filenames, 23 | self.all_filenames, 24 | self.batch_size, 25 | self.dir_bm_eyes, 26 | self.resolution, 27 | self.prob_random_color_match, 28 | self.use_da_motion_blur, 29 | self.use_bm_eyes, 30 | ) 31 | 32 | def set_data_augm_config(self, prob_random_color_match=0.5, 33 | use_da_motion_blur=True, use_bm_eyes=True): 34 | self.prob_random_color_match = prob_random_color_match 35 | self.use_da_motion_blur = use_da_motion_blur 36 | self.use_bm_eyes = use_bm_eyes 37 | 38 | def create_tfdata_iter(self, filenames, fns_all_trn_data, batch_size, dir_bm_eyes, resolution, 39 | prob_random_color_match, use_da_motion_blur, use_bm_eyes): 40 | tf_fns = tf.constant(filenames, dtype=tf.string) # use tf_fns=filenames is also fine 41 | dataset = tf.data.Dataset.from_tensor_slices(tf_fns) 42 | dataset = dataset.shuffle(len(filenames)) 43 | dataset = dataset.apply( 44 | tf.contrib.data.map_and_batch( 45 | lambda filenames: tf.py_func( 46 | func=read_image, 47 | inp=[filenames, 48 | fns_all_trn_data, 49 | dir_bm_eyes, 50 | resolution, 51 | prob_random_color_match, 52 | use_da_motion_blur, 53 | use_bm_eyes], 54 | Tout=[tf.float32, tf.float32, tf.float32] 55 | ), 56 | batch_size=batch_size, 57 | num_parallel_batches=self.num_cpus, # cpu cores 58 | drop_remainder=True 59 | ) 60 | ) 61 | dataset = dataset.repeat() 62 | dataset = dataset.prefetch(32) 63 | 64 | iterator = dataset.make_one_shot_iterator() 65 | next_element = iterator.get_next() # this tensor can also be useed as Input(tensor=next_element) 66 | return next_element 67 | 68 | def get_next_batch(self): 69 | return self.sess.run(self.data_iter_next) -------------------------------------------------------------------------------- /legacy/model_GAN_v2.py: -------------------------------------------------------------------------------- 1 | from keras.models import Sequential, Model 2 | from keras.layers 
import * 3 | from keras.layers.advanced_activations import LeakyReLU 4 | from keras.activations import relu 5 | from keras.initializers import RandomNormal 6 | from keras.applications import * 7 | import keras.backend as K 8 | from pixel_shuffler import PixelShuffler 9 | 10 | conv_init = RandomNormal(0, 0.02) 11 | 12 | def conv_block(input_tensor, f): 13 | x = input_tensor 14 | x = Conv2D(f, kernel_size=3, strides=2, kernel_initializer=conv_init, use_bias=False, padding="same")(x) 15 | x = Activation("relu")(x) 16 | return x 17 | 18 | def conv_block_d(input_tensor, f, use_instance_norm=True): 19 | x = input_tensor 20 | x = Conv2D(f, kernel_size=4, strides=2, kernel_initializer=conv_init, use_bias=False, padding="same")(x) 21 | x = LeakyReLU(alpha=0.2)(x) 22 | return x 23 | 24 | def res_block(input_tensor, f): 25 | x = input_tensor 26 | x = Conv2D(f, kernel_size=3, kernel_initializer=conv_init, use_bias=False, padding="same")(x) 27 | x = LeakyReLU(alpha=0.2)(x) 28 | x = Conv2D(f, kernel_size=3, kernel_initializer=conv_init, use_bias=False, padding="same")(x) 29 | x = add([x, input_tensor]) 30 | x = LeakyReLU(alpha=0.2)(x) 31 | return x 32 | 33 | def upscale_ps(filters, use_norm=True): 34 | def block(x): 35 | x = Conv2D(filters*4, kernel_size=3, use_bias=False, kernel_initializer=RandomNormal(0, 0.02), padding='same' )(x) 36 | x = LeakyReLU(0.1)(x) 37 | x = PixelShuffler()(x) 38 | return x 39 | return block 40 | 41 | def Discriminator(nc_in, input_size=64): 42 | inp = Input(shape=(input_size, input_size, nc_in)) 43 | #x = GaussianNoise(0.05)(inp) 44 | x = conv_block_d(inp, 64, False) 45 | x = conv_block_d(x, 128, False) 46 | x = conv_block_d(x, 256, False) 47 | out = Conv2D(1, kernel_size=4, kernel_initializer=conv_init, use_bias=False, padding="same", activation="sigmoid")(x) 48 | return Model(inputs=[inp], outputs=out) 49 | 50 | def Encoder(nc_in=3, input_size=64): 51 | inp = Input(shape=(input_size, input_size, nc_in)) 52 | x = Conv2D(64, kernel_size=5, kernel_initializer=conv_init, use_bias=False, padding="same")(inp) 53 | x = conv_block(x,128) 54 | x = conv_block(x,256) 55 | x = conv_block(x,512) 56 | x = conv_block(x,1024) 57 | x = Dense(1024)(Flatten()(x)) 58 | x = Dense(4*4*1024)(x) 59 | x = Reshape((4, 4, 1024))(x) 60 | out = upscale_ps(512)(x) 61 | return Model(inputs=inp, outputs=out) 62 | 63 | def Decoder_ps(nc_in=512, input_size=8): 64 | input_ = Input(shape=(input_size, input_size, nc_in)) 65 | x = input_ 66 | x = upscale_ps(256)(x) 67 | x = upscale_ps(128)(x) 68 | x = upscale_ps(64)(x) 69 | x = res_block(x, 64) 70 | x = res_block(x, 64) 71 | #x = Conv2D(4, kernel_size=5, padding='same')(x) 72 | alpha = Conv2D(1, kernel_size=5, padding='same', activation="sigmoid")(x) 73 | rgb = Conv2D(3, kernel_size=5, padding='same', activation="tanh")(x) 74 | out = concatenate([alpha, rgb]) 75 | return Model(input_, out ) -------------------------------------------------------------------------------- /converter/color_correction.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | """ Color corretion functions""" 5 | def hist_match(source, template): 6 | # Code borrow from: 7 | # https://stackoverflow.com/questions/32655686/histogram-matching-of-two-images-in-python-2-x 8 | oldshape = source.shape 9 | source = source.ravel() 10 | template = template.ravel() 11 | s_values, bin_idx, s_counts = np.unique(source, return_inverse=True, 12 | return_counts=True) 13 | t_values, t_counts = np.unique(template, 
return_counts=True) 14 | 15 | s_quantiles = np.cumsum(s_counts).astype(np.float64) 16 | s_quantiles /= s_quantiles[-1] 17 | t_quantiles = np.cumsum(t_counts).astype(np.float64) 18 | t_quantiles /= t_quantiles[-1] 19 | interp_t_values = np.interp(s_quantiles, t_quantiles, t_values) 20 | 21 | return interp_t_values[bin_idx].reshape(oldshape) 22 | 23 | def color_hist_match(src_im, tar_im, color_space="RGB"): 24 | if color_space.lower() != "rgb": 25 | src_im = trans_color_space(src_im, color_space) 26 | tar_im = trans_color_space(tar_im, color_space) 27 | 28 | matched_R = hist_match(src_im[:,:,0], tar_im[:,:,0]) 29 | matched_G = hist_match(src_im[:,:,1], tar_im[:,:,1]) 30 | matched_B = hist_match(src_im[:,:,2], tar_im[:,:,2]) 31 | matched = np.stack((matched_R, matched_G, matched_B), axis=2).astype(np.float32) 32 | matched = np.clip(matched, 0, 255) 33 | 34 | if color_space.lower() != "rgb": 35 | matched = trans_color_space(matched.astype(np.uint8), color_space, rev=True) 36 | return matched 37 | 38 | def adain(src_im, tar_im, eps=1e-7, color_space="RGB"): 39 | # https://github.com/ftokarev/tf-adain/blob/master/adain/norm.py 40 | if color_space.lower() != "rgb": 41 | src_im = trans_color_space(src_im, color_space) 42 | tar_im = trans_color_space(tar_im, color_space) 43 | 44 | mt = np.mean(tar_im, axis=(0,1)) 45 | st = np.std(tar_im, axis=(0,1)) 46 | ms = np.mean(src_im, axis=(0,1)) 47 | ss = np.std(src_im, axis=(0,1)) 48 | if np.any(ss <= eps): return src_im 49 | result = st * (src_im.astype(np.float32) - ms) / (ss+eps) + mt 50 | result = np.clip(result, 0, 255) 51 | 52 | if color_space.lower() != "rgb": 53 | result = trans_color_space(result.astype(np.uint8), color_space, rev=True) 54 | return result 55 | 56 | def trans_color_space(im, color_space, rev=False): 57 | if color_space.lower() == "lab": 58 | clr_spc = cv2.COLOR_BGR2Lab 59 | rev_clr_spc = cv2.COLOR_Lab2BGR 60 | elif color_space.lower() == "ycbcr": 61 | clr_spc = cv2.COLOR_BGR2YCR_CB 62 | rev_clr_spc = cv2.COLOR_YCR_CB2BGR 63 | elif color_space.lower() == "xyz": 64 | clr_spc = cv2.COLOR_BGR2XYZ 65 | rev_clr_spc = cv2.COLOR_XYZ2BGR 66 | elif color_space.lower() == "luv": 67 | clr_spc = cv2.COLOR_BGR2Luv 68 | rev_clr_spc = cv2.COLOR_Luv2BGR 69 | elif color_space.lower() == "rgb": 70 | pass 71 | else: 72 | raise NotImplementedError() 73 | 74 | if color_space.lower() != "rgb": 75 | trans_clr_spc = rev_clr_spc if rev else clr_spc 76 | im = cv2.cvtColor(im, trans_clr_spc) 77 | return im 78 | -------------------------------------------------------------------------------- /networks/pixel_shuffler.py: -------------------------------------------------------------------------------- 1 | # PixelShuffler layer for Keras 2 | # by t-ae 3 | # https://gist.github.com/t-ae/6e1016cc188104d123676ccef3264981 4 | 5 | from keras.utils import conv_utils 6 | from keras.engine.topology import Layer 7 | import keras.backend as K 8 | 9 | class PixelShuffler(Layer): 10 | def __init__(self, size=(2, 2), data_format=None, **kwargs): 11 | super(PixelShuffler, self).__init__(**kwargs) 12 | self.data_format = K.image_data_format() 13 | self.size = conv_utils.normalize_tuple(size, 2, 'size') 14 | 15 | def call(self, inputs): 16 | 17 | input_shape = K.int_shape(inputs) 18 | if len(input_shape) != 4: 19 | raise ValueError('Inputs should have rank ' + 20 | str(4) + 21 | '; Received input shape:', str(input_shape)) 22 | 23 | if self.data_format == 'channels_first': 24 | batch_size, c, h, w = input_shape 25 | if batch_size is None: 26 | batch_size = -1 27 | rh, rw = self.size
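# For a channels_first input of shape (B, oc*rh*rw, H, W), the reshape/permute/reshape below splits the
# channel axis into (rh, rw, oc), moves the rh and rw factors next to the spatial axes, and collapses them
# so the output has shape (B, oc, H*rh, W*rw), i.e. a standard pixel shuffle.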
28 | oh, ow = h * rh, w * rw 29 | oc = c // (rh * rw) 30 | 31 | out = K.reshape(inputs, (batch_size, rh, rw, oc, h, w)) 32 | out = K.permute_dimensions(out, (0, 3, 4, 1, 5, 2)) 33 | out = K.reshape(out, (batch_size, oc, oh, ow)) 34 | return out 35 | 36 | elif self.data_format == 'channels_last': 37 | batch_size, h, w, c = input_shape 38 | if batch_size is None: 39 | batch_size = -1 40 | rh, rw = self.size 41 | oh, ow = h * rh, w * rw 42 | oc = c // (rh * rw) 43 | 44 | out = K.reshape(inputs, (batch_size, h, w, rh, rw, oc)) 45 | out = K.permute_dimensions(out, (0, 1, 3, 2, 4, 5)) 46 | out = K.reshape(out, (batch_size, oh, ow, oc)) 47 | return out 48 | 49 | def compute_output_shape(self, input_shape): 50 | 51 | if len(input_shape) != 4: 52 | raise ValueError('Inputs should have rank ' + 53 | str(4) + 54 | '; Received input shape:', str(input_shape)) 55 | 56 | if self.data_format == 'channels_first': 57 | height = input_shape[2] * self.size[0] if input_shape[2] is not None else None 58 | width = input_shape[3] * self.size[1] if input_shape[3] is not None else None 59 | channels = input_shape[1] // self.size[0] // self.size[1] 60 | 61 | if channels * self.size[0] * self.size[1] != input_shape[1]: 62 | raise ValueError('channels of input and size are incompatible') 63 | 64 | return (input_shape[0], 65 | channels, 66 | height, 67 | width) 68 | 69 | elif self.data_format == 'channels_last': 70 | height = input_shape[1] * self.size[0] if input_shape[1] is not None else None 71 | width = input_shape[2] * self.size[1] if input_shape[2] is not None else None 72 | channels = input_shape[3] // self.size[0] // self.size[1] 73 | 74 | if channels * self.size[0] * self.size[1] != input_shape[3]: 75 | raise ValueError('channels of input and size are incompatible') 76 | 77 | return (input_shape[0], 78 | height, 79 | width, 80 | channels) 81 | 82 | def get_config(self): 83 | config = {'size': self.size, 84 | 'data_format': self.data_format} 85 | base_config = super(PixelShuffler, self).get_config() 86 | 87 | return dict(list(base_config.items()) + list(config.items())) 88 | -------------------------------------------------------------------------------- /legacy/pixel_shuffler.py: -------------------------------------------------------------------------------- 1 | # PixelShuffler layer for Keras 2 | # by t-ae 3 | # https://gist.github.com/t-ae/6e1016cc188104d123676ccef3264981 4 | 5 | from keras.utils import conv_utils 6 | from keras.engine.topology import Layer 7 | import keras.backend as K 8 | 9 | class PixelShuffler(Layer): 10 | def __init__(self, size=(2, 2), data_format=None, **kwargs): 11 | super(PixelShuffler, self).__init__(**kwargs) 12 | self.data_format = conv_utils.normalize_data_format(data_format) 13 | self.size = conv_utils.normalize_tuple(size, 2, 'size') 14 | 15 | def call(self, inputs): 16 | 17 | input_shape = K.int_shape(inputs) 18 | if len(input_shape) != 4: 19 | raise ValueError('Inputs should have rank ' + 20 | str(4) + 21 | '; Received input shape:', str(input_shape)) 22 | 23 | if self.data_format == 'channels_first': 24 | batch_size, c, h, w = input_shape 25 | if batch_size is None: 26 | batch_size = -1 27 | rh, rw = self.size 28 | oh, ow = h * rh, w * rw 29 | oc = c // (rh * rw) 30 | 31 | out = K.reshape(inputs, (batch_size, rh, rw, oc, h, w)) 32 | out = K.permute_dimensions(out, (0, 3, 4, 1, 5, 2)) 33 | out = K.reshape(out, (batch_size, oc, oh, ow)) 34 | return out 35 | 36 | elif self.data_format == 'channels_last': 37 | batch_size, h, w, c = input_shape 38 | if batch_size is None: 39 | 
batch_size = -1 40 | rh, rw = self.size 41 | oh, ow = h * rh, w * rw 42 | oc = c // (rh * rw) 43 | 44 | out = K.reshape(inputs, (batch_size, h, w, rh, rw, oc)) 45 | out = K.permute_dimensions(out, (0, 1, 3, 2, 4, 5)) 46 | out = K.reshape(out, (batch_size, oh, ow, oc)) 47 | return out 48 | 49 | def compute_output_shape(self, input_shape): 50 | 51 | if len(input_shape) != 4: 52 | raise ValueError('Inputs should have rank ' + 53 | str(4) + 54 | '; Received input shape:', str(input_shape)) 55 | 56 | if self.data_format == 'channels_first': 57 | height = input_shape[2] * self.size[0] if input_shape[2] is not None else None 58 | width = input_shape[3] * self.size[1] if input_shape[3] is not None else None 59 | channels = input_shape[1] // self.size[0] // self.size[1] 60 | 61 | if channels * self.size[0] * self.size[1] != input_shape[1]: 62 | raise ValueError('channels of input and size are incompatible') 63 | 64 | return (input_shape[0], 65 | channels, 66 | height, 67 | width) 68 | 69 | elif self.data_format == 'channels_last': 70 | height = input_shape[1] * self.size[0] if input_shape[1] is not None else None 71 | width = input_shape[2] * self.size[1] if input_shape[2] is not None else None 72 | channels = input_shape[3] // self.size[0] // self.size[1] 73 | 74 | if channels * self.size[0] * self.size[1] != input_shape[3]: 75 | raise ValueError('channels of input and size are incompatible') 76 | 77 | return (input_shape[0], 78 | height, 79 | width, 80 | channels) 81 | 82 | def get_config(self): 83 | config = {'size': self.size, 84 | 'data_format': self.data_format} 85 | base_config = super(PixelShuffler, self).get_config() 86 | 87 | return dict(list(base_config.items()) + list(config.items())) 88 | -------------------------------------------------------------------------------- /umeyama.py: -------------------------------------------------------------------------------- 1 | ## License (Modified BSD) 2 | ## Copyright (C) 2011, the scikit-image team All rights reserved. 3 | ## 4 | ## Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | ## 6 | ## Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 7 | ## Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | ## Neither the name of skimage nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 9 | ## THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
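# Usage sketch (mirroring converter/landmarks_alignment.py, which passes landmark coordinates in (y, x)
# order; the variable names below are illustrative only):
#     M = umeyama(np.array(src_landmarks_yx), np.array(tar_landmarks_yx), True)[0:2]
#     aligned = cv2.warpAffine(face_im, M, (w, h), borderMode=cv2.BORDER_REPLICATE)
# i.e. the top two rows of the homogeneous similarity transform are used as an affine warp matrix.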
10 | 11 | # umeyama function from scikit-image/skimage/transform/_geometric.py 12 | 13 | import numpy as np 14 | 15 | def umeyama( src, dst, estimate_scale ): 16 | """Estimate N-D similarity transformation with or without scaling. 17 | Parameters 18 | ---------- 19 | src : (M, N) array 20 | Source coordinates. 21 | dst : (M, N) array 22 | Destination coordinates. 23 | estimate_scale : bool 24 | Whether to estimate scaling factor. 25 | Returns 26 | ------- 27 | T : (N + 1, N + 1) 28 | The homogeneous similarity transformation matrix. The matrix contains 29 | NaN values only if the problem is not well-conditioned. 30 | References 31 | ---------- 32 | .. [1] "Least-squares estimation of transformation parameters between two 33 | point patterns", Shinji Umeyama, PAMI 1991, DOI: 10.1109/34.88573 34 | """ 35 | 36 | num = src.shape[0] 37 | dim = src.shape[1] 38 | 39 | # Compute mean of src and dst. 40 | src_mean = src.mean(axis=0) 41 | dst_mean = dst.mean(axis=0) 42 | 43 | # Subtract mean from src and dst. 44 | src_demean = src - src_mean 45 | dst_demean = dst - dst_mean 46 | 47 | # Eq. (38). 48 | A = np.dot(dst_demean.T, src_demean) / num 49 | 50 | # Eq. (39). 51 | d = np.ones((dim,), dtype=np.double) 52 | if np.linalg.det(A) < 0: 53 | d[dim - 1] = -1 54 | 55 | T = np.eye(dim + 1, dtype=np.double) 56 | 57 | U, S, V = np.linalg.svd(A) 58 | 59 | # Eq. (40) and (43). 60 | rank = np.linalg.matrix_rank(A) 61 | if rank == 0: 62 | return np.nan * T 63 | elif rank == dim - 1: 64 | if np.linalg.det(U) * np.linalg.det(V) > 0: 65 | T[:dim, :dim] = np.dot(U, V) 66 | else: 67 | s = d[dim - 1] 68 | d[dim - 1] = -1 69 | T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V)) 70 | d[dim - 1] = s 71 | else: 72 | T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V.T)) 73 | 74 | if estimate_scale: 75 | # Eq. (41) and (42). 
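# i.e. scale = (sum_i S_i * d_i) / (total variance of the demeaned source points)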
76 | scale = 1.0 / src_demean.var(axis=0).sum() * np.dot(S, d) 77 | else: 78 | scale = 1.0 79 | 80 | T[:dim, dim] = dst_mean - scale * np.dot(T[:dim, :dim], src_mean.T) 81 | T[:dim, :dim] *= scale 82 | 83 | return T 84 | 85 | -------------------------------------------------------------------------------- /legacy/utils.py: -------------------------------------------------------------------------------- 1 | from IPython.display import display 2 | from PIL import Image 3 | import cv2 4 | import numpy as np 5 | import os 6 | 7 | def get_image_paths(directory): 8 | return [x.path for x in os.scandir(directory) if x.name.endswith(".jpg") or x.name.endswith(".png")] 9 | 10 | def load_images(image_paths, convert=None): 11 | iter_all_images = (cv2.resize(cv2.imread(fn), (256,256)) for fn in image_paths) 12 | if convert: 13 | iter_all_images = (convert(img) for img in iter_all_images) 14 | for i,image in enumerate( iter_all_images ): 15 | if i == 0: 16 | all_images = np.empty((len(image_paths),) + image.shape, dtype=image.dtype) 17 | all_images[i] = image 18 | return all_images 19 | 20 | def get_transpose_axes( n ): 21 | if n % 2 == 0: 22 | y_axes = list(range(1, n-1, 2)) 23 | x_axes = list(range(0, n-1, 2)) 24 | else: 25 | y_axes = list(range(0, n-1, 2)) 26 | x_axes = list(range(1, n-1, 2)) 27 | return y_axes, x_axes, [n-1] 28 | 29 | def stack_images(images): 30 | images_shape = np.array(images.shape) 31 | new_axes = get_transpose_axes(len(images_shape)) 32 | new_shape = [np.prod(images_shape[x]) for x in new_axes] 33 | return np.transpose( 34 | images, 35 | axes = np.concatenate(new_axes) 36 | ).reshape(new_shape) 37 | 38 | def showG(test_A, test_B, path_A, path_B, batchSize): 39 | figure_A = np.stack([ 40 | test_A, 41 | np.squeeze(np.array([path_A([test_A[i:i+1]]) for i in range(test_A.shape[0])])), 42 | np.squeeze(np.array([path_B([test_A[i:i+1]]) for i in range(test_A.shape[0])])), 43 | ], axis=1 ) 44 | figure_B = np.stack([ 45 | test_B, 46 | np.squeeze(np.array([path_B([test_B[i:i+1]]) for i in range(test_B.shape[0])])), 47 | np.squeeze(np.array([path_A([test_B[i:i+1]]) for i in range(test_B.shape[0])])), 48 | ], axis=1 ) 49 | 50 | figure = np.concatenate([figure_A, figure_B], axis=0) 51 | figure = figure.reshape((4,batchSize//2) + figure.shape[1:]) 52 | figure = stack_images(figure) 53 | figure = np.clip((figure + 1) * 255 / 2, 0, 255).astype('uint8') 54 | figure = cv2.cvtColor(figure, cv2.COLOR_BGR2RGB) 55 | 56 | display(Image.fromarray(figure)) 57 | 58 | def showG_mask(test_A, test_B, path_A, path_B, batchSize): 59 | figure_A = np.stack([ 60 | test_A, 61 | (np.squeeze(np.array([path_A([test_A[i:i+1]]) for i in range(test_A.shape[0])])))*2-1, 62 | (np.squeeze(np.array([path_B([test_A[i:i+1]]) for i in range(test_A.shape[0])])))*2-1, 63 | ], axis=1 ) 64 | figure_B = np.stack([ 65 | test_B, 66 | (np.squeeze(np.array([path_B([test_B[i:i+1]]) for i in range(test_B.shape[0])])))*2-1, 67 | (np.squeeze(np.array([path_A([test_B[i:i+1]]) for i in range(test_B.shape[0])])))*2-1, 68 | ], axis=1 ) 69 | 70 | figure = np.concatenate([figure_A, figure_B], axis=0) 71 | figure = figure.reshape((4,batchSize//2) + figure.shape[1:]) 72 | figure = stack_images(figure) 73 | figure = np.clip((figure + 1) * 255 / 2, 0, 255).astype('uint8') 74 | figure = cv2.cvtColor(figure, cv2.COLOR_BGR2RGB) 75 | 76 | display(Image.fromarray(figure)) 77 | 78 | def showG_eyes(test_A, test_B, bm_eyes_A, bm_eyes_B, batchSize): 79 | figure_A = np.stack([ 80 | (test_A + 1)/2, 81 | bm_eyes_A, 82 | bm_eyes_A * (test_A + 1)/2, 83 | ], 
axis=1 ) 84 | figure_B = np.stack([ 85 | (test_B + 1)/2, 86 | bm_eyes_B, 87 | bm_eyes_B * (test_B+1)/2, 88 | ], axis=1 ) 89 | 90 | figure = np.concatenate([figure_A, figure_B], axis=0) 91 | figure = figure.reshape((4,batchSize//2) + figure.shape[1:]) 92 | figure = stack_images(figure) 93 | figure = np.clip(figure * 255, 0, 255).astype('uint8') 94 | figure = cv2.cvtColor(figure, cv2.COLOR_BGR2RGB) 95 | 96 | display(Image.fromarray(figure)) 97 | -------------------------------------------------------------------------------- /image_augmentation.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy 3 | 4 | from umeyama import umeyama 5 | 6 | def random_channel_shift(x, intensity=10, channel_axis=2): 7 | x = numpy.rollaxis(x, channel_axis, 0) 8 | min_x, max_x = numpy.min(x), numpy.max(x) 9 | intensity = max_x/255*15. 10 | channel_images = [numpy.clip(x_channel + numpy.random.uniform(-intensity, intensity), min_x, max_x) for x_channel in x] 11 | x = numpy.stack(channel_images, axis=0) 12 | x = numpy.rollaxis(x, 0, channel_axis + 1) 13 | return x 14 | 15 | def random_transform( image, rotation_range, zoom_range, shift_range, random_flip ): 16 | h,w = image.shape[0:2] 17 | #color_shifted_image = random_channel_shift(image) 18 | rotation = numpy.random.uniform( -rotation_range, rotation_range ) 19 | scale = numpy.random.uniform( 1 - zoom_range, 1 + zoom_range ) 20 | tx = numpy.random.uniform( -shift_range, shift_range ) * w 21 | ty = numpy.random.uniform( -shift_range, shift_range ) * h 22 | mat = cv2.getRotationMatrix2D( (w//2,h//2), rotation, scale ) 23 | mat[:,2] += (tx,ty) 24 | result = cv2.warpAffine( image, mat, (w,h), borderMode=cv2.BORDER_REPLICATE ) 25 | if numpy.random.random() < random_flip: 26 | result = result[:,::-1] 27 | return result 28 | 29 | # get pair of random warped images from aligened face image 30 | def random_warp( image ): 31 | assert image.shape == (256,256,3) 32 | #range_ = numpy.linspace( 128-80, 128+80, 5 ) 33 | range_ = numpy.linspace( 128-110, 128+110, 5 ) 34 | mapx = numpy.broadcast_to( range_, (5,5) ) 35 | mapy = mapx.T 36 | 37 | mapx = mapx + numpy.random.normal( size=(5,5), scale=6 ) 38 | mapy = mapy + numpy.random.normal( size=(5,5), scale=6 ) 39 | 40 | interp_mapx = cv2.resize( mapx, (80,80) )[8:72,8:72].astype('float32') 41 | interp_mapy = cv2.resize( mapy, (80,80) )[8:72,8:72].astype('float32') 42 | 43 | warped_image = cv2.remap( image, interp_mapx, interp_mapy, cv2.INTER_LINEAR ) 44 | 45 | src_points = numpy.stack( [ mapx.ravel(), mapy.ravel() ], axis=-1 ) 46 | dst_points = numpy.mgrid[0:65:16,0:65:16].T.reshape(-1,2) 47 | mat = umeyama( src_points, dst_points, True )[0:2] 48 | 49 | target_image = cv2.warpAffine( image, mat, (64,64) ) 50 | 51 | return warped_image, target_image 52 | 53 | # get pair of random warped images from aligened face image 54 | def random_warp128(image): 55 | assert image.shape == (256,256,3) 56 | range_ = numpy.linspace(128-110, 128+110, 5) 57 | mapx = numpy.broadcast_to(range_, (5,5)) 58 | mapy = mapx.T 59 | 60 | mapx = mapx + numpy.random.normal(size=(5,5), scale=6) 61 | mapy = mapy + numpy.random.normal(size=(5,5), scale=6) 62 | 63 | interp_mapx = cv2.resize(mapx, (80*2,80*2))[8*2:72*2,8*2:72*2].astype('float32') 64 | interp_mapy = cv2.resize(mapy, (80*2,80*2))[8*2:72*2,8*2:72*2].astype('float32') 65 | 66 | warped_image = cv2.remap(image, interp_mapx, interp_mapy, cv2.INTER_LINEAR) 67 | 68 | src_points = numpy.stack([mapx.ravel(), mapy.ravel() ], axis=-1) 69 | 
dst_points = numpy.mgrid[0:65*2:16*2,0:65*2:16*2].T.reshape(-1,2) 70 | mat = umeyama(src_points, dst_points, True)[0:2] 71 | 72 | target_image = cv2.warpAffine(image, mat, (64*2,64*2)) 73 | 74 | return warped_image, target_image 75 | 76 | # get pair of random warped images from aligened face image 77 | def random_warp224(image): 78 | assert image.shape == (256,256,3) 79 | range_ = numpy.linspace(128-110, 128+110, 5) 80 | mapx = numpy.broadcast_to(range_, (5,5)) 81 | mapy = mapx.T 82 | 83 | mapx = mapx + numpy.random.normal(size=(5,5), scale=6) 84 | mapy = mapy + numpy.random.normal(size=(5,5), scale=6) 85 | 86 | interp_mapx = cv2.resize(mapx, (80*4,80*4))[8*4:72*4,8*4:72*4].astype('float32') 87 | interp_mapy = cv2.resize(mapy, (80*4,80*4))[8*4:72*4,8*4:72*4].astype('float32') 88 | 89 | warped_image = cv2.remap(image, interp_mapx, interp_mapy, cv2.INTER_LINEAR) 90 | 91 | src_points = numpy.stack([mapx.ravel(), mapy.ravel() ], axis=-1) 92 | dst_points = numpy.mgrid[0:65*4:16*4,0:65*4:16*4].T.reshape(-1,2) 93 | mat = umeyama(src_points, dst_points, True)[0:2] 94 | 95 | target_image = cv2.warpAffine(image, mat, (64*4,64*4)) 96 | 97 | target_image = cv2.resize(target_image, (224,224)) 98 | warped_image = cv2.resize(warped_image, (224,224)) 99 | 100 | return warped_image, target_image 101 | -------------------------------------------------------------------------------- /legacy/image_augmentation.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy 3 | 4 | from umeyama import umeyama 5 | 6 | def random_channel_shift(x, intensity=10, channel_axis=2): 7 | x = numpy.rollaxis(x, channel_axis, 0) 8 | min_x, max_x = numpy.min(x), numpy.max(x) 9 | intensity = max_x/255*15. 10 | channel_images = [numpy.clip(x_channel + numpy.random.uniform(-intensity, intensity), min_x, max_x) for x_channel in x] 11 | x = numpy.stack(channel_images, axis=0) 12 | x = numpy.rollaxis(x, 0, channel_axis + 1) 13 | return x 14 | 15 | def random_transform( image, rotation_range, zoom_range, shift_range, random_flip ): 16 | h,w = image.shape[0:2] 17 | #color_shifted_image = random_channel_shift(image) 18 | rotation = numpy.random.uniform( -rotation_range, rotation_range ) 19 | scale = numpy.random.uniform( 1 - zoom_range, 1 + zoom_range ) 20 | tx = numpy.random.uniform( -shift_range, shift_range ) * w 21 | ty = numpy.random.uniform( -shift_range, shift_range ) * h 22 | mat = cv2.getRotationMatrix2D( (w//2,h//2), rotation, scale ) 23 | mat[:,2] += (tx,ty) 24 | result = cv2.warpAffine( image, mat, (w,h), borderMode=cv2.BORDER_REPLICATE ) 25 | if numpy.random.random() < random_flip: 26 | result = result[:,::-1] 27 | return result 28 | 29 | # get pair of random warped images from aligened face image 30 | def random_warp( image ): 31 | assert image.shape == (256,256,3) 32 | #range_ = numpy.linspace( 128-80, 128+80, 5 ) 33 | range_ = numpy.linspace( 128-110, 128+110, 5 ) 34 | mapx = numpy.broadcast_to( range_, (5,5) ) 35 | mapy = mapx.T 36 | 37 | mapx = mapx + numpy.random.normal( size=(5,5), scale=6 ) 38 | mapy = mapy + numpy.random.normal( size=(5,5), scale=6 ) 39 | 40 | interp_mapx = cv2.resize( mapx, (80,80) )[8:72,8:72].astype('float32') 41 | interp_mapy = cv2.resize( mapy, (80,80) )[8:72,8:72].astype('float32') 42 | 43 | warped_image = cv2.remap( image, interp_mapx, interp_mapy, cv2.INTER_LINEAR ) 44 | 45 | src_points = numpy.stack( [ mapx.ravel(), mapy.ravel() ], axis=-1 ) 46 | dst_points = numpy.mgrid[0:65:16,0:65:16].T.reshape(-1,2) 47 | mat = umeyama( 
src_points, dst_points, True )[0:2] 48 | 49 | target_image = cv2.warpAffine( image, mat, (64,64) ) 50 | 51 | return warped_image, target_image 52 | 53 | # get pair of random warped images from aligened face image 54 | def random_warp128(image): 55 | assert image.shape == (256,256,3) 56 | range_ = numpy.linspace(128-110, 128+110, 5) 57 | mapx = numpy.broadcast_to(range_, (5,5)) 58 | mapy = mapx.T 59 | 60 | mapx = mapx + numpy.random.normal(size=(5,5), scale=6) 61 | mapy = mapy + numpy.random.normal(size=(5,5), scale=6) 62 | 63 | interp_mapx = cv2.resize(mapx, (80*2,80*2))[8*2:72*2,8*2:72*2].astype('float32') 64 | interp_mapy = cv2.resize(mapy, (80*2,80*2))[8*2:72*2,8*2:72*2].astype('float32') 65 | 66 | warped_image = cv2.remap(image, interp_mapx, interp_mapy, cv2.INTER_LINEAR) 67 | 68 | src_points = numpy.stack([mapx.ravel(), mapy.ravel() ], axis=-1) 69 | dst_points = numpy.mgrid[0:65*2:16*2,0:65*2:16*2].T.reshape(-1,2) 70 | mat = umeyama(src_points, dst_points, True)[0:2] 71 | 72 | target_image = cv2.warpAffine(image, mat, (64*2,64*2)) 73 | 74 | return warped_image, target_image 75 | 76 | # get pair of random warped images from aligened face image 77 | def random_warp224(image): 78 | assert image.shape == (256,256,3) 79 | range_ = numpy.linspace(128-110, 128+110, 5) 80 | mapx = numpy.broadcast_to(range_, (5,5)) 81 | mapy = mapx.T 82 | 83 | mapx = mapx + numpy.random.normal(size=(5,5), scale=6) 84 | mapy = mapy + numpy.random.normal(size=(5,5), scale=6) 85 | 86 | interp_mapx = cv2.resize(mapx, (80*4,80*4))[8*4:72*4,8*4:72*4].astype('float32') 87 | interp_mapy = cv2.resize(mapy, (80*4,80*4))[8*4:72*4,8*4:72*4].astype('float32') 88 | 89 | warped_image = cv2.remap(image, interp_mapx, interp_mapy, cv2.INTER_LINEAR) 90 | 91 | src_points = numpy.stack([mapx.ravel(), mapy.ravel() ], axis=-1) 92 | dst_points = numpy.mgrid[0:65*4:16*4,0:65*4:16*4].T.reshape(-1,2) 93 | mat = umeyama(src_points, dst_points, True)[0:2] 94 | 95 | target_image = cv2.warpAffine(image, mat, (64*4,64*4)) 96 | 97 | target_image = cv2.resize(target_image, (224,224)) 98 | warped_image = cv2.resize(warped_image, (224,224)) 99 | 100 | return warped_image, target_image 101 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | from IPython.display import display 2 | from PIL import Image 3 | import numpy as np 4 | import cv2 5 | import os 6 | import yaml 7 | 8 | def get_image_paths(directory): 9 | return [x.path for x in os.scandir(directory) if x.name.endswith(".jpg") or x.name.endswith(".png")] 10 | 11 | def load_images(image_paths, convert=None): 12 | iter_all_images = (cv2.resize(cv2.imread(fn), (256,256)) for fn in image_paths) 13 | if convert: 14 | iter_all_images = (convert(img) for img in iter_all_images) 15 | for i,image in enumerate( iter_all_images ): 16 | if i == 0: 17 | all_images = np.empty((len(image_paths),) + image.shape, dtype=image.dtype) 18 | all_images[i] = image 19 | return all_images 20 | 21 | def get_transpose_axes( n ): 22 | if n % 2 == 0: 23 | y_axes = list(range(1, n-1, 2)) 24 | x_axes = list(range(0, n-1, 2)) 25 | else: 26 | y_axes = list(range(0, n-1, 2)) 27 | x_axes = list(range(1, n-1, 2)) 28 | return y_axes, x_axes, [n-1] 29 | 30 | def stack_images(images): 31 | images_shape = np.array(images.shape) 32 | new_axes = get_transpose_axes(len(images_shape)) 33 | new_shape = [np.prod(images_shape[x]) for x in new_axes] 34 | return np.transpose( 35 | images, 36 | axes = 
np.concatenate(new_axes) 37 | ).reshape(new_shape) 38 | 39 | def showG(test_A, test_B, path_A, path_B, batchSize): 40 | figure_A = np.stack([ 41 | test_A, 42 | np.squeeze(np.array([path_A([test_A[i:i+1]]) for i in range(test_A.shape[0])])), 43 | np.squeeze(np.array([path_B([test_A[i:i+1]]) for i in range(test_A.shape[0])])), 44 | ], axis=1 ) 45 | figure_B = np.stack([ 46 | test_B, 47 | np.squeeze(np.array([path_B([test_B[i:i+1]]) for i in range(test_B.shape[0])])), 48 | np.squeeze(np.array([path_A([test_B[i:i+1]]) for i in range(test_B.shape[0])])), 49 | ], axis=1 ) 50 | 51 | figure = np.concatenate([figure_A, figure_B], axis=0) 52 | figure = figure.reshape((4,batchSize//2) + figure.shape[1:]) 53 | figure = stack_images(figure) 54 | figure = np.clip((figure + 1) * 255 / 2, 0, 255).astype('uint8') 55 | figure = cv2.cvtColor(figure, cv2.COLOR_BGR2RGB) 56 | display(Image.fromarray(figure)) 57 | 58 | def showG_mask(test_A, test_B, path_A, path_B, batchSize): 59 | figure_A = np.stack([ 60 | test_A, 61 | (np.squeeze(np.array([path_A([test_A[i:i+1]]) for i in range(test_A.shape[0])])))*2-1, 62 | (np.squeeze(np.array([path_B([test_A[i:i+1]]) for i in range(test_A.shape[0])])))*2-1, 63 | ], axis=1 ) 64 | figure_B = np.stack([ 65 | test_B, 66 | (np.squeeze(np.array([path_B([test_B[i:i+1]]) for i in range(test_B.shape[0])])))*2-1, 67 | (np.squeeze(np.array([path_A([test_B[i:i+1]]) for i in range(test_B.shape[0])])))*2-1, 68 | ], axis=1 ) 69 | 70 | figure = np.concatenate([figure_A, figure_B], axis=0) 71 | figure = figure.reshape((4,batchSize//2) + figure.shape[1:]) 72 | figure = stack_images(figure) 73 | figure = np.clip((figure + 1) * 255 / 2, 0, 255).astype('uint8') 74 | figure = cv2.cvtColor(figure, cv2.COLOR_BGR2RGB) 75 | display(Image.fromarray(figure)) 76 | 77 | def showG_eyes(test_A, test_B, bm_eyes_A, bm_eyes_B, batchSize): 78 | figure_A = np.stack([ 79 | (test_A + 1)/2, 80 | bm_eyes_A, 81 | bm_eyes_A * (test_A + 1)/2, 82 | ], axis=1 ) 83 | figure_B = np.stack([ 84 | (test_B + 1)/2, 85 | bm_eyes_B, 86 | bm_eyes_B * (test_B+1)/2, 87 | ], axis=1 ) 88 | 89 | figure = np.concatenate([figure_A, figure_B], axis=0) 90 | figure = figure.reshape((4,batchSize//2) + figure.shape[1:]) 91 | figure = stack_images(figure) 92 | figure = np.clip(figure * 255, 0, 255).astype('uint8') 93 | figure = cv2.cvtColor(figure, cv2.COLOR_BGR2RGB) 94 | 95 | display(Image.fromarray(figure)) 96 | 97 | def save_preview_image(test_A, test_B, 98 | path_A, path_B, 99 | path_bgr_A, path_bgr_B, 100 | path_mask_A, path_mask_B, 101 | batchSize, save_fn="preview.jpg"): 102 | figure_A = np.stack([ 103 | test_A, 104 | np.squeeze(np.array([path_bgr_B([test_A[i:i+1]]) for i in range(test_A.shape[0])])), 105 | (np.squeeze(np.array([path_mask_B([test_A[i:i+1]]) for i in range(test_A.shape[0])])))*2-1, 106 | np.squeeze(np.array([path_B([test_A[i:i+1]]) for i in range(test_A.shape[0])])), 107 | ], axis=1 ) 108 | figure_B = np.stack([ 109 | test_B, 110 | np.squeeze(np.array([path_bgr_A([test_B[i:i+1]]) for i in range(test_B.shape[0])])), 111 | (np.squeeze(np.array([path_mask_A([test_B[i:i+1]]) for i in range(test_B.shape[0])])))*2-1, 112 | np.squeeze(np.array([path_A([test_B[i:i+1]]) for i in range(test_B.shape[0])])), 113 | ], axis=1 ) 114 | 115 | figure = np.concatenate([figure_A, figure_B], axis=0) 116 | figure = figure.reshape((4,batchSize//2) + figure.shape[1:]) 117 | figure = stack_images(figure) 118 | figure = np.clip((figure + 1) * 255 / 2, 0, 255).astype('uint8') 119 | cv2.imwrite(save_fn, figure) 120 | 121 | def 
load_yaml(path_configs): 122 | with open(path_configs, 'r') as f: 123 | return yaml.load(f) 124 | 125 | def show_loss_config(loss_config): 126 | """ 127 | Print out loss configuration. Called in loss function automation. 128 | 129 | Argument: 130 | loss_config: A dictionary. Configuration regarding the optimization. 131 | """ 132 | for config, value in loss_config.items(): 133 | print(f"{config} = {value}") 134 | -------------------------------------------------------------------------------- /networks/GroupNormalization.py: -------------------------------------------------------------------------------- 1 | from keras.engine import Layer, InputSpec 2 | from keras import initializers, regularizers 3 | from keras import backend as K 4 | from keras.utils import conv_utils 5 | 6 | try: 7 | from keras.utils.conv_utils import normalize_data_format 8 | except: 9 | from keras.backend.common import normalize_data_format 10 | 11 | def to_list(x): 12 | if type(x) not in [list, tuple]: 13 | return [x] 14 | else: 15 | return list(x) 16 | 17 | class GroupNormalization(Layer): 18 | def __init__(self, axis=-1, 19 | gamma_init='one', beta_init='zero', 20 | gamma_regularizer=None, beta_regularizer=None, 21 | epsilon=1e-6, 22 | group=32, 23 | data_format=None, 24 | **kwargs): 25 | super(GroupNormalization, self).__init__(**kwargs) 26 | 27 | self.axis = to_list(axis) 28 | self.gamma_init = initializers.get(gamma_init) 29 | self.beta_init = initializers.get(beta_init) 30 | self.gamma_regularizer = regularizers.get(gamma_regularizer) 31 | self.beta_regularizer = regularizers.get(beta_regularizer) 32 | self.epsilon = epsilon 33 | self.group = group 34 | self.data_format = normalize_data_format(data_format) 35 | 36 | self.supports_masking = True 37 | 38 | def build(self, input_shape): 39 | self.input_spec = [InputSpec(shape=input_shape)] 40 | shape = [1 for _ in input_shape] 41 | if self.data_format == 'channels_last': 42 | channel_axis = -1 43 | shape[channel_axis] = input_shape[channel_axis] 44 | elif self.data_format == 'channels_first': 45 | channel_axis = 1 46 | shape[channel_axis] = input_shape[channel_axis] 47 | #for i in self.axis: 48 | # shape[i] = input_shape[i] 49 | self.gamma = self.add_weight(shape=shape, 50 | initializer=self.gamma_init, 51 | regularizer=self.gamma_regularizer, 52 | name='gamma') 53 | self.beta = self.add_weight(shape=shape, 54 | initializer=self.beta_init, 55 | regularizer=self.beta_regularizer, 56 | name='beta') 57 | self.built = True 58 | 59 | def call(self, inputs, mask=None): 60 | input_shape = K.int_shape(inputs) 61 | if len(input_shape) != 4 and len(input_shape) != 2: 62 | raise ValueError('Inputs should have rank ' + 63 | str(4) + " or " + str(2) + 64 | '; Received input shape:', str(input_shape)) 65 | 66 | if len(input_shape) == 4: 67 | if self.data_format == 'channels_last': 68 | batch_size, h, w, c = input_shape 69 | if batch_size is None: 70 | batch_size = -1 71 | 72 | if c < self.group: 73 | raise ValueError('Input channels should be larger than group size' + 74 | '; Received input channels: ' + str(c) + 75 | '; Group size: ' + str(self.group) 76 | ) 77 | 78 | x = K.reshape(inputs, (batch_size, h, w, self.group, c // self.group)) 79 | mean = K.mean(x, axis=[1, 2, 4], keepdims=True) 80 | std = K.sqrt(K.var(x, axis=[1, 2, 4], keepdims=True) + self.epsilon) 81 | x = (x - mean) / std 82 | 83 | x = K.reshape(x, (batch_size, h, w, c)) 84 | return self.gamma * x + self.beta 85 | elif self.data_format == 'channels_first': 86 | batch_size, c, h, w = input_shape 87 | if 
batch_size is None: 88 | batch_size = -1 89 | 90 | if c < self.group: 91 | raise ValueError('Input channels should be larger than group size' + 92 | '; Received input channels: ' + str(c) + 93 | '; Group size: ' + str(self.group) 94 | ) 95 | 96 | x = K.reshape(inputs, (batch_size, self.group, c // self.group, h, w)) 97 | mean = K.mean(x, axis=[2, 3, 4], keepdims=True) 98 | std = K.sqrt(K.var(x, axis=[2, 3, 4], keepdims=True) + self.epsilon) 99 | x = (x - mean) / std 100 | 101 | x = K.reshape(x, (batch_size, c, h, w)) 102 | return self.gamma * x + self.beta 103 | 104 | elif len(input_shape) == 2: 105 | reduction_axes = list(range(0, len(input_shape))) 106 | del reduction_axes[0] 107 | batch_size, _ = input_shape 108 | if batch_size is None: 109 | batch_size = -1 110 | 111 | mean = K.mean(inputs, keepdims=True) 112 | std = K.sqrt(K.var(inputs, keepdims=True) + self.epsilon) 113 | x = (inputs - mean) / std 114 | 115 | return self.gamma * x + self.beta 116 | 117 | 118 | def get_config(self): 119 | config = {'epsilon': self.epsilon, 120 | 'axis': self.axis, 121 | 'gamma_init': initializers.serialize(self.gamma_init), 122 | 'beta_init': initializers.serialize(self.beta_init), 123 | 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer), 124 | 'beta_regularizer': regularizers.serialize(self.beta_regularizer), 125 | 'group': self.group 126 | } 127 | base_config = super(GroupNormalization, self).get_config() 128 | return dict(list(base_config.items()) + list(config.items())) 129 | -------------------------------------------------------------------------------- /detector/face_detector.py: -------------------------------------------------------------------------------- 1 | import mtcnn_detect_face 2 | import tensorflow as tf 3 | from keras import backend as K 4 | import numpy as np 5 | import cv2 6 | import os 7 | 8 | class MTCNNFaceDetector(): 9 | """ 10 | This class loads the MTCNN network and performs face detection.
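    Example (illustrative sketch; `rgb_image` is a placeholder for an input frame):
        sess = K.get_session()
        fd = MTCNNFaceDetector(sess, model_path="./mtcnn_weights/")
        faces, pnts = fd.detect_face(rgb_image, threshold=0.9, use_auto_downscaling=False)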
11 | 12 | Attributes: 13 | model_path: path to the MTCNN weights files 14 | """ 15 | def __init__(self, sess, model_path="./mtcnn_weights/"): 16 | self.pnet = None 17 | self.rnet = None 18 | self.onet = None 19 | self.create_mtcnn(sess, model_path) 20 | 21 | def create_mtcnn(self, sess, model_path): 22 | if not model_path: 23 | model_path, _ = os.path.split(os.path.realpath(__file__)) 24 | 25 | with tf.variable_scope('pnet'): 26 | data = tf.placeholder(tf.float32, (None,None,None,3), 'input') 27 | pnet = mtcnn_detect_face.PNet({'data':data}) 28 | pnet.load(os.path.join(model_path, 'det1.npy'), sess) 29 | with tf.variable_scope('rnet'): 30 | data = tf.placeholder(tf.float32, (None,24,24,3), 'input') 31 | rnet = mtcnn_detect_face.RNet({'data':data}) 32 | rnet.load(os.path.join(model_path, 'det2.npy'), sess) 33 | with tf.variable_scope('onet'): 34 | data = tf.placeholder(tf.float32, (None,48,48,3), 'input') 35 | onet = mtcnn_detect_face.ONet({'data':data}) 36 | onet.load(os.path.join(model_path, 'det3.npy'), sess) 37 | self.pnet = K.function([pnet.layers['data']], [pnet.layers['conv4-2'], pnet.layers['prob1']]) 38 | self.rnet = K.function([rnet.layers['data']], [rnet.layers['conv5-2'], rnet.layers['prob1']]) 39 | self.onet = K.function([onet.layers['data']], [onet.layers['conv6-2'], onet.layers['conv6-3'], onet.layers['prob1']]) 40 | 41 | def detect_face(self, image, minsize=20, threshold=0.7, factor=0.709, use_auto_downscaling=True, min_face_area=25*25): 42 | if use_auto_downscaling: 43 | image, scale_factor = self.auto_downscale(image) 44 | 45 | faces, pnts = mtcnn_detect_face.detect_face( 46 | image, minsize, 47 | self.pnet, self.rnet, self.onet, 48 | [0.6, 0.7, threshold], 49 | factor) 50 | faces = self.process_mtcnn_bbox(faces, image.shape) 51 | faces, pnts = self.remove_small_faces(faces, pnts, min_face_area) 52 | 53 | if use_auto_downscaling: 54 | faces = self.calibrate_coord(faces, scale_factor) 55 | pnts = self.calibrate_landmarks(pnts, scale_factor) 56 | return faces, pnts 57 | 58 | def auto_downscale(self, image): 59 | if self.is_higher_than_1080p(image): 60 | scale_factor = 4 61 | resized_image = cv2.resize(image, 62 | (image.shape[1]//scale_factor, 63 | image.shape[0]//scale_factor)) 64 | elif self.is_higher_than_720p(image): 65 | scale_factor = 3 66 | resized_image = cv2.resize(image, 67 | (image.shape[1]//scale_factor, 68 | image.shape[0]//scale_factor)) 69 | elif self.is_higher_than_480p(image): 70 | scale_factor = 2 71 | resized_image = cv2.resize(image, 72 | (image.shape[1]//scale_factor, 73 | image.shape[0]//scale_factor)) 74 | else: 75 | scale_factor = 1 76 | resized_image = image.copy() 77 | return resized_image, scale_factor 78 | 79 | @staticmethod 80 | def is_higher_than_480p(x): 81 | return (x.shape[0] * x.shape[1]) >= (858*480) 82 | 83 | @staticmethod 84 | def is_higher_than_720p(x): 85 | return (x.shape[0] * x.shape[1]) >= (1280*720) 86 | 87 | @staticmethod 88 | def is_higher_than_1080p(x): 89 | return (x.shape[0] * x.shape[1]) >= (1920*1080) 90 | 91 | @staticmethod 92 | def process_mtcnn_bbox(bboxes, im_shape): 93 | # output bbox coordinate of MTCNN is (y0, x0, y1, x1) 94 | # Here we process the bbox coord. 
to a square bbox with ordering (x0, y1, x1, y0) 95 | for i, bbox in enumerate(bboxes): 96 | y0, x0, y1, x1 = bboxes[i,0:4] 97 | w = int(y1 - y0) 98 | h = int(x1 - x0) 99 | length = (w + h)/2 100 | center = (int((x1+x0)/2),int((y1+y0)/2)) 101 | new_x0 = np.max([0, (center[0]-length//2)])#.astype(np.int32) 102 | new_x1 = np.min([im_shape[0], (center[0]+length//2)])#.astype(np.int32) 103 | new_y0 = np.max([0, (center[1]-length//2)])#.astype(np.int32) 104 | new_y1 = np.min([im_shape[1], (center[1]+length//2)])#.astype(np.int32) 105 | bboxes[i,0:4] = new_x0, new_y1, new_x1, new_y0 106 | return bboxes 107 | 108 | @staticmethod 109 | def calibrate_coord(faces, scale_factor): 110 | for i, (x0, y1, x1, y0, _) in enumerate(faces): 111 | faces[i] = (x0*scale_factor, y1*scale_factor, 112 | x1*scale_factor, y0*scale_factor, _) 113 | return faces 114 | 115 | @staticmethod 116 | def calibrate_landmarks(pnts, scale_factor): 117 | # pnts is a numpy array 118 | return np.array([xy * scale_factor for xy in pnts]) 119 | 120 | @staticmethod 121 | def remove_small_faces(faces, pnts, min_area=25*25): 122 | def compute_area(face_coord): 123 | x0, y1, x1, y0, _ = face_coord 124 | area = np.abs((x1 - x0) * (y1 - y0)) 125 | return area 126 | 127 | new_faces = [] 128 | new_pnts = [] 129 | # faces has shape (num_faces, coord), and pnts has shape (coord, num_faces) 130 | for face,pnt in zip(faces, pnts.transpose()): 131 | if compute_area(face) >= min_area: 132 | new_faces.append(face) 133 | new_pnts.append(pnt) 134 | new_faces = np.array(new_faces) 135 | new_pnts = np.array(new_pnts).transpose() 136 | return new_faces, new_pnts -------------------------------------------------------------------------------- /converter/face_transformer.py: -------------------------------------------------------------------------------- 1 | from .color_correction import * 2 | import cv2 3 | import numpy as np 4 | 5 | 6 | class FaceTransformer(object): 7 | """ 8 | Attributes: 9 | path_func: string, direction for the transformation: either AtoB or BtoA. 10 | model: the generator of the faceswap-GAN model 11 | """ 12 | def __init__(self): 13 | self.path_func = None 14 | self.model = None 15 | 16 | self.inp_img = None 17 | self.input_size = None 18 | self.img_bgr = None 19 | self.roi = None 20 | self.roi_size = None 21 | self.ae_input = None 22 | self.ae_output = None 23 | self.ae_output_masked = None 24 | self.ae_output_bgr = None 25 | self.ae_output_a = None 26 | self.result = None 27 | self.result_rawRGB = None 28 | self.result_alpha = None 29 | 30 | def set_model(self, model): 31 | self.model = model 32 | 33 | def _preprocess_inp_img(self, inp_img, roi_coverage, IMAGE_SHAPE): 34 | img_bgr = cv2.cvtColor(inp_img, cv2.COLOR_RGB2BGR) 35 | input_size = img_bgr.shape 36 | roi_x, roi_y = int(input_size[0]*(1-roi_coverage)), int(input_size[1]*(1-roi_coverage)) 37 | roi = img_bgr[roi_x:-roi_x, roi_y:-roi_y,:] # BGR, [0, 255] 38 | roi_size = roi.shape 39 | ae_input = cv2.resize(roi, IMAGE_SHAPE[:2])/255. 
* 2 - 1 # BGR, [-1, 1] 40 | self.img_bgr = img_bgr 41 | self.input_size = input_size 42 | self.roi = roi 43 | self.roi_size = roi_size 44 | self.ae_input = ae_input 45 | 46 | def _ae_forward_pass(self, ae_input): 47 | ae_out = self.path_func([[ae_input]]) 48 | self.ae_output = np.squeeze(np.array([ae_out])) 49 | 50 | def _postprocess_roi_img(self, ae_output, roi, roi_size, color_correction): 51 | ae_output_a = ae_output[:,:,0] * 255 52 | ae_output_a = cv2.resize(ae_output_a, (roi_size[1],roi_size[0]))[...,np.newaxis] 53 | ae_output_bgr = np.clip( (ae_output[:,:,1:] + 1) * 255 / 2, 0, 255) 54 | ae_output_bgr = cv2.resize(ae_output_bgr, (roi_size[1],roi_size[0])) 55 | ae_output_masked = (ae_output_a/255 * ae_output_bgr + (1 - ae_output_a/255) * roi).astype('uint8') # BGR, [0, 255] 56 | self.ae_output_a = ae_output_a 57 | if color_correction == "adain": 58 | self.ae_output_masked = adain(ae_output_masked, roi) 59 | self.ae_output_bgr = adain(ae_output_bgr, roi) 60 | elif color_correction == "adain_xyz": 61 | self.ae_output_masked = adain(ae_output_masked, roi, color_space="XYZ") 62 | self.ae_output_bgr = adain(ae_output_bgr, roi, color_space="XYZ") 63 | elif color_correction == "hist_match": 64 | self.ae_output_masked = color_hist_match(ae_output_masked, roi) 65 | self.ae_output_bgr = color_hist_match(ae_output_bgr, roi) 66 | else: 67 | self.ae_output_masked = ae_output_masked 68 | self.ae_output_bgr = ae_output_bgr 69 | 70 | def _merge_img_and_mask(self, ae_output_bgr, ae_output_masked, input_size, roi, roi_coverage): 71 | blend_mask = self.get_feather_edges_mask(roi, roi_coverage) 72 | blended_img = blend_mask/255 * ae_output_masked + (1-blend_mask/255) * roi 73 | result = self.img_bgr.copy() 74 | roi_x, roi_y = int(input_size[0]*(1-roi_coverage)), int(input_size[1]*(1-roi_coverage)) 75 | result[roi_x:-roi_x, roi_y:-roi_y,:] = blended_img 76 | result_rawRGB = self.img_bgr.copy() 77 | result_rawRGB[roi_x:-roi_x, roi_y:-roi_y,:] = ae_output_bgr 78 | result = cv2.cvtColor(result, cv2.COLOR_BGR2RGB) 79 | result_rawRGB = cv2.cvtColor(result_rawRGB, cv2.COLOR_BGR2RGB) 80 | result_alpha = np.zeros_like(self.img_bgr) 81 | result_alpha[roi_x:-roi_x, roi_y:-roi_y,:] = (blend_mask/255) * self.ae_output_a 82 | self.result = result 83 | self.result_rawRGB = result_rawRGB 84 | self.result_alpha = result_alpha 85 | 86 | @staticmethod 87 | def get_feather_edges_mask(img, roi_coverage): 88 | img_size = img.shape 89 | mask = np.zeros_like(img) 90 | roi_x, roi_y = int(img_size[0]*(1-roi_coverage)), int(img_size[1]*(1-roi_coverage)) 91 | mask[roi_x:-roi_x, roi_y:-roi_y,:] = 255 92 | mask = cv2.GaussianBlur(mask,(15,15),10) 93 | return mask 94 | 95 | def transform(self, inp_img, direction, roi_coverage, color_correction, IMAGE_SHAPE): 96 | self.check_generator_model(self.model) 97 | self.check_roi_coverage(inp_img, roi_coverage) 98 | 99 | if direction == "AtoB": 100 | self.path_func = self.model.path_abgr_B 101 | elif direction == "BtoA": 102 | self.path_func = self.model.path_abgr_A 103 | else: 104 | raise ValueError(f"direction should be either AtoB or BtoA, recieved {direction}.") 105 | 106 | self.inp_img = inp_img 107 | 108 | # pre-process input image 109 | # Set 5 members: self.img_bgr, self.input_size, self.roi, self.roi_size, self.ae_input 110 | self._preprocess_inp_img(self.inp_img, roi_coverage, IMAGE_SHAPE) 111 | 112 | # model inference 113 | # Set 1 member: self.ae_output 114 | self._ae_forward_pass(self.ae_input) 115 | 116 | # post-process transformed roi image 117 | # Set 3 members: 
self.ae_output_a, self.ae_output_masked, self.ae_output_bgr 118 | self._postprocess_roi_img(self.ae_output, self.roi, self.roi_size, color_correction) 119 | 120 | # merge transformed output back to input image 121 | # Set 3 members: self.result, self.result_rawRGB, self.result_alpha 122 | self._merge_img_and_mask(self.ae_output_bgr, self.ae_output_masked, 123 | self.input_size, self.roi, roi_coverage) 124 | 125 | return self.result, self.result_rawRGB, self.result_alpha 126 | 127 | @staticmethod 128 | def check_generator_model(model): 129 | if model is None: 130 | raise ValueError(f"Generator model has not been set.") 131 | 132 | @staticmethod 133 | def check_roi_coverage(inp_img, roi_coverage): 134 | input_size = inp_img.shape 135 | roi_x, roi_y = int(input_size[0]*(1-roi_coverage)), int(input_size[1]*(1-roi_coverage)) 136 | if roi_x == 0 or roi_y == 0: 137 | raise ValueError("Error occurs when cropping roi image. \ 138 | Consider increasing min_face_area or decreasing roi_coverage.") 139 | -------------------------------------------------------------------------------- /legacy/instance_normalization.py: -------------------------------------------------------------------------------- 1 | from keras.engine import Layer, InputSpec 2 | from keras import initializers, regularizers, constraints 3 | from keras import backend as K 4 | from keras.utils.generic_utils import get_custom_objects 5 | 6 | import numpy as np 7 | 8 | 9 | class InstanceNormalization(Layer): 10 | """Instance normalization layer (Lei Ba et al, 2016, Ulyanov et al., 2016). 11 | Normalize the activations of the previous layer at each step, 12 | i.e. applies a transformation that maintains the mean activation 13 | close to 0 and the activation standard deviation close to 1. 14 | # Arguments 15 | axis: Integer, the axis that should be normalized 16 | (typically the features axis). 17 | For instance, after a `Conv2D` layer with 18 | `data_format="channels_first"`, 19 | set `axis=1` in `InstanceNormalization`. 20 | Setting `axis=None` will normalize all values in each instance of the batch. 21 | Axis 0 is the batch dimension. `axis` cannot be set to 0 to avoid errors. 22 | epsilon: Small float added to variance to avoid dividing by zero. 23 | center: If True, add offset of `beta` to normalized tensor. 24 | If False, `beta` is ignored. 25 | scale: If True, multiply by `gamma`. 26 | If False, `gamma` is not used. 27 | When the next layer is linear (also e.g. `nn.relu`), 28 | this can be disabled since the scaling 29 | will be done by the next layer. 30 | beta_initializer: Initializer for the beta weight. 31 | gamma_initializer: Initializer for the gamma weight. 32 | beta_regularizer: Optional regularizer for the beta weight. 33 | gamma_regularizer: Optional regularizer for the gamma weight. 34 | beta_constraint: Optional constraint for the beta weight. 35 | gamma_constraint: Optional constraint for the gamma weight. 36 | # Input shape 37 | Arbitrary. Use the keyword argument `input_shape` 38 | (tuple of integers, does not include the samples axis) 39 | when using this layer as the first layer in a model. 40 | # Output shape 41 | Same shape as input. 
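    # Equivalent computation
    (Editor's sketch, not part of the original docstring: for a channels-last 4D
    input with `axis=-1`, the layer computes roughly the following, ignoring the
    learned `gamma`/`beta`; note that `epsilon` is added to the standard deviation,
    as in `call` below.)

        mean = x.mean(axis=(1, 2), keepdims=True)  # per sample, per channel
        std = x.std(axis=(1, 2), keepdims=True) + epsilon
        out = (x - mean) / std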
42 | # References 43 | - [Layer Normalization](https://arxiv.org/abs/1607.06450) 44 | - [Instance Normalization: The Missing Ingredient for Fast Stylization](https://arxiv.org/abs/1607.08022) 45 | """ 46 | def __init__(self, 47 | axis=None, 48 | epsilon=1e-3, 49 | center=True, 50 | scale=True, 51 | beta_initializer='zeros', 52 | gamma_initializer='ones', 53 | beta_regularizer=None, 54 | gamma_regularizer=None, 55 | beta_constraint=None, 56 | gamma_constraint=None, 57 | **kwargs): 58 | super(InstanceNormalization, self).__init__(**kwargs) 59 | self.supports_masking = True 60 | self.axis = axis 61 | self.epsilon = epsilon 62 | self.center = center 63 | self.scale = scale 64 | self.beta_initializer = initializers.get(beta_initializer) 65 | self.gamma_initializer = initializers.get(gamma_initializer) 66 | self.beta_regularizer = regularizers.get(beta_regularizer) 67 | self.gamma_regularizer = regularizers.get(gamma_regularizer) 68 | self.beta_constraint = constraints.get(beta_constraint) 69 | self.gamma_constraint = constraints.get(gamma_constraint) 70 | 71 | def build(self, input_shape): 72 | ndim = len(input_shape) 73 | if self.axis == 0: 74 | raise ValueError('Axis cannot be zero') 75 | 76 | if (self.axis is not None) and (ndim == 2): 77 | raise ValueError('Cannot specify axis for rank 1 tensor') 78 | 79 | self.input_spec = InputSpec(ndim=ndim) 80 | 81 | if self.axis is None: 82 | shape = (1,) 83 | else: 84 | shape = (input_shape[self.axis],) 85 | 86 | if self.scale: 87 | self.gamma = self.add_weight(shape=shape, 88 | name='gamma', 89 | initializer=self.gamma_initializer, 90 | regularizer=self.gamma_regularizer, 91 | constraint=self.gamma_constraint) 92 | else: 93 | self.gamma = None 94 | if self.center: 95 | self.beta = self.add_weight(shape=shape, 96 | name='beta', 97 | initializer=self.beta_initializer, 98 | regularizer=self.beta_regularizer, 99 | constraint=self.beta_constraint) 100 | else: 101 | self.beta = None 102 | self.built = True 103 | 104 | def call(self, inputs, training=None): 105 | input_shape = K.int_shape(inputs) 106 | reduction_axes = list(range(0, len(input_shape))) 107 | 108 | if (self.axis is not None): 109 | del reduction_axes[self.axis] 110 | 111 | del reduction_axes[0] 112 | 113 | mean = K.mean(inputs, reduction_axes, keepdims=True) 114 | stddev = K.std(inputs, reduction_axes, keepdims=True) + self.epsilon 115 | normed = (inputs - mean) / stddev 116 | 117 | broadcast_shape = [1] * len(input_shape) 118 | if self.axis is not None: 119 | broadcast_shape[self.axis] = input_shape[self.axis] 120 | 121 | if self.scale: 122 | broadcast_gamma = K.reshape(self.gamma, broadcast_shape) 123 | normed = normed * broadcast_gamma 124 | if self.center: 125 | broadcast_beta = K.reshape(self.beta, broadcast_shape) 126 | normed = normed + broadcast_beta 127 | return normed 128 | 129 | def get_config(self): 130 | config = { 131 | 'axis': self.axis, 132 | 'epsilon': self.epsilon, 133 | 'center': self.center, 134 | 'scale': self.scale, 135 | 'beta_initializer': initializers.serialize(self.beta_initializer), 136 | 'gamma_initializer': initializers.serialize(self.gamma_initializer), 137 | 'beta_regularizer': regularizers.serialize(self.beta_regularizer), 138 | 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer), 139 | 'beta_constraint': constraints.serialize(self.beta_constraint), 140 | 'gamma_constraint': constraints.serialize(self.gamma_constraint) 141 | } 142 | base_config = super(InstanceNormalization, self).get_config() 143 | return dict(list(base_config.items()) + 
list(config.items())) 144 | 145 | get_custom_objects().update({'InstanceNormalization': InstanceNormalization}) 146 | -------------------------------------------------------------------------------- /networks/instance_normalization.py: -------------------------------------------------------------------------------- 1 | from keras.engine import Layer, InputSpec 2 | from keras import initializers, regularizers, constraints 3 | from keras import backend as K 4 | from keras.utils.generic_utils import get_custom_objects 5 | 6 | import numpy as np 7 | 8 | 9 | class InstanceNormalization(Layer): 10 | """Instance normalization layer (Lei Ba et al, 2016, Ulyanov et al., 2016). 11 | Normalize the activations of the previous layer at each step, 12 | i.e. applies a transformation that maintains the mean activation 13 | close to 0 and the activation standard deviation close to 1. 14 | # Arguments 15 | axis: Integer, the axis that should be normalized 16 | (typically the features axis). 17 | For instance, after a `Conv2D` layer with 18 | `data_format="channels_first"`, 19 | set `axis=1` in `InstanceNormalization`. 20 | Setting `axis=None` will normalize all values in each instance of the batch. 21 | Axis 0 is the batch dimension. `axis` cannot be set to 0 to avoid errors. 22 | epsilon: Small float added to variance to avoid dividing by zero. 23 | center: If True, add offset of `beta` to normalized tensor. 24 | If False, `beta` is ignored. 25 | scale: If True, multiply by `gamma`. 26 | If False, `gamma` is not used. 27 | When the next layer is linear (also e.g. `nn.relu`), 28 | this can be disabled since the scaling 29 | will be done by the next layer. 30 | beta_initializer: Initializer for the beta weight. 31 | gamma_initializer: Initializer for the gamma weight. 32 | beta_regularizer: Optional regularizer for the beta weight. 33 | gamma_regularizer: Optional regularizer for the gamma weight. 34 | beta_constraint: Optional constraint for the beta weight. 35 | gamma_constraint: Optional constraint for the gamma weight. 36 | # Input shape 37 | Arbitrary. Use the keyword argument `input_shape` 38 | (tuple of integers, does not include the samples axis) 39 | when using this layer as the first layer in a model. 40 | # Output shape 41 | Same shape as input. 
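    # Example
    (Editor's sketch, not part of the original docstring; it follows how the layer
    is used elsewhere in this repo, e.g. `InstanceNormalization()(x)` in
    networks/nn_blocks.py.)

        from keras.layers import Input, Conv2D
        from keras.models import Model

        inp = Input(shape=(64, 64, 3))
        x = Conv2D(32, kernel_size=3, padding='same')(inp)
        x = InstanceNormalization()(x)  # default axis=None: one mean/std per sample
        model = Model(inp, x)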
42 | # References 43 | - [Layer Normalization](https://arxiv.org/abs/1607.06450) 44 | - [Instance Normalization: The Missing Ingredient for Fast Stylization](https://arxiv.org/abs/1607.08022) 45 | """ 46 | def __init__(self, 47 | axis=None, 48 | epsilon=1e-3, 49 | center=True, 50 | scale=True, 51 | beta_initializer='zeros', 52 | gamma_initializer='ones', 53 | beta_regularizer=None, 54 | gamma_regularizer=None, 55 | beta_constraint=None, 56 | gamma_constraint=None, 57 | **kwargs): 58 | super(InstanceNormalization, self).__init__(**kwargs) 59 | self.supports_masking = True 60 | self.axis = axis 61 | self.epsilon = epsilon 62 | self.center = center 63 | self.scale = scale 64 | self.beta_initializer = initializers.get(beta_initializer) 65 | self.gamma_initializer = initializers.get(gamma_initializer) 66 | self.beta_regularizer = regularizers.get(beta_regularizer) 67 | self.gamma_regularizer = regularizers.get(gamma_regularizer) 68 | self.beta_constraint = constraints.get(beta_constraint) 69 | self.gamma_constraint = constraints.get(gamma_constraint) 70 | 71 | def build(self, input_shape): 72 | ndim = len(input_shape) 73 | if self.axis == 0: 74 | raise ValueError('Axis cannot be zero') 75 | 76 | if (self.axis is not None) and (ndim == 2): 77 | raise ValueError('Cannot specify axis for rank 1 tensor') 78 | 79 | self.input_spec = InputSpec(ndim=ndim) 80 | 81 | if self.axis is None: 82 | shape = (1,) 83 | else: 84 | shape = (input_shape[self.axis],) 85 | 86 | if self.scale: 87 | self.gamma = self.add_weight(shape=shape, 88 | name='gamma', 89 | initializer=self.gamma_initializer, 90 | regularizer=self.gamma_regularizer, 91 | constraint=self.gamma_constraint) 92 | else: 93 | self.gamma = None 94 | if self.center: 95 | self.beta = self.add_weight(shape=shape, 96 | name='beta', 97 | initializer=self.beta_initializer, 98 | regularizer=self.beta_regularizer, 99 | constraint=self.beta_constraint) 100 | else: 101 | self.beta = None 102 | self.built = True 103 | 104 | def call(self, inputs, training=None): 105 | input_shape = K.int_shape(inputs) 106 | reduction_axes = list(range(0, len(input_shape))) 107 | 108 | if (self.axis is not None): 109 | del reduction_axes[self.axis] 110 | 111 | del reduction_axes[0] 112 | 113 | mean = K.mean(inputs, reduction_axes, keepdims=True) 114 | stddev = K.std(inputs, reduction_axes, keepdims=True) + self.epsilon 115 | normed = (inputs - mean) / stddev 116 | 117 | broadcast_shape = [1] * len(input_shape) 118 | if self.axis is not None: 119 | broadcast_shape[self.axis] = input_shape[self.axis] 120 | 121 | if self.scale: 122 | broadcast_gamma = K.reshape(self.gamma, broadcast_shape) 123 | normed = normed * broadcast_gamma 124 | if self.center: 125 | broadcast_beta = K.reshape(self.beta, broadcast_shape) 126 | normed = normed + broadcast_beta 127 | return normed 128 | 129 | def get_config(self): 130 | config = { 131 | 'axis': self.axis, 132 | 'epsilon': self.epsilon, 133 | 'center': self.center, 134 | 'scale': self.scale, 135 | 'beta_initializer': initializers.serialize(self.beta_initializer), 136 | 'gamma_initializer': initializers.serialize(self.gamma_initializer), 137 | 'beta_regularizer': regularizers.serialize(self.beta_regularizer), 138 | 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer), 139 | 'beta_constraint': constraints.serialize(self.beta_constraint), 140 | 'gamma_constraint': constraints.serialize(self.gamma_constraint) 141 | } 142 | base_config = super(InstanceNormalization, self).get_config() 143 | return dict(list(base_config.items()) + 
list(config.items())) 144 | 145 | get_custom_objects().update({'InstanceNormalization': InstanceNormalization}) 146 | -------------------------------------------------------------------------------- /legacy/FCN8s_keras.py: -------------------------------------------------------------------------------- 1 | from keras.models import Sequential, Model 2 | from keras.layers import * 3 | from keras.layers.advanced_activations import LeakyReLU 4 | from keras.activations import relu 5 | from keras.initializers import RandomNormal 6 | from keras.applications import * 7 | import keras.backend as K 8 | 9 | def FCN(num_output=21, input_shape=(500, 500, 3)): 10 | """Instantiate the FCN8s architecture with keras. 11 | # Arguments 12 | basenet: type of basene {'vgg16'} 13 | trainable_base: Bool whether the basenet weights are trainable 14 | num_output: number of classes 15 | input_shape: input image shape 16 | weights: pre-trained weights to load (None for training from scratch) 17 | # Returns 18 | A Keras model instance 19 | """ 20 | ROW_AXIS = 1 21 | COL_AXIS = 2 22 | CHANNEL_AXIS = 3 23 | 24 | def _crop(target_layer, offset=(None, None), name=None): 25 | """Crop the bottom such that it has the same shape as target_layer.""" 26 | """ Use _keras_shape to prevent undefined output shape in Conv2DTranspose""" 27 | def f(x): 28 | width = x._keras_shape[ROW_AXIS] 29 | height = x._keras_shape[COL_AXIS] 30 | target_width = target_layer._keras_shape[ROW_AXIS] 31 | target_height = target_layer._keras_shape[COL_AXIS] 32 | cropped = Cropping2D(cropping=((offset[0], width - offset[0] - target_width), (offset[1], height - offset[1] - target_height)), name='{}'.format(name))(x) 33 | return cropped 34 | return f 35 | 36 | input_tensor = Input(shape=input_shape) 37 | pad1 = ZeroPadding2D(padding=(100, 100))(input_tensor) 38 | conv1_1 = Conv2D(filters=64, kernel_size=(3, 3), activation='relu', 39 | padding='valid', name='conv1_1')(pad1) 40 | conv1_2 = Conv2D(filters=64, kernel_size=(3, 3), activation='relu', 41 | padding='same', name='conv1_2')(conv1_1) 42 | pool1 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), 43 | padding='same', name='pool1')(conv1_2) 44 | # Block 2 45 | conv2_1 = Conv2D(filters=128, kernel_size=(3, 3), 46 | activation='relu', 47 | padding='same', name='conv2_1')(pool1) 48 | conv2_2 = Conv2D(filters=128, kernel_size=(3, 3), activation='relu', 49 | padding='same', name='conv2_2')(conv2_1) 50 | pool2 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), 51 | padding='same', name='pool2')(conv2_2) 52 | # Block 3 53 | conv3_1 = Conv2D(filters=256, kernel_size=(3, 3), activation='relu', 54 | padding='same', name='conv3_1')(pool2) 55 | conv3_2 = Conv2D(filters=256, kernel_size=(3, 3), activation='relu', 56 | padding='same', name='conv3_2')(conv3_1) 57 | conv3_3 = Conv2D(filters=256, kernel_size=(3, 3), activation='relu', 58 | padding='same', name='conv3_3')(conv3_2) 59 | pool3 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), 60 | padding='same', name='pool3')(conv3_3) 61 | # Block 4 62 | conv4_1 = Conv2D(filters=512, kernel_size=(3, 3), activation='relu', 63 | padding='same', name='conv4_1')(pool3) 64 | conv4_2 = Conv2D(filters=512, kernel_size=(3, 3), activation='relu', 65 | padding='same', name='conv4_2')(conv4_1) 66 | conv4_3 = Conv2D(filters=512, kernel_size=(3, 3), activation='relu', 67 | padding='same', name='conv4_3')(conv4_2) 68 | pool4 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), 69 | padding='same', name='pool4')(conv4_3) 70 | # Block 5 71 | conv5_1 = Conv2D(filters=512, kernel_size=(3, 3), 
activation='relu', 72 | padding='same', name='conv5_1')(pool4) 73 | conv5_2 = Conv2D(filters=512, kernel_size=(3, 3), activation='relu', 74 | padding='same', name='conv5_2')(conv5_1) 75 | conv5_3 = Conv2D(filters=512, kernel_size=(3, 3), activation='relu', 76 | padding='same', name='conv5_3')(conv5_2) 77 | pool5 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), 78 | padding='same', name='pool5')(conv5_3) 79 | # fully conv 80 | fc6 = Conv2D(filters=4096, kernel_size=(7, 7), 81 | activation='relu', padding='valid', 82 | name='fc6')(pool5) 83 | drop6 = Dropout(0.5)(fc6) 84 | fc7 = Conv2D(filters=4096, kernel_size=(1, 1), 85 | activation='relu', padding='valid', 86 | name='fc7')(drop6) 87 | drop7 = Dropout(0.5)(fc7) 88 | 89 | #basenet = VGG16_basenet() 90 | # input 91 | #input_tensor = Input(shape=input_shape) 92 | # Get skip_layers=[drop7, pool4, pool3] from the base net: VGG16 93 | #skip_layers = VGG16_basenet(input_tensor) 94 | 95 | #drop7 = skip_layers[0] 96 | score_fr = Conv2D(filters=num_output, kernel_size=(1, 1), padding='valid', name='score_fr')(drop7) 97 | upscore2 = Conv2DTranspose(num_output, kernel_size=4, strides=2, use_bias=False, name='upscore2')(score_fr) 98 | 99 | # scale pool4 skip for compatibility 100 | #pool4 = skip_layers[1] 101 | scale_pool4 = Lambda(lambda x: x * 0.01, name='scale_pool4')(pool4) 102 | score_pool4 = Conv2D(filters=num_output, kernel_size=(1, 1), 103 | padding='valid', name='score_pool4')(scale_pool4) 104 | score_pool4c = _crop(upscore2, offset=(5, 5), 105 | name='score_pool4c')(score_pool4) 106 | fuse_pool4 = add([upscore2, score_pool4c]) 107 | upscore_pool4 = Conv2DTranspose(filters=num_output, kernel_size=(4, 4), 108 | strides=(2, 2), padding='valid', 109 | use_bias=False, 110 | data_format=K.image_data_format(), 111 | name='upscore_pool4')(fuse_pool4) 112 | # scale pool3 skip for compatibility 113 | #pool3 = skip_layers[2] 114 | scale_pool3 = Lambda(lambda x: x * 0.0001, name='scale_pool3')(pool3) 115 | score_pool3 = Conv2D(filters=num_output, kernel_size=(1, 1), 116 | padding='valid', name='score_pool3')(scale_pool3) 117 | score_pool3c = _crop(upscore_pool4, offset=(9, 9), 118 | name='score_pool3c')(score_pool3) 119 | fuse_pool3 = add([upscore_pool4, score_pool3c]) 120 | # score 121 | upscore8 = Conv2DTranspose(filters=num_output, kernel_size=(16, 16), 122 | strides=(8, 8), padding='valid', 123 | use_bias=False, 124 | data_format=K.image_data_format(), 125 | name='upscore8')(fuse_pool3) 126 | score = _crop(input_tensor, offset=(31, 31), name='score')(upscore8) 127 | 128 | # model 129 | model = Model(input_tensor, score, name='fcn_vgg16') 130 | 131 | return model -------------------------------------------------------------------------------- /networks/losses.py: -------------------------------------------------------------------------------- 1 | from keras.layers import Lambda, concatenate 2 | from tensorflow.contrib.distributions import Beta 3 | from .instance_normalization import InstanceNormalization 4 | import keras.backend as K 5 | import tensorflow as tf 6 | 7 | def first_order(x, axis=1): 8 | img_nrows = x.shape[1] 9 | img_ncols = x.shape[2] 10 | if axis == 1: 11 | return K.abs(x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, 1:, :img_ncols - 1, :]) 12 | elif axis == 2: 13 | return K.abs(x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, :img_nrows - 1, 1:, :]) 14 | else: 15 | return None 16 | 17 | def calc_loss(pred, target, loss='l2'): 18 | if loss.lower() == "l2": 19 | return K.mean(K.square(pred - target)) 20 | elif loss.lower() == "l1": 21 | 
return K.mean(K.abs(pred - target)) 22 | elif loss.lower() == "cross_entropy": 23 | return -K.mean(K.log(pred + K.epsilon())*target + K.log(1 - pred + K.epsilon())*(1 - target)) 24 | else: 25 | raise ValueError(f'Recieve an unknown loss type: {loss}.') 26 | 27 | def cyclic_loss(netG1, netG2, real1): 28 | fake2 = netG2(real1)[-1] # fake2 ABGR 29 | fake2_alpha = Lambda(lambda x: x[:,:,:, :1])(fake2) # fake2 BGR 30 | fake2 = Lambda(lambda x: x[:,:,:, 1:])(fake2) # fake2 BGR 31 | cyclic1 = netG1(fake2)[-1] # cyclic1 ABGR 32 | cyclic1_alpha = Lambda(lambda x: x[:,:,:, :1])(cyclic1) # cyclic1 BGR 33 | cyclic1 = Lambda(lambda x: x[:,:,:, 1:])(cyclic1) # cyclic1 BGR 34 | loss = calc_loss(cyclic1, real1, loss='l1') 35 | loss += 0.1 * calc_loss(cyclic1_alpha, fake2_alpha, loss='l1') 36 | return loss 37 | 38 | def adversarial_loss(netD, real, fake_abgr, distorted, gan_training="mixup_LSGAN", **weights): 39 | alpha = Lambda(lambda x: x[:,:,:, :1])(fake_abgr) 40 | fake_bgr = Lambda(lambda x: x[:,:,:, 1:])(fake_abgr) 41 | fake = alpha * fake_bgr + (1-alpha) * distorted 42 | 43 | if gan_training == "mixup_LSGAN": 44 | dist = Beta(0.2, 0.2) 45 | lam = dist.sample() 46 | mixup = lam * concatenate([real, distorted]) + (1 - lam) * concatenate([fake, distorted]) 47 | pred_fake = netD(concatenate([fake, distorted])) 48 | pred_mixup = netD(mixup) 49 | loss_D = calc_loss(pred_mixup, lam * K.ones_like(pred_mixup), "l2") 50 | loss_G = weights['w_D'] * calc_loss(pred_fake, K.ones_like(pred_fake), "l2") 51 | mixup2 = lam * concatenate([real, distorted]) + (1 - lam) * concatenate([fake_bgr, distorted]) 52 | pred_fake_bgr = netD(concatenate([fake_bgr, distorted])) 53 | pred_mixup2 = netD(mixup2) 54 | loss_D += calc_loss(pred_mixup2, lam * K.ones_like(pred_mixup2), "l2") 55 | loss_G += weights['w_D'] * calc_loss(pred_fake_bgr, K.ones_like(pred_fake_bgr), "l2") 56 | elif gan_training == "relativistic_avg_LSGAN": 57 | real_pred = netD(concatenate([real, distorted])) 58 | fake_pred = netD(concatenate([fake, distorted])) 59 | loss_D = K.mean(K.square(real_pred - K.ones_like(fake_pred)))/2 60 | loss_D += K.mean(K.square(fake_pred - K.zeros_like(fake_pred)))/2 61 | loss_G = weights['w_D'] * K.mean(K.square(fake_pred - K.ones_like(fake_pred))) 62 | 63 | fake_pred2 = netD(concatenate([fake_bgr, distorted])) 64 | loss_D += K.mean(K.square(real_pred - K.mean(fake_pred2,axis=0) - K.ones_like(fake_pred2)))/2 65 | loss_D += K.mean(K.square(fake_pred2 - K.mean(real_pred,axis=0) - K.zeros_like(fake_pred2)))/2 66 | loss_G += weights['w_D'] * K.mean(K.square(real_pred - K.mean(fake_pred2,axis=0) - K.zeros_like(fake_pred2)))/2 67 | loss_G += weights['w_D'] * K.mean(K.square(fake_pred2 - K.mean(real_pred,axis=0) - K.ones_like(fake_pred2)))/2 68 | else: 69 | raise ValueError("Receive an unknown GAN training method: {gan_training}") 70 | return loss_D, loss_G 71 | 72 | def reconstruction_loss(real, fake_abgr, mask_eyes, model_outputs, **weights): 73 | alpha = Lambda(lambda x: x[:,:,:, :1])(fake_abgr) 74 | fake_bgr = Lambda(lambda x: x[:,:,:, 1:])(fake_abgr) 75 | 76 | loss_G = 0 77 | loss_G += weights['w_recon'] * calc_loss(fake_bgr, real, "l1") 78 | loss_G += weights['w_eyes'] * K.mean(K.abs(mask_eyes*(fake_bgr - real))) 79 | 80 | for out in model_outputs[:-1]: 81 | out_size = out.get_shape().as_list() 82 | resized_real = tf.image.resize_images(real, out_size[1:3]) 83 | loss_G += weights['w_recon'] * calc_loss(out, resized_real, "l1") 84 | return loss_G 85 | 86 | def edge_loss(real, fake_abgr, mask_eyes, **weights): 87 | alpha = 
Lambda(lambda x: x[:,:,:, :1])(fake_abgr) 88 | fake_bgr = Lambda(lambda x: x[:,:,:, 1:])(fake_abgr) 89 | 90 | loss_G = 0 91 | loss_G += weights['w_edge'] * calc_loss(first_order(fake_bgr, axis=1), first_order(real, axis=1), "l1") 92 | loss_G += weights['w_edge'] * calc_loss(first_order(fake_bgr, axis=2), first_order(real, axis=2), "l1") 93 | shape_mask_eyes = mask_eyes.get_shape().as_list() 94 | resized_mask_eyes = tf.image.resize_images(mask_eyes, [shape_mask_eyes[1]-1, shape_mask_eyes[2]-1]) 95 | loss_G += weights['w_eyes'] * K.mean(K.abs(resized_mask_eyes * \ 96 | (first_order(fake_bgr, axis=1) - first_order(real, axis=1)))) 97 | loss_G += weights['w_eyes'] * K.mean(K.abs(resized_mask_eyes * \ 98 | (first_order(fake_bgr, axis=2) - first_order(real, axis=2)))) 99 | return loss_G 100 | 101 | def perceptual_loss(real, fake_abgr, distorted, mask_eyes, vggface_feats, **weights): 102 | alpha = Lambda(lambda x: x[:,:,:, :1])(fake_abgr) 103 | fake_bgr = Lambda(lambda x: x[:,:,:, 1:])(fake_abgr) 104 | fake = alpha * fake_bgr + (1-alpha) * distorted 105 | 106 | def preprocess_vggface(x): 107 | x = (x + 1)/2 * 255 # channel order: BGR 108 | x -= [91.4953, 103.8827, 131.0912] 109 | return x 110 | 111 | real_sz224 = tf.image.resize_images(real, [224, 224]) 112 | real_sz224 = Lambda(preprocess_vggface)(real_sz224) 113 | dist = Beta(0.2, 0.2) 114 | lam = dist.sample() # use mixup trick here to reduce foward pass from 2 times to 1. 115 | mixup = lam*fake_bgr + (1-lam)*fake 116 | fake_sz224 = tf.image.resize_images(mixup, [224, 224]) 117 | fake_sz224 = Lambda(preprocess_vggface)(fake_sz224) 118 | real_feat112, real_feat55, real_feat28, real_feat7 = vggface_feats(real_sz224) 119 | fake_feat112, fake_feat55, fake_feat28, fake_feat7 = vggface_feats(fake_sz224) 120 | 121 | # Apply instance norm on VGG(ResNet) features 122 | # From MUNIT https://github.com/NVlabs/MUNIT 123 | loss_G = 0 124 | def instnorm(): return InstanceNormalization() 125 | loss_G += weights['w_pl'][0] * calc_loss(instnorm()(fake_feat7), instnorm()(real_feat7), "l2") 126 | loss_G += weights['w_pl'][1] * calc_loss(instnorm()(fake_feat28), instnorm()(real_feat28), "l2") 127 | loss_G += weights['w_pl'][2] * calc_loss(instnorm()(fake_feat55), instnorm()(real_feat55), "l2") 128 | loss_G += weights['w_pl'][3] * calc_loss(instnorm()(fake_feat112), instnorm()(real_feat112), "l2") 129 | return loss_G 130 | -------------------------------------------------------------------------------- /data_loader/data_augmentation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | from umeyama import umeyama 4 | from scipy import ndimage 5 | from pathlib import PurePath, Path 6 | 7 | random_transform_args = { 8 | 'rotation_range': 10, 9 | 'zoom_range': 0.1, 10 | 'shift_range': 0.05, 11 | 'random_flip': 0.5, 12 | } 13 | 14 | # Motion blurs as data augmentation 15 | def get_motion_blur_kernel(sz=7): 16 | rot_angle = np.random.uniform(-180,180) 17 | kernel = np.zeros((sz,sz)) 18 | kernel[int((sz-1)//2), :] = np.ones(sz) 19 | kernel = ndimage.interpolation.rotate(kernel, rot_angle, reshape=False) 20 | kernel = np.clip(kernel, 0, 1) 21 | normalize_factor = 1 / np.sum(kernel) 22 | kernel = kernel * normalize_factor 23 | return kernel 24 | 25 | def motion_blur(images, sz=7): 26 | # images is a list [image2, image2, ...] 
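    # (Editor's note, illustrative and not part of the original file: `images` is
    #  typically the pair [warped_img, target_img]; one randomly rotated line kernel
    #  is applied to every image in the list so that the distorted input and its
    #  reconstruction target receive identical blur. The `sz` argument is effectively
    #  ignored because `blur_sz` is re-drawn just below. Usage, as in read_image():
    #      warped_img, target_img = motion_blur([warped_img, target_img])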
27 | blur_sz = np.random.choice([5, 7, 9, 11]) 28 | kernel_motion_blur = get_motion_blur_kernel(blur_sz) 29 | for i, image in enumerate(images): 30 | images[i] = cv2.filter2D(image, -1, kernel_motion_blur).astype(np.float64) 31 | return images 32 | 33 | def random_transform(image, rotation_range, zoom_range, shift_range, random_flip): 34 | h,w = image.shape[0:2] 35 | rotation = np.random.uniform(-rotation_range, rotation_range) 36 | scale = np.random.uniform(1 - zoom_range, 1 + zoom_range) 37 | tx = np.random.uniform(-shift_range, shift_range) * w 38 | ty = np.random.uniform(-shift_range, shift_range) * h 39 | mat = cv2.getRotationMatrix2D((w//2,h//2), rotation, scale) 40 | mat[:,2] += (tx,ty) 41 | result = cv2.warpAffine(image, mat, (w,h), borderMode=cv2.BORDER_REPLICATE) 42 | if np.random.random() < random_flip: 43 | result = result[:,::-1] 44 | return result 45 | 46 | def random_warp_rev(image, res=64): 47 | assert image.shape == (256,256,6) 48 | res_scale = res//64 49 | assert res_scale >= 1, f"Resolution should be >= 64. Recieved {res}." 50 | interp_param = 80 * res_scale 51 | interp_slice = slice(interp_param//10,9*interp_param//10) 52 | dst_pnts_slice = slice(0,65*res_scale,16*res_scale) 53 | 54 | rand_coverage = np.random.randint(20) + 78 # random warping coverage 55 | rand_scale = np.random.uniform(5., 6.2) # random warping scale 56 | 57 | range_ = np.linspace(128-rand_coverage, 128+rand_coverage, 5) 58 | mapx = np.broadcast_to(range_, (5,5)) 59 | mapy = mapx.T 60 | mapx = mapx + np.random.normal(size=(5,5), scale=rand_scale) 61 | mapy = mapy + np.random.normal(size=(5,5), scale=rand_scale) 62 | interp_mapx = cv2.resize(mapx, (interp_param,interp_param))[interp_slice,interp_slice].astype('float32') 63 | interp_mapy = cv2.resize(mapy, (interp_param,interp_param))[interp_slice,interp_slice].astype('float32') 64 | warped_image = cv2.remap(image, interp_mapx, interp_mapy, cv2.INTER_LINEAR) 65 | src_points = np.stack([mapx.ravel(), mapy.ravel()], axis=-1) 66 | dst_points = np.mgrid[dst_pnts_slice,dst_pnts_slice].T.reshape(-1,2) 67 | mat = umeyama(src_points, dst_points, True)[0:2] 68 | target_image = cv2.warpAffine(image, mat, (res,res)) 69 | return warped_image, target_image 70 | 71 | def random_color_match(image, fns_all_trn_data): 72 | rand_idx = np.random.randint(len(fns_all_trn_data)) 73 | fn_match = fns_all_trn_data[rand_idx] 74 | tar_img = cv2.imread(fn_match) 75 | if tar_img is None: 76 | print(f"Failed reading image {fn_match} in random_color_match().") 77 | return image 78 | r = 60 # only take color information of the center area 79 | src_img = cv2.resize(image, (256,256)) 80 | tar_img = cv2.resize(tar_img, (256,256)) 81 | 82 | # randomly transform to XYZ color space 83 | rand_color_space_to_XYZ = np.random.choice([True, False]) 84 | if rand_color_space_to_XYZ: 85 | src_img = cv2.cvtColor(src_img, cv2.COLOR_BGR2XYZ) 86 | tar_img = cv2.cvtColor(tar_img, cv2.COLOR_BGR2XYZ) 87 | 88 | # compute statistics 89 | mt = np.mean(tar_img[r:-r,r:-r,:], axis=(0,1)) 90 | st = np.std(tar_img[r:-r,r:-r,:], axis=(0,1)) 91 | ms = np.mean(src_img[r:-r,r:-r,:], axis=(0,1)) 92 | ss = np.std(src_img[r:-r,r:-r,:], axis=(0,1)) 93 | 94 | # randomly interpolate the statistics 95 | rand_ratio = np.random.uniform() 96 | mt = rand_ratio * mt + (1 - rand_ratio) * ms 97 | st = rand_ratio * st + (1 - rand_ratio) * ss 98 | 99 | # Apply color transfer from src to tar domain 100 | if ss.any() <= 1e-7: return src_img 101 | result = st * (src_img.astype(np.float32) - ms) / (ss+1e-7) + mt 102 | if result.min() < 
0: 103 | result = result - result.min() 104 | if result.max() > 255: 105 | result = (255.0/result.max()*result).astype(np.float32) 106 | 107 | # transform back from XYZ to BGR color space if necessary 108 | if rand_color_space_to_XYZ: 109 | result = cv2.cvtColor(result.astype(np.uint8), cv2.COLOR_XYZ2BGR) 110 | return result 111 | 112 | def read_image(fn, fns_all_trn_data, dir_bm_eyes=None, res=64, prob_random_color_match=0.5, 113 | use_da_motion_blur=True, use_bm_eyes=True, 114 | random_transform_args=random_transform_args): 115 | if dir_bm_eyes is None: 116 | raise ValueError(f"dir_bm_eyes is not set.") 117 | 118 | # https://github.com/tensorflow/tensorflow/issues/5552 119 | # TensorFlow converts str to bytes in most places, including sess.run(). 120 | if type(fn) == type(b"bytes"): 121 | fn = fn.decode("utf-8") 122 | dir_bm_eyes = dir_bm_eyes.decode("utf-8") 123 | fns_all_trn_data = [fn_all.decode("utf-8") for fn_all in fns_all_trn_data] 124 | 125 | raw_fn = PurePath(fn).parts[-1] 126 | image = cv2.imread(fn) 127 | if image is None: 128 | print(f"Failed reading image {fn}.") 129 | raise IOError(f"Failed reading image {fn}.") 130 | if np.random.uniform() <= prob_random_color_match: 131 | image = random_color_match(image, fns_all_trn_data) 132 | image = cv2.resize(image, (256,256)) / 255 * 2 - 1 133 | 134 | if use_bm_eyes: 135 | bm_eyes = cv2.imread(f"{dir_bm_eyes}/{raw_fn}") 136 | if bm_eyes is None: 137 | print(f"Failed reading binary mask {dir_bm_eyes}/{raw_fn}. \ 138 | If this message keeps showing, please check for existence of binary masks folder \ 139 | or disable eye-aware training in the configuration.") 140 | bm_eyes = np.zeros_like(image) 141 | #raise IOError(f"Failed reading binary mask {dir_bm_eyes}/{raw_fn}.") 142 | bm_eyes = cv2.resize(bm_eyes, (256,256)) / 255. 
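        # (Editor's note, not part of the original file: `bm_eyes` is the eye-region
        #  binary mask rescaled to [0, 1]; it is returned together with the image pair
        #  and presumably consumed as `mask_eyes` by the eye-aware reconstruction and
        #  edge losses in networks/losses.py.)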
143 | else: 144 | bm_eyes = np.zeros_like(image) 145 | 146 | image = np.concatenate([image, bm_eyes], axis=-1) 147 | image = random_transform(image, **random_transform_args) 148 | warped_img, target_img = random_warp_rev(image, res=res) 149 | 150 | bm_eyes = target_img[...,3:] 151 | warped_img = warped_img[...,:3] 152 | target_img = target_img[...,:3] 153 | 154 | # Motion blur data augmentation: 155 | # we want the model to learn to preserve motion blurs of input images 156 | if np.random.uniform() < 0.25 and use_da_motion_blur: 157 | warped_img, target_img = motion_blur([warped_img, target_img]) 158 | 159 | warped_img, target_img, bm_eyes = \ 160 | warped_img.astype(np.float32), target_img.astype(np.float32), bm_eyes.astype(np.float32) 161 | 162 | return warped_img, target_img, bm_eyes -------------------------------------------------------------------------------- /FaceSwap_GAN_v2.2_video_conversion.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "# Import modules" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import keras.backend as K" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "\n", 25 | "# Model Configuration" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "K.set_learning_phase(0)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 3, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# Input/Output resolution\n", 44 | "RESOLUTION = 64 # 64x64, 128x128, 256x256\n", 45 | "assert (RESOLUTION % 64) == 0, \"RESOLUTION should be 64, 128, 256\"" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# Architecture configuration\n", 55 | "arch_config = {}\n", 56 | "arch_config['IMAGE_SHAPE'] = (RESOLUTION, RESOLUTION, 3)\n", 57 | "arch_config['use_self_attn'] = True\n", 58 | "arch_config['norm'] = \"instancenorm\" # instancenorm, batchnorm, layernorm, groupnorm, none\n", 59 | "arch_config['model_capacity'] = \"standard\" # standard, lite" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "\n", 67 | "# Define models" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 5, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "from networks.faceswap_gan_model import FaceswapGANModel" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 6, 82 | "metadata": { 83 | "scrolled": true 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "model = FaceswapGANModel(**arch_config)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "\n", 95 | "# Load Model Weights" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 10, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "name": "stdout", 105 | "output_type": "stream", 106 | "text": [ 107 | "Model weights files are successfully loaded\n" 108 | ] 109 | } 110 | ], 111 | "source": [ 112 | "model.load_weights(path=\"./models\")" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "\n", 120 | "# Video Conversion" 121 | ] 122 | }, 123 | { 124 | "cell_type": 
"code", 125 | "execution_count": 10, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "from converter.video_converter import VideoConverter\n", 130 | "from detector.face_detector import MTCNNFaceDetector" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "mtcnn_weights_dir = \"./mtcnn_weights/\"\n", 140 | "\n", 141 | "fd = MTCNNFaceDetector(sess=K.get_session(), model_path=mtcnn_weights_dir)\n", 142 | "vc = VideoConverter()" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 12, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "vc.set_face_detector(fd)\n", 152 | "vc.set_gan_model(model)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "### Video conversion configuration\n", 160 | "\n", 161 | "\n", 162 | "- `use_smoothed_bbox`: \n", 163 | " - Boolean. Whether to enable smoothed bbox.\n", 164 | "- `use_kalman_filter`: \n", 165 | " - Boolean. Whether to enable Kalman filter.\n", 166 | "- `use_auto_downscaling`:\n", 167 | " - Boolean. Whether to enable auto-downscaling in face detection (to prevent OOM error).\n", 168 | "- `bbox_moving_avg_coef`: \n", 169 | " - Float point between 0 and 1. Smoothing coef. used when use_kalman_filter is set False.\n", 170 | "- `min_face_area`:\n", 171 | " - int x int. Minimum size of face. Detected faces smaller than min_face_area will not be transformed.\n", 172 | "- `IMAGE_SHAPE`:\n", 173 | " - Input/Output resolution of the GAN model\n", 174 | "- `kf_noise_coef`:\n", 175 | " - Float point. Increase by 10x if tracking is slow. Decrease by 1/10x if trakcing works fine but jitter occurs.\n", 176 | "- `use_color_correction`: \n", 177 | " - String of \"adain\", \"adain_xyz\", \"hist_match\", or \"none\". The color correction method to be applied.\n", 178 | "- `detec_threshold`: \n", 179 | " - Float point between 0 and 1. Decrease its value if faces are missed. Increase its value to reduce false positives.\n", 180 | "- `roi_coverage`: \n", 181 | " - Float point between 0 and 1 (exclusive). Center area of input images to be cropped (Suggested range: 0.85 ~ 0.95)\n", 182 | "- `enhance`: \n", 183 | " - Float point. A coef. for contrast enhancement in the region of alpha mask (Suggested range: 0. ~ 0.4)\n", 184 | "- `output_type`: \n", 185 | " - Layout format of output video: 1. [ result ], 2. [ source | result ], 3. [ source | result | mask ]\n", 186 | "- `direction`: \n", 187 | " - String of \"AtoB\" or \"BtoA\". Direction of face transformation." 
188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 13, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "options = {\n", 197 | " # ===== Fixed =====\n", 198 | " \"use_smoothed_bbox\": True,\n", 199 | " \"use_kalman_filter\": True,\n", 200 | " \"use_auto_downscaling\": False,\n", 201 | " \"bbox_moving_avg_coef\": 0.65,\n", 202 | " \"min_face_area\": 35 * 35,\n", 203 | " \"IMAGE_SHAPE\": model.IMAGE_SHAPE,\n", 204 | " # ===== Tunable =====\n", 205 | " \"kf_noise_coef\": 3e-3,\n", 206 | " \"use_color_correction\": \"hist_match\",\n", 207 | " \"detec_threshold\": 0.7,\n", 208 | " \"roi_coverage\": 0.9,\n", 209 | " \"enhance\": 0.,\n", 210 | " \"output_type\": 3,\n", 211 | " \"direction\": \"AtoB\",\n", 212 | "}" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "# Start video conversion\n", 220 | "\n", 221 | "\n", 222 | "- `input_fn`: \n", 223 | " - String. Input video path.\n", 224 | "- `output_fn`: \n", 225 | " - String. Output video path.\n", 226 | "- `duration`: \n", 227 | " - None or a non-negative float tuple: (start_sec, end_sec). Duration of input video to be converted\n", 228 | " - e.g., setting `duration = (5, 7.5)` outputs a 2.5-sec-long video clip corresponding to 5s ~ 7.5s of the input video." 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 14, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "input_fn = \"INPUT_VIDEO.mp4\"\n", 238 | "output_fn = \"OUTPUT_VIDEO.mp4\"\n", 239 | "duration = None " 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 15, 245 | "metadata": {}, 246 | "outputs": [ 247 | { 248 | "name": "stdout", 249 | "output_type": "stream", 250 | "text": [ 251 | "[MoviePy] >>>> Building video OUTPUT_VIDEO.mp4\n", 252 | "[MoviePy] Writing video OUTPUT_VIDEO.mp4\n" 253 | ] 254 | }, 255 | { 256 | "name": "stderr", 257 | "output_type": "stream", 258 | "text": [ 259 | "100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:12<00:00, 1.48it/s]\n" 260 | ] 261 | }, 262 | { 263 | "name": "stdout", 264 | "output_type": "stream", 265 | "text": [ 266 | "[MoviePy] Done.\n", 267 | "[MoviePy] >>>> Video ready: OUTPUT_VIDEO.mp4 \n", 268 | "\n" 269 | ] 270 | } 271 | ], 272 | "source": [ 273 | "vc.convert(input_fn=input_fn, output_fn=output_fn, options=options, duration=duration)" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "metadata": {}, 287 | "outputs": [], 288 | "source": [] 289 | } 290 | ], 291 | "metadata": { 292 | "kernelspec": { 293 | "display_name": "Python 3", 294 | "language": "python", 295 | "name": "python3" 296 | }, 297 | "language_info": { 298 | "codemirror_mode": { 299 | "name": "ipython", 300 | "version": 3 301 | }, 302 | "file_extension": ".py", 303 | "mimetype": "text/x-python", 304 | "name": "python", 305 | "nbconvert_exporter": "python", 306 | "pygments_lexer": "ipython3", 307 | "version": "3.6.4" 308 | } 309 | }, 310 | "nbformat": 4, 311 | "nbformat_minor": 2 312 | } 313 | -------------------------------------------------------------------------------- /networks/nn_blocks.py: -------------------------------------------------------------------------------- 1 | from keras.layers import * 2 | from keras.layers.advanced_activations import LeakyReLU 3 | from .instance_normalization 
import InstanceNormalization 4 | from .GroupNormalization import GroupNormalization 5 | from .pixel_shuffler import PixelShuffler 6 | from .custom_layers.scale_layer import Scale 7 | from .custom_inits.icnr_initializer import icnr_keras 8 | import tensorflow as tf 9 | import keras.backend as K 10 | 11 | # initializers and weight decay regularization are fixed 12 | conv_init = 'he_normal' 13 | w_l2 = 1e-4 14 | 15 | def self_attn_block(inp, nc, squeeze_factor=8): 16 | ''' 17 | Code borrows from https://github.com/taki0112/Self-Attention-GAN-Tensorflow 18 | ''' 19 | assert nc//squeeze_factor > 0, f"Input channels must be >= {squeeze_factor}, recieved nc={nc}" 20 | x = inp 21 | shape_x = x.get_shape().as_list() 22 | 23 | f = Conv2D(nc//squeeze_factor, 1, kernel_regularizer=regularizers.l2(w_l2))(x) 24 | g = Conv2D(nc//squeeze_factor, 1, kernel_regularizer=regularizers.l2(w_l2))(x) 25 | h = Conv2D(nc, 1, kernel_regularizer=regularizers.l2(w_l2))(x) 26 | 27 | shape_f = f.get_shape().as_list() 28 | shape_g = g.get_shape().as_list() 29 | shape_h = h.get_shape().as_list() 30 | flat_f = Reshape((-1, shape_f[-1]))(f) 31 | flat_g = Reshape((-1, shape_g[-1]))(g) 32 | flat_h = Reshape((-1, shape_h[-1]))(h) 33 | 34 | s = Lambda(lambda x: K.batch_dot(x[0], Permute((2,1))(x[1])))([flat_g, flat_f]) 35 | 36 | beta = Softmax(axis=-1)(s) 37 | o = Lambda(lambda x: K.batch_dot(x[0], x[1]))([beta, flat_h]) 38 | o = Reshape(shape_x[1:])(o) 39 | o = Scale()(o) 40 | 41 | out = add([o, inp]) 42 | return out 43 | 44 | def dual_attn_block(inp, nc, squeeze_factor=8): 45 | ''' 46 | https://github.com/junfu1115/DANet 47 | ''' 48 | assert nc//squeeze_factor > 0, f"Input channels must be >= {squeeze_factor}, recieved nc={nc}" 49 | x = inp 50 | shape_x = x.get_shape().as_list() 51 | 52 | # position attention module 53 | x_pam = Conv2D(nc, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2), 54 | kernel_initializer=conv_init, use_bias=False, padding="same")(x) 55 | x_pam = Activation("relu")(x_pam) 56 | x_pam = normalization(x_pam, norm, nc) 57 | f_pam = Conv2D(nc//squeeze_factor, 1, kernel_regularizer=regularizers.l2(w_l2))(x_pam) 58 | g_pam = Conv2D(nc//squeeze_factor, 1, kernel_regularizer=regularizers.l2(w_l2))(x_pam) 59 | h_pam = Conv2D(nc, 1, kernel_regularizer=regularizers.l2(w_l2))(x_pam) 60 | shape_f_pam = f_pam.get_shape().as_list() 61 | shape_g_pam = g_pam.get_shape().as_list() 62 | shape_h_pam = h_pam.get_shape().as_list() 63 | flat_f_pam = Reshape((-1, shape_f_pam[-1]))(f_pam) 64 | flat_g_pam = Reshape((-1, shape_g_pam[-1]))(g_pam) 65 | flat_h_pam = Reshape((-1, shape_h_pam[-1]))(h_pam) 66 | s_pam = Lambda(lambda x: K.batch_dot(x[0], Permute((2,1))(x[1])))([flat_g_pam, flat_f_pam]) 67 | beta_pam = Softmax(axis=-1)(s_pam) 68 | o_pam = Lambda(lambda x: K.batch_dot(x[0], x[1]))([beta_pam, flat_h_pam]) 69 | o_pam = Reshape(shape_x[1:])(o_pam) 70 | o_pam = Scale()(o_pam) 71 | out_pam = add([o_pam, x_pam]) 72 | out_pam = Conv2D(nc, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2), 73 | kernel_initializer=conv_init, use_bias=False, padding="same")(out_pam) 74 | out_pam = Activation("relu")(out_pam) 75 | out_pam = normalization(out_pam, norm, nc) 76 | 77 | # channel attention module 78 | x_chn = Conv2D(nc, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2), 79 | kernel_initializer=conv_init, use_bias=False, padding="same")(x) 80 | x_chn = Activation("relu")(x_chn) 81 | x_chn = normalization(x_chn, norm, nc) 82 | shape_x_chn = x_chn.get_shape().as_list() 83 | flat_f_chn = Reshape((-1, 
shape_x_chn[-1]))(x_chn) 84 | flat_g_chn = Reshape((-1, shape_x_chn[-1]))(x_chn) 85 | flat_h_chn = Reshape((-1, shape_x_chn[-1]))(x_chn) 86 | s_chn = Lambda(lambda x: K.batch_dot(Permute((2,1))(x[0]), x[1]))([flat_g_chn, flat_f_chn]) 87 | s_new_chn = Lambda(lambda x: K.repeat_elements(K.max(x, -1, keepdims=True), nc, -1))(s_chn) 88 | s_new_chn = Lambda(lambda x: x[0] - x[1])([s_new_chn, s_chn]) 89 | beta_chn = Softmax(axis=-1)(s_new_chn) 90 | o_chn = Lambda(lambda x: K.batch_dot(x[0], Permute((2,1))(x[1])))([flat_h_chn, beta_chn]) 91 | o_chn = Reshape(shape_x[1:])(o_chn) 92 | o_chn = Scale()(o_chn) 93 | out_chn = add([o_chn, x_chn]) 94 | out_chn = Conv2D(nc, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2), 95 | kernel_initializer=conv_init, use_bias=False, padding="same")(out_chn) 96 | out_chn = Activation("relu")(out_chn) 97 | out_chn = normalization(out_chn, norm, nc) 98 | 99 | out = add([out_pam, out_chn]) 100 | return out 101 | 102 | def normalization(inp, norm='none', group='16'): 103 | x = inp 104 | if norm == 'layernorm': 105 | x = GroupNormalization(group=group)(x) 106 | elif norm == 'batchnorm': 107 | x = BatchNormalization()(x) 108 | elif norm == 'groupnorm': 109 | x = GroupNormalization(group=16)(x) 110 | elif norm == 'instancenorm': 111 | x = InstanceNormalization()(x) 112 | elif norm == 'hybrid': 113 | if group % 2 == 1: 114 | raise ValueError(f"Output channels must be an even number for hybrid norm, received {group}.") 115 | f = group 116 | x0 = Lambda(lambda x: x[...,:f//2])(x) 117 | x1 = Lambda(lambda x: x[...,f//2:])(x) 118 | x0 = Conv2D(f//2, kernel_size=1, kernel_regularizer=regularizers.l2(w_l2), 119 | kernel_initializer=conv_init)(x0) 120 | x1 = InstanceNormalization()(x1) 121 | x = concatenate([x0, x1], axis=-1) 122 | else: 123 | x = x 124 | return x 125 | 126 | def conv_block(input_tensor, f, use_norm=False, strides=2, w_l2=w_l2, norm='none'): 127 | x = input_tensor 128 | x = Conv2D(f, kernel_size=3, strides=strides, kernel_regularizer=regularizers.l2(w_l2), 129 | kernel_initializer=conv_init, use_bias=False, padding="same")(x) 130 | x = Activation("relu")(x) 131 | x = normalization(x, norm, f) if use_norm else x 132 | return x 133 | 134 | def conv_block_d(input_tensor, f, use_norm=False, w_l2=w_l2, norm='none'): 135 | x = input_tensor 136 | x = Conv2D(f, kernel_size=4, strides=2, kernel_regularizer=regularizers.l2(w_l2), 137 | kernel_initializer=conv_init, use_bias=False, padding="same")(x) 138 | x = LeakyReLU(alpha=0.2)(x) 139 | x = normalization(x, norm, f) if use_norm else x 140 | return x 141 | 142 | def res_block(input_tensor, f, use_norm=False, w_l2=w_l2, norm='none'): 143 | x = input_tensor 144 | x = Conv2D(f, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2), 145 | kernel_initializer=conv_init, use_bias=False, padding="same")(x) 146 | x = LeakyReLU(alpha=0.2)(x) 147 | x = normalization(x, norm, f) if use_norm else x 148 | x = Conv2D(f, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2), 149 | kernel_initializer=conv_init, use_bias=False, padding="same")(x) 150 | x = add([x, input_tensor]) 151 | x = LeakyReLU(alpha=0.2)(x) 152 | x = normalization(x, norm, f) if use_norm else x 153 | return x 154 | 155 | def SPADE_res_block(input_tensor, cond_input_tensor, f, use_norm=True, norm='none'): 156 | """ 157 | Semantic Image Synthesis with Spatially-Adaptive Normalization 158 | Taesung Park, Ming-Yu Liu, Ting-Chun Wang, Jun-Yan Zhu 159 | https://arxiv.org/abs/1903.07291 160 | 161 | Note: 162 | SPADE just works like a charm. 
163 | It speeds up training alot and is also a very promosing approach for solving profile face generation issue. 164 | *(This implementation can be wrong since I haven't finished reading the paper. 165 | The author hasn't release their code either (https://github.com/NVlabs/SPADE).) 166 | """ 167 | def SPADE(input_tensor, cond_input_tensor, f, use_norm=True, norm='none'): 168 | x = input_tensor 169 | x = normalization(x, norm, f) if use_norm else x 170 | y = cond_input_tensor 171 | y = Conv2D(128, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2), 172 | kernel_initializer=conv_init, padding='same')(y) 173 | y = Activation('relu')(y) 174 | gamma = Conv2D(f, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2), 175 | kernel_initializer=conv_init, padding='same')(y) 176 | beta = Conv2D(f, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2), 177 | kernel_initializer=conv_init, padding='same')(y) 178 | x = multiply([x, gamma]) 179 | x = add([x, beta]) 180 | return x 181 | 182 | x = input_tensor 183 | x = SPADE(x, cond_input_tensor, f, use_norm, norm) 184 | x = Activation('relu')(x) 185 | x = ReflectPadding2D(x) 186 | x = Conv2D(f, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2), 187 | kernel_initializer=conv_init, use_bias=not use_norm)(x) 188 | x = SPADE(x, cond_input_tensor, f, use_norm, norm) 189 | x = Activation('relu')(x) 190 | x = ReflectPadding2D(x) 191 | x = Conv2D(f, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2), 192 | kernel_initializer=conv_init)(x) 193 | x = add([x, input_tensor]) 194 | x = Activation('relu')(x) 195 | return x 196 | 197 | def upscale_ps(input_tensor, f, use_norm=False, w_l2=w_l2, norm='none'): 198 | x = input_tensor 199 | x = Conv2D(f*4, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2), 200 | kernel_initializer=icnr_keras, padding='same')(x) 201 | x = LeakyReLU(0.2)(x) 202 | x = normalization(x, norm, f) if use_norm else x 203 | x = PixelShuffler()(x) 204 | return x 205 | 206 | def ReflectPadding2D(x, pad=1): 207 | x = Lambda(lambda x: tf.pad(x, [[0, 0], [pad, pad], [pad, pad], [0, 0]], mode='REFLECT'))(x) 208 | return x 209 | 210 | def upscale_nn(input_tensor, f, use_norm=False, w_l2=w_l2, norm='none'): 211 | x = input_tensor 212 | x = UpSampling2D()(x) 213 | x = ReflectPadding2D(x, 1) 214 | x = Conv2D(f, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2), 215 | kernel_initializer=conv_init)(x) 216 | x = normalization(x, norm, f) if use_norm else x 217 | return x 218 | -------------------------------------------------------------------------------- /MTCNN_video_face_detection_alignment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Face detection for video\n", 8 | "Images of detected faces have format `frameXfaceY.jpg`, where `X` represents the Xth frame and `Y` the Yth face in Xth frame. 
" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import os\n", 18 | "import cv2\n", 19 | "import numpy as np\n", 20 | "from matplotlib import pyplot as plt" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "import tensorflow as tf\n", 30 | "from keras import backend as K\n", 31 | "from pathlib import PurePath, Path\n", 32 | "from moviepy.editor import VideoFileClip" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "from umeyama import umeyama\n", 42 | "import mtcnn_detect_face" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "Create MTCNN and its forward pass functions" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 3, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "def create_mtcnn(sess, model_path):\n", 59 | " if not model_path:\n", 60 | " model_path,_ = os.path.split(os.path.realpath(__file__))\n", 61 | "\n", 62 | " with tf.variable_scope('pnet2'):\n", 63 | " data = tf.placeholder(tf.float32, (None,None,None,3), 'input')\n", 64 | " pnet = mtcnn_detect_face.PNet({'data':data})\n", 65 | " pnet.load(os.path.join(model_path, 'det1.npy'), sess)\n", 66 | " with tf.variable_scope('rnet2'):\n", 67 | " data = tf.placeholder(tf.float32, (None,24,24,3), 'input')\n", 68 | " rnet = mtcnn_detect_face.RNet({'data':data})\n", 69 | " rnet.load(os.path.join(model_path, 'det2.npy'), sess)\n", 70 | " with tf.variable_scope('onet2'):\n", 71 | " data = tf.placeholder(tf.float32, (None,48,48,3), 'input')\n", 72 | " onet = mtcnn_detect_face.ONet({'data':data})\n", 73 | " onet.load(os.path.join(model_path, 'det3.npy'), sess)\n", 74 | " return pnet, rnet, onet" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "WEIGHTS_PATH = \"./mtcnn_weights/\"\n", 84 | "\n", 85 | "sess = K.get_session()\n", 86 | "with sess.as_default():\n", 87 | " global pnet, rnet, onet \n", 88 | " pnet, rnet, onet = create_mtcnn(sess, WEIGHTS_PATH)\n", 89 | "\n", 90 | "global pnet, rnet, onet\n", 91 | " \n", 92 | "pnet = K.function([pnet.layers['data']],[pnet.layers['conv4-2'], pnet.layers['prob1']])\n", 93 | "rnet = K.function([rnet.layers['data']],[rnet.layers['conv5-2'], rnet.layers['prob1']])\n", 94 | "onet = K.function([onet.layers['data']],[onet.layers['conv6-2'], onet.layers['conv6-3'], onet.layers['prob1']])" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "Create folder where images will be saved to" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 10, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "Path(f\"faces/aligned_faces\").mkdir(parents=True, exist_ok=True)\n", 111 | "Path(f\"faces/raw_faces\").mkdir(parents=True, exist_ok=True)\n", 112 | "Path(f\"faces/binary_masks_eyes\").mkdir(parents=True, exist_ok=True)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "Functions for video processing and face alignment" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 8, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "def get_src_landmarks(x0, x1, y0, y1, pnts):\n", 129 | " \"\"\"\n", 130 | " x0, 
x1, y0, y1: (smoothed) bbox coord.\n", 131 | " pnts: landmarks predicted by MTCNN\n", 132 | " \"\"\" \n", 133 | " src_landmarks = [(int(pnts[i+5][0]-x0), \n", 134 | " int(pnts[i][0]-y0)) for i in range(5)]\n", 135 | " return src_landmarks\n", 136 | "\n", 137 | "def get_tar_landmarks(img):\n", 138 | " \"\"\" \n", 139 | " img: detected face image\n", 140 | " \"\"\" \n", 141 | " ratio_landmarks = [\n", 142 | " (0.31339227236234224, 0.3259269274198092),\n", 143 | " (0.31075140146108776, 0.7228453709528997),\n", 144 | " (0.5523683107816256, 0.5187296867370605),\n", 145 | " (0.7752419985257663, 0.37262483743520886),\n", 146 | " (0.7759613623985877, 0.6772957581740159)\n", 147 | " ] \n", 148 | " \n", 149 | " img_size = img.shape\n", 150 | " tar_landmarks = [(int(xy[0]*img_size[0]), \n", 151 | " int(xy[1]*img_size[1])) for xy in ratio_landmarks]\n", 152 | " return tar_landmarks\n", 153 | "\n", 154 | "def landmarks_match_mtcnn(src_im, src_landmarks, tar_landmarks): \n", 155 | " \"\"\"\n", 156 | " umeyama(src, dst, estimate_scale)\n", 157 | " landmarks coord. for umeyama should be (width, height) or (y, x)\n", 158 | " \"\"\"\n", 159 | " src_size = src_im.shape\n", 160 | " src_tmp = [(int(xy[1]), int(xy[0])) for xy in src_landmarks]\n", 161 | " tar_tmp = [(int(xy[1]), int(xy[0])) for xy in tar_landmarks]\n", 162 | " M = umeyama(np.array(src_tmp), np.array(tar_tmp), True)[0:2]\n", 163 | " result = cv2.warpAffine(src_im, M, (src_size[1], src_size[0]), borderMode=cv2.BORDER_REPLICATE) \n", 164 | " return result\n", 165 | "\n", 166 | "def process_mtcnn_bbox(bboxes, im_shape):\n", 167 | " \"\"\"\n", 168 | " output bbox coordinate of MTCNN is (y0, x0, y1, x1)\n", 169 | " Here we process the bbox coord. to a square bbox with ordering (x0, y1, x1, y0)\n", 170 | " \"\"\"\n", 171 | " for i, bbox in enumerate(bboxes):\n", 172 | " y0, x0, y1, x1 = bboxes[i,0:4]\n", 173 | " w, h = int(y1 - y0), int(x1 - x0)\n", 174 | " length = (w + h)/2\n", 175 | " center = (int((x1+x0)/2),int((y1+y0)/2))\n", 176 | " new_x0 = np.max([0, (center[0]-length//2)])#.astype(np.int32)\n", 177 | " new_x1 = np.min([im_shape[0], (center[0]+length//2)])#.astype(np.int32)\n", 178 | " new_y0 = np.max([0, (center[1]-length//2)])#.astype(np.int32)\n", 179 | " new_y1 = np.min([im_shape[1], (center[1]+length//2)])#.astype(np.int32)\n", 180 | " bboxes[i,0:4] = new_x0, new_y1, new_x1, new_y0\n", 181 | " return bboxes\n", 182 | "\n", 183 | "def process_video(input_img): \n", 184 | " global frames, save_interval\n", 185 | " global pnet, rnet, onet\n", 186 | " minsize = 30 # minimum size of face\n", 187 | " detec_threshold = 0.7\n", 188 | " threshold = [0.6, 0.7, detec_threshold] # three steps's threshold\n", 189 | " factor = 0.709 # scale factor \n", 190 | " \n", 191 | " frames += 1 \n", 192 | " if frames % save_interval == 0:\n", 193 | " faces, pnts = mtcnn_detect_face.detect_face(\n", 194 | " input_img, minsize, pnet, rnet, onet, threshold, factor)\n", 195 | " faces = process_mtcnn_bbox(faces, input_img.shape)\n", 196 | " \n", 197 | " for idx, (x0, y1, x1, y0, conf_score) in enumerate(faces):\n", 198 | " det_face_im = input_img[int(x0):int(x1),int(y0):int(y1),:]\n", 199 | "\n", 200 | " # get src/tar landmarks\n", 201 | " src_landmarks = get_src_landmarks(x0, x1, y0, y1, pnts)\n", 202 | " tar_landmarks = get_tar_landmarks(det_face_im)\n", 203 | "\n", 204 | " # align detected face\n", 205 | " aligned_det_face_im = landmarks_match_mtcnn(\n", 206 | " det_face_im, src_landmarks, tar_landmarks)\n", 207 | "\n", 208 | " fname = 
f\"./faces/aligned_faces/frame{frames}face{str(idx)}.jpg\"\n", 209 | " plt.imsave(fname, aligned_det_face_im, format=\"jpg\")\n", 210 | " fname = f\"./faces/raw_faces/frame{frames}face{str(idx)}.jpg\"\n", 211 | " plt.imsave(fname, det_face_im, format=\"jpg\")\n", 212 | " \n", 213 | " bm = np.zeros_like(aligned_det_face_im)\n", 214 | " h, w = bm.shape[:2]\n", 215 | " bm[int(src_landmarks[0][0]-h/15):int(src_landmarks[0][0]+h/15),\n", 216 | " int(src_landmarks[0][1]-w/8):int(src_landmarks[0][1]+w/8),:] = 255\n", 217 | " bm[int(src_landmarks[1][0]-h/15):int(src_landmarks[1][0]+h/15),\n", 218 | " int(src_landmarks[1][1]-w/8):int(src_landmarks[1][1]+w/8),:] = 255\n", 219 | " bm = landmarks_match_mtcnn(bm, src_landmarks, tar_landmarks)\n", 220 | " fname = f\"./faces/binary_masks_eyes/frame{frames}face{str(idx)}.jpg\"\n", 221 | " plt.imsave(fname, bm, format=\"jpg\")\n", 222 | " \n", 223 | " return np.zeros((3,3,3))" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": {}, 229 | "source": [ 230 | "Start face detection\n", 231 | "\n", 232 | "Default input video filename: `INPUT_VIDEO.mp4`" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 12, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "global frames\n", 242 | "frames = 0\n", 243 | "\n", 244 | "# configuration\n", 245 | "save_interval = 6 # perform face detection every {save_interval} frames\n", 246 | "fn_input_video = \"INPUT_VIDEO.mp4\"\n", 247 | "\n", 248 | "output = 'dummy.mp4'\n", 249 | "clip1 = VideoFileClip(fn_input_video)\n", 250 | "clip = clip1.fl_image(process_video)#.subclip(0,3) #NOTE: this function expects color images!!\n", 251 | "clip.write_videofile(output, audio=False)\n", 252 | "clip1.reader.close()" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "## Saved images will be in folder `faces/raw_faces` and `faces/aligned_faces` respectively. Binary masks will be in `faces/binary_masks_eyes`." 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [] 268 | } 269 | ], 270 | "metadata": { 271 | "kernelspec": { 272 | "display_name": "Python 3", 273 | "language": "python", 274 | "name": "python3" 275 | }, 276 | "language_info": { 277 | "codemirror_mode": { 278 | "name": "ipython", 279 | "version": 3 280 | }, 281 | "file_extension": ".py", 282 | "mimetype": "text/x-python", 283 | "name": "python", 284 | "nbconvert_exporter": "python", 285 | "pygments_lexer": "ipython3", 286 | "version": "3.6.4" 287 | } 288 | }, 289 | "nbformat": 4, 290 | "nbformat_minor": 2 291 | } 292 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # faceswap-GAN 2 | Adding Adversarial loss and perceptual loss (VGGface) to deepfakes'(reddit user) auto-encoder architecture. 
3 | 4 | ## Updates 5 | | Date    | Update | 6 | | ------------- | ------------- | 7 | | 2018-08-27     | **Colab support:** A [colab notebook](https://colab.research.google.com/github/shaoanlu/faceswap-GAN/blob/master/colab_demo/faceswap-GAN_colab_demo.ipynb) for faceswap-GAN v2.2 is provided.| 8 | | 2018-07-25     | **Data preparation:** Add a [new notebook](https://github.com/shaoanlu/faceswap-GAN/blob/master/MTCNN_video_face_detection_alignment.ipynb) for video pre-processing in which MTCNN is used for face detection as well as face alignment.| 9 | | 2018-06-29     | **Model architecture**: faceswap-GAN v2.2 now supports different output resolutions: 64x64, 128x128, and 256x256. Default `RESOLUTION = 64` can be changed in the config cell of the [v2.2 notebook](https://github.com/shaoanlu/faceswap-GAN/blob/master/FaceSwap_GAN_v2.2_train_test.ipynb).| 10 | | 2018-06-25     | **New version**: faceswap-GAN v2.2 has been released. The main improvements of the v2.2 model are its capability of generating realistic and consistent eye movements (results are shown below, or Ctrl+F for eyes), as well as higher video quality with face alignment.| 11 | | 2018-06-06     | **Model architecture**: Add a self-attention mechanism proposed in [SAGAN](https://arxiv.org/abs/1805.08318) into the V2 GAN model. (Note: There is still no official code release for SAGAN, so the implementation in this repo could be wrong. We'll keep an eye on it.)| 12 | 13 | ## Google Colab support 14 | Here is a [playground notebook](https://colab.research.google.com/github/shaoanlu/faceswap-GAN/blob/master/colab_demo/faceswap-GAN_colab_demo.ipynb) for faceswap-GAN v2.2 on Google Colab. Users can train their own model in the browser. 15 | 16 | ## Descriptions 17 | ### faceswap-GAN v2.2 18 | * [FaceSwap_GAN_v2.2_train_test.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/FaceSwap_GAN_v2.2_train_test.ipynb) 19 | - Notebook for model training of faceswap-GAN model version 2.2. 20 | - This notebook also provides code for still image transformation at the bottom. 21 | - Requires additional training images generated through [prep_binary_masks.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/prep_binary_masks.ipynb). 22 | 23 | * [FaceSwap_GAN_v2.2_video_conversion.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/FaceSwap_GAN_v2.2_video_conversion.ipynb) 24 | - Notebook for video conversion of faceswap-GAN model version 2.2. 25 | - Face alignment using 5-point landmarks is introduced to video conversion. 26 | 27 | * [prep_binary_masks.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/prep_binary_masks.ipynb) 28 | - Notebook for training data preprocessing. Output binary masks are saved in the `./binary_masks/faceA_eyes` and `./binary_masks/faceB_eyes` folders. 29 | - Requires the [face_alignment](https://github.com/1adrianb/face-alignment) package. (An alternative method for generating binary masks (not requiring `face_alignment` and `dlib` packages) can be found in [MTCNN_video_face_detection_alignment.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/MTCNN_video_face_detection_alignment.ipynb).) 30 | 31 | * [MTCNN_video_face_detection_alignment.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/MTCNN_video_face_detection_alignment.ipynb) 32 | - This notebook performs face detection/alignment on the input video. 33 | - Detected faces are saved in `./faces/raw_faces` and `./faces/aligned_faces` for non-aligned/aligned results respectively.
34 | - Crude eyes binary masks are also generated and saved in `./faces/binary_masks_eyes`. These binary masks can serve as a suboptimal alternative to masks generated through [prep_binary_masks.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/prep_binary_masks.ipynb). 35 | 36 | **Usage** 37 | 1. Run [MTCNN_video_face_detection_alignment.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/MTCNN_video_face_detection_alignment.ipynb) to extract faces from videos. Manually move/rename the aligned face images into `./faceA/` or `./faceB/` folders. 38 | 2. Run [prep_binary_masks.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/prep_binary_masks.ipynb) to generate binary masks of training images. 39 | - You can skip this pre-processing step by (1) setting `use_bm_eyes=False` in the config cell of the train_test notebook, or (2) using the low-quality binary masks generated in step 1. 40 | 3. Run [FaceSwap_GAN_v2.2_train_test.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/FaceSwap_GAN_v2.2_train_test.ipynb) to train models. 41 | 4. Run [FaceSwap_GAN_v2.2_video_conversion.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/FaceSwap_GAN_v2.2_video_conversion.ipynb) to create videos using the trained models in step 3. 42 | 43 | ### Miscellaneous 44 | * [faceswap-GAN_colab_demo.ipynb](https://github.com/shaoanlu/faceswap-GAN/blob/master/colab_demo/faceswap-GAN_colab_demo.ipynb) 45 | - An all-in-one notebook for demonstration purposes that can be run on Google Colab. 46 | 47 | ### Training data format 48 | - Face images are supposed to be in the `./faceA/` or `./faceB/` folder for each target respectively. 49 | - Images will be resized to 256x256 during training. 50 | 51 | ## Generative adversarial networks for face swapping 52 | ### 1. Architecture 53 | ![enc_arch3d](https://www.dropbox.com/s/b43x8bv5xxbo5q0/enc_arch3d_resized2.jpg?raw=1) 54 | 55 | ![dec_arch3d](https://www.dropbox.com/s/p09ioztjcxs66ey/dec_3arch3d_resized.jpg?raw=1) 56 | 57 | ![dis_arch3d](https://www.dropbox.com/s/szcq8j5axo11mu9/dis_arch3d_resized2.jpg?raw=1) 58 | 59 | ### 2. Results 60 | - **Improved output quality:** Adversarial loss improves the reconstruction quality of generated images. 61 | ![trump_cage](https://www.dropbox.com/s/24k16vtqkhlf13i/auto_results.jpg?raw=1) 62 | 63 | - **Additional results:** [This image](https://www.dropbox.com/s/2nc5guogqk7nwdd/rand_160_2.jpg?raw=1) shows 160 random results generated by the v2 GAN with self-attention mechanism (image format: source -> mask -> transformed). 64 | 65 | - **Consistent eye movements (v2.2 model):** Results of the v2.2 model, which specializes in eye directions, are presented below. The v2.2 model generates more realistic eyes within fewer training iterations. (Input gifs are created using [DeepWarp](http://163.172.78.19/).) 66 | - Top row: v2 model; bottom row: v2.2 model. Columns 1, 3, and 5 show the input gifs. 67 | - ![v2_eb](https://www.dropbox.com/s/d0m626ldcw2lop3/v2_comb.gif?raw=1) 68 | - ![v2.2_eb](https://www.dropbox.com/s/v7wx6r72yfowh98/v2.2_comb.gif?raw=1) 69 | 70 | - **Evaluations:** Evaluations of the output quality on the Trump/Cage dataset can be found [here](https://github.com/shaoanlu/faceswap-GAN/blob/master/notes/README.md#13-model-evaluation-for-trumpcage-dataset). 71 | 72 | ###### The Trump/Cage images are obtained from the reddit user [deepfakes' project](https://pastebin.com/hYaLNg1T) on pastebin.com. 73 | 74 | ### 3. Features
75 | - **[VGGFace](https://github.com/rcmalli/keras-vggface) perceptual loss:** Perceptual loss makes the direction of the eyeballs more realistic and consistent with the input face. It also smooths out artifacts in the segmentation mask, resulting in higher output quality. 76 | 77 | - **Attention mask:** The model predicts an attention mask that helps with handling occlusion, eliminating artifacts, and producing a natural skin tone. Below are results of transforming Hinako Sano ([佐野ひなこ](https://ja.wikipedia.org/wiki/%E4%BD%90%E9%87%8E%E3%81%B2%E3%81%AA%E3%81%93)) to Emi Takei ([武井咲](https://ja.wikipedia.org/wiki/%E6%AD%A6%E4%BA%95%E5%92%B2)). 78 | 79 | ![mask1](https://www.dropbox.com/s/do3gax2lmhck941/mask_comp1.gif?raw=1)  ![mask2](https://www.dropbox.com/s/gh0yq26qkr31yve/mask_comp2.gif?raw=1) 80 | - From left to right: source face, swapped face (before masking), swapped face (after masking). 81 | 82 | ![mask_vis](https://www.dropbox.com/s/q6dfllwh71vavcv/mask_vis_rev.gif?raw=1) 83 | - From left to right: source face, swapped face (after masking), mask heatmap. 84 | ###### Source video: [佐野ひなことすごくどうでもいい話?(遊戯王)](https://www.youtube.com/watch?v=tzlD1CQvkwU) 85 | 86 | - **Configurable input/output resolution (v2.2)**: The model supports 64x64, 128x128, and 256x256 output resolutions. 87 | 88 | - **Face tracking/alignment using MTCNN and Kalman filter during video conversion**: 89 | - MTCNN is introduced for more stable detections and reliable face alignment (FA). 90 | - A Kalman filter smooths the bounding box positions over frames and eliminates jitter on the swapped face. 91 | 92 | ![dlib_vs_MTCNN](https://www.dropbox.com/s/diztxntkss4dt7v/mask_dlib_mtcnn.gif?raw=1) 93 | ![comp_FA](https://www.dropbox.com/s/kviue4065gdqfnt/comp_fa.gif?raw=1) 94 | 95 | - **Training schedule**: The training notebooks provide a predefined training schedule. The above Trump/Cage face-swapping results are generated by a model trained for 21k iterations using the predefined `TOTAL_ITERS = 30000` training schedule. 96 | - Training tricks: Swapping the decoders in the late stage of training reduces artifacts caused by extreme facial expressions. E.g., some of the failure cases above with a wide-open mouth are better transformed using this trick. 97 | 98 | ![self_attn_and_dec_swapping](https://www.dropbox.com/s/ekpa3caq921v6vk/SA_and_dec_swap2.jpg?raw=1) 99 | 100 | - **Eyes-aware training:** Introduces high reconstruction loss and edge loss around the eye area, which guides the model to generate realistic eyes. 101 | 102 | ## Frequently asked questions and troubleshooting 103 | 104 | #### 1. How does it work? 105 | - The following illustration shows a very high-level and abstract (but not exactly the same) flowchart of the denoising autoencoder algorithm. The objective functions look like [this](https://www.dropbox.com/s/e5j5rl7o3tmw6q0/faceswap_GAN_arch4.jpg?raw=1). 106 | ![flow_chart](https://www.dropbox.com/s/4u8q4f03px4spf8/faceswap_GAN_arch3.jpg?raw=1) 107 | #### 2. Previews look good, but faces are not transformed in the output videos? 108 | - The model performs at its full potential when the input images are preprocessed with face alignment methods (a minimal alignment sketch is given below).
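Alignment here means estimating a similarity transform that maps the detected 5-point landmarks onto canonical positions and warping the face with it, which is what `landmarks_match_mtcnn()` does with the repository's `umeyama()` implementation. The sketch below illustrates the same idea using OpenCV's `estimateAffinePartial2D` as a stand-in estimator; the landmark ordering and coordinate convention are assumptions, not the repo's exact code.

```python
# Minimal 5-point alignment sketch (illustrative; the repo uses umeyama() +
# cv2.warpAffine inside landmarks_match_mtcnn). Landmarks are (x, y) pixels.
import cv2
import numpy as np

def align_face(face_bgr, src_landmarks, tar_landmarks):
    """Warp face_bgr so that src_landmarks land on tar_landmarks."""
    src = np.float32(src_landmarks)               # e.g., 5 MTCNN landmarks
    tar = np.float32(tar_landmarks)               # canonical target positions
    M, _ = cv2.estimateAffinePartial2D(src, tar)  # 2x3 similarity transform
    h, w = face_bgr.shape[:2]
    return cv2.warpAffine(face_bgr, M, (w, h), borderMode=cv2.BORDER_REPLICATE)
```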
109 | - ![readme_note001](https://www.dropbox.com/s/a1kjy0ynnlj2g4c/readme_note00.jpg?raw=1) 110 | 111 | ## Requirements 112 | 113 | * keras 2.1.5 114 | * Tensorflow 1.6.0 115 | * Python 3.6.4 116 | * OpenCV 117 | * [keras-vggface](https://github.com/rcmalli/keras-vggface) 118 | * [moviepy](http://zulko.github.io/moviepy/) 119 | * [prefetch_generator](https://github.com/justheuristic/prefetch_generator) (required for v2.2 model) 120 | * [face-alignment](https://github.com/1adrianb/face-alignment) (required as preprocessing for v2.2 model) 121 | 122 | ## Acknowledgments 123 | Code borrows from [tjwei](https://github.com/tjwei/GANotebooks), [eriklindernoren](https://github.com/eriklindernoren/Keras-GAN/blob/master/aae/adversarial_autoencoder.py), [fchollet](https://github.com/fchollet/deep-learning-with-python-notebooks/blob/master/8.5-introduction-to-gans.ipynb), [keras-contrib](https://github.com/keras-team/keras-contrib/blob/master/examples/improved_wgan.py) and [reddit user deepfakes' project](https://pastebin.com/hYaLNg1T). The generative network is adopted from [CycleGAN](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix). Weights and scripts of MTCNN are from [FaceNet](https://github.com/davidsandberg/facenet). Illustrations are from [irasutoya](http://www.irasutoya.com/). 124 | -------------------------------------------------------------------------------- /converter/video_converter.py: -------------------------------------------------------------------------------- 1 | from .kalman_filter import KalmanFilter 2 | from .landmarks_alignment import * 3 | from .face_transformer import FaceTransformer 4 | from .vc_utils import * 5 | import numpy as np 6 | from moviepy.editor import VideoFileClip 7 | 8 | 9 | class VideoConverter(object): 10 | """ 11 | This class is for video conversion 12 | 13 | Attributes: 14 | ftrans: FaceTransformer instance 15 | fdetect: MTCNNFaceDetector instance 16 | prev_x0, prev_x1, prev_y0, prev_y1, frames: Variables for smoothing bounding box 17 | kf0, kf1: KalmanFilter instances for smoothing bounding box 18 | """ 19 | def __init__(self): 20 | # Variables for smoothing bounding box 21 | self.prev_x0 = 0 22 | self.prev_x1 = 0 23 | self.prev_y0 = 0 24 | self.prev_y1 = 0 25 | self.frames = 0 26 | 27 | # face transformer 28 | self.ftrans = FaceTransformer() 29 | 30 | # MTCNN face detector 31 | self.fdetect = None 32 | 33 | # Kalman filters 34 | self.kf0 = None 35 | self.kf1 = None 36 | 37 | def set_gan_model(self, model): 38 | self.ftrans.set_model(model) 39 | 40 | def set_face_detector(self, fdetect): 41 | self.fdetect = fdetect 42 | 43 | def _get_smoothed_coord(self, x0, x1, y0, y1, img_shape, use_kalman_filter=True, ratio=0.65): 44 | if not use_kalman_filter: 45 | x0 = int(ratio * self.prev_x0 + (1-ratio) * x0) 46 | x1 = int(ratio * self.prev_x1 + (1-ratio) * x1) 47 | y1 = int(ratio * self.prev_y1 + (1-ratio) * y1) 48 | y0 = int(ratio * self.prev_y0 + (1-ratio) * y0) 49 | else: 50 | x0y0 = np.array([x0, y0]).astype(np.float32) 51 | x1y1 = np.array([x1, y1]).astype(np.float32) 52 | self.kf0.correct(x0y0) 53 | pred_x0y0 = self.kf0.predict() 54 | self.kf1.correct(x1y1) 55 | pred_x1y1 = self.kf1.predict() 56 | x0 = np.max([0, pred_x0y0[0][0]]).astype(np.int) 57 | x1 = np.min([img_shape[0], pred_x1y1[0][0]]).astype(np.int) 58 | y0 = np.max([0, pred_x0y0[1][0]]).astype(np.int) 59 | y1 = np.min([img_shape[1], pred_x1y1[1][0]]).astype(np.int) 60 | if x0 == x1 or y0 == y1: 61 | x0, y0, x1, y1 = self.prev_x0, self.prev_y0, self.prev_x1, self.prev_y1 62 | return x0, 
x1, y0, y1 63 | 64 | def _set_prev_coord(self, x0, x1, y0, y1): 65 | self.prev_x0 = x0 66 | self.prev_x1 = x1 67 | self.prev_y1 = y1 68 | self.prev_y0 = y0 69 | 70 | def _init_kalman_filters(self, noise_coef): 71 | self.kf0 = KalmanFilter(noise_coef=noise_coef) 72 | self.kf1 = KalmanFilter(noise_coef=noise_coef) 73 | 74 | def convert(self, input_fn, output_fn, options, duration=None): 75 | self.check_options(options) 76 | 77 | if options['use_kalman_filter']: 78 | self._init_kalman_filters(options["kf_noise_coef"]) 79 | 80 | self.frames = 0 81 | self.prev_x0 = self.prev_x1 = self.prev_y0 = self.prev_y1 = 0 82 | 83 | if self.fdetect is None: 84 | raise Exception(f"face detector has not been set through VideoConverter.set_face_detector() yet.") 85 | 86 | clip1 = VideoFileClip(input_fn) 87 | if type(duration) is tuple: 88 | clip = clip1.fl_image(lambda img: self.process_video(img, options)).subclip(duration[0], duration[1]) 89 | else: 90 | clip = clip1.fl_image(lambda img: self.process_video(img, options)) 91 | clip.write_videofile(output_fn, audio=True) 92 | clip1.reader.close() 93 | try: 94 | clip1.audio.reader.close_proc() 95 | except: 96 | pass 97 | 98 | def process_video(self, input_img, options): 99 | """Transform detected faces in single input frame.""" 100 | image = input_img 101 | 102 | # detect face using MTCNN (faces: face bbox coord, pnts: landmarks coord.) 103 | faces, pnts = self.fdetect.detect_face(image, minsize=20, 104 | threshold=options["detec_threshold"], 105 | factor=0.709, 106 | use_auto_downscaling=options["use_auto_downscaling"], 107 | min_face_area=options["min_face_area"] 108 | ) 109 | 110 | # check if any face detected 111 | if len(faces) == 0: 112 | comb_img = get_init_comb_img(input_img) 113 | triple_img = get_init_triple_img(input_img, no_face=True) 114 | 115 | # init. 
output image 116 | mask_map = get_init_mask_map(image) 117 | comb_img = get_init_comb_img(input_img) 118 | best_conf_score = 0 119 | 120 | # loop through all detected faces 121 | for i, (x0, y1, x1, y0, conf_score) in enumerate(faces): 122 | lms = pnts[:,i:i+1] 123 | # smoothe the bounding box 124 | if options["use_smoothed_bbox"]: 125 | if self.frames != 0 and conf_score >= best_conf_score: 126 | x0, x1, y0, y1 = self._get_smoothed_coord( 127 | x0, x1, y0, y1, 128 | img_shape=image.shape, 129 | use_kalman_filter=options["use_kalman_filter"], 130 | ratio=options["bbox_moving_avg_coef"], 131 | ) 132 | self._set_prev_coord(x0, x1, y0, y1) 133 | best_conf_score = conf_score 134 | self.frames += 1 135 | elif conf_score <= best_conf_score: 136 | self.frames += 1 137 | else: 138 | if conf_score >= best_conf_score: 139 | self._set_prev_coord(x0, x1, y0, y1) 140 | best_conf_score = conf_score 141 | if options["use_kalman_filter"]: 142 | for i in range(200): 143 | self.kf0.predict() 144 | self.kf1.predict() 145 | self.frames += 1 146 | 147 | # transform face 148 | try: 149 | # get detected face 150 | det_face_im = input_img[int(x0):int(x1),int(y0):int(y1),:] 151 | 152 | # get src/tar landmarks 153 | src_landmarks = get_src_landmarks(x0, x1, y0, y1, lms) 154 | tar_landmarks = get_tar_landmarks(det_face_im) 155 | 156 | # align detected face 157 | aligned_det_face_im = landmarks_match_mtcnn(det_face_im, src_landmarks, tar_landmarks) 158 | 159 | # face transform 160 | r_im, r_rgb, r_a = self.ftrans.transform( 161 | aligned_det_face_im, 162 | direction=options["direction"], 163 | roi_coverage=options["roi_coverage"], 164 | color_correction=options["use_color_correction"], 165 | IMAGE_SHAPE=options["IMAGE_SHAPE"] 166 | ) 167 | 168 | # reverse alignment 169 | rev_aligned_det_face_im = landmarks_match_mtcnn(r_im, tar_landmarks, src_landmarks) 170 | rev_aligned_det_face_im_rgb = landmarks_match_mtcnn(r_rgb, tar_landmarks, src_landmarks) 171 | rev_aligned_mask = landmarks_match_mtcnn(r_a, tar_landmarks, src_landmarks) 172 | 173 | # merge source face and transformed face 174 | result = np.zeros_like(det_face_im) 175 | result = rev_aligned_mask/255*rev_aligned_det_face_im_rgb + (1-rev_aligned_mask/255)*det_face_im 176 | result_a = rev_aligned_mask 177 | except: 178 | # catch exceptions for landmarks alignment errors (if any) 179 | print(f"Face alignment error occurs at frame {self.frames}.") 180 | # get detected face 181 | det_face_im = input_img[int(x0):int(x1),int(y0):int(y1),:] 182 | 183 | result, _, result_a = self.ftrans.transform( 184 | det_face_im, 185 | direction=options["direction"], 186 | roi_coverage=options["roi_coverage"], 187 | color_correction=options["use_color_correction"], 188 | IMAGE_SHAPE=options["IMAGE_SHAPE"] 189 | ) 190 | 191 | comb_img[int(x0):int(x1),input_img.shape[1]+int(y0):input_img.shape[1]+int(y1),:] = result 192 | 193 | # Enhance output 194 | if options["enhance"] != 0: 195 | comb_img = -1*options["enhance"] * get_init_comb_img(input_img) + (1+options["enhance"]) * comb_img 196 | comb_img = np.clip(comb_img, 0, 255) 197 | 198 | if conf_score >= best_conf_score: 199 | mask_map[int(x0):int(x1),int(y0):int(y1),:] = result_a 200 | mask_map = np.clip(mask_map + .15 * input_img, 0, 255) 201 | # Possible bug: when small faces are detected before the most confident face, 202 | # the mask_map will show brighter input_img 203 | else: 204 | mask_map[int(x0):int(x1),int(y0):int(y1),:] += result_a 205 | mask_map = np.clip(mask_map, 0, 255) 206 | 207 | triple_img = 
get_init_triple_img(input_img) 208 | triple_img[:, :input_img.shape[1]*2, :] = comb_img 209 | triple_img[:, input_img.shape[1]*2:, :] = mask_map 210 | 211 | if options["output_type"] == 1: 212 | return comb_img[:, input_img.shape[1]:, :] # return only result image 213 | elif options["output_type"] == 2: 214 | return comb_img # return input and result image combined as one 215 | elif options["output_type"] == 3: 216 | return triple_img # return input, result and mask heatmap image combined as one 217 | 218 | @staticmethod 219 | def check_options(options): 220 | if options["roi_coverage"] <= 0 or options["roi_coverage"] >= 1: 221 | raise ValueError(f"roi_coverage should be between 0 and 1 (exclusive).") 222 | if options["bbox_moving_avg_coef"] < 0 or options["bbox_moving_avg_coef"] > 1: 223 | raise ValueError(f"bbox_moving_avg_coef should be between 0 and 1 (inclusive).") 224 | if options["detec_threshold"] < 0 or options["detec_threshold"] > 1: 225 | raise ValueError(f"detec_threshold should be between 0 and 1 (inclusive).") 226 | if options["use_smoothed_bbox"] not in [True, False]: 227 | raise ValueError(f"use_smoothed_bbox should be a boolean.") 228 | if options["use_kalman_filter"] not in [True, False]: 229 | raise ValueError(f"use_kalman_filter should be a boolean.") 230 | if options["use_auto_downscaling"] not in [True, False]: 231 | raise ValueError(f"use_auto_downscaling should be a boolean.") 232 | if options["output_type"] not in range(1,4): 233 | ot = options["output_type"] 234 | raise ValueError(f"Received an unknown output_type option: {ot}.") 235 | -------------------------------------------------------------------------------- /notes/README.md: -------------------------------------------------------------------------------- 1 | # Notes: 2 | ## This page contains notes on my ongoing experiments and failed attempts. 3 | ### 1. BatchNorm/InstanceNorm: 4 | Caused input/output skin color inconsistency when the two training datasets had different skin color distributions (lighting condition, shadow, etc.). But I wonder if this would be solved by further training the model. 5 | 6 | ### 2. Perceptual loss 7 | Increasing the perceptual loss weighting factor (to 1) destabilized training. But the weighting [.01, .1, .1] I used is not optimal either. 8 | 9 | ### 3. Bottleneck layers 10 | ~~In the encoder architecture, flattening Conv2D and shrinking it to Dense(1024) is crucial for the model to learn semantic features, or face representation. If we used Conv layers only (which means a larger dimension), would it learn features like visual descriptors? ([source paper](https://arxiv.org/abs/1706.02932v2), last paragraph of sec 3.1)~~ Similar results can be achieved by replacing the Dense layer with Conv2D stride-2 layers (shrinking the feature map to 1x1). 11 | 12 | ### 4. Transforming Emi Takei to Hinako Sano 13 | Transforming Emi Takei to Hinako Sano gave suboptimal results, due to imbalanced training data: over 65% of the images of Hinako Sano came from the same video series. 14 | 15 | ### 5. About mixup and LSGAN 16 | The **mixup** technique ([arXiv](https://arxiv.org/abs/1710.09412)) and the **least squares loss** function ([arXiv](https://arxiv.org/abs/1712.06391)) are adopted for training the GAN. However, I did not do any ablation experiments on them, so I don't know how much impact they had on the outputs. 17 | 18 | ### 6. Adding landmarks as input feature 19 | Adding face landmarks as the fourth input channel during training (w/ dropout_chance=0.3) forces the model to learn (overfit) these face features.
However, it didn't give me a discernible improvement. The following gif shows the result clip. It should be mentioned that the landmarks information was not provided during video making, but the model was still able to produce accurate landmarks because similar [face, landmarks] pairs were already shown to the model during training. 20 | - ![landamrks_gif](https://www.dropbox.com/s/ek8y5fued7irq1j/sh_test_clipped4_lms_comb.gif?raw=1) 21 | 22 | ### 7. **Recursive loop:** Feed the model's output image back as its input, **repeat N times**. 23 | - Idea: Since our model is able to transform a source face into a target face, if we feed the generated fake target face back as its input, will the model refine the fake face to be more like a real target face? 24 | - **Version 1 result (w/o alpha mask)** (left to right: source, N=0, N=2, N=10, N=50) 25 | - ![v1_recur](https://www.dropbox.com/s/hha2w2n4dh49a1k/v1_comb.gif?raw=1) 26 | - The model seems to refine the fake face (to be more similar to the target face), but its shape and color go awry. Furthermore, in certain frames of N=50, **there are blue colors that only appear in the target face training data but not the source face.** Does this mean that the model is trying to pull out training images it has memorized, or is the model trying to transform the input image into a particular training sample? 27 | - **Version 2 result (w/ alpha mask)** (left to right: source, N=0, N=50, N=150, N=500) 28 | - ![v2_recur](https://www.dropbox.com/s/zfl8zjlfv2srysx/v2_comb.gif?raw=1) 29 | - The V2 model is more robust. It generates almost the same result before/after applying the recursive loop, except for some artifacts on the bangs. 30 | 31 | ### 8. **Code manipulation and interpolation**: 32 | - ![knn_codes](https://www.dropbox.com/s/a3o1cvqts83h4fl/knn_code_fit.jpg?raw=1) 33 | - Idea: Refine the output face by adding information from training images that look like the input image. 34 | - KNN takes features extracted from a ResNet50 model as its input. 35 | - Similar results can be achieved by simply taking a weighted average of the input image with the images retrieved by kNN (instead of the code). 36 | - TODO: Implement **alphaGAN**, which integrates a VAE that has a more representative latent space. 37 | 38 | ### 9. **CycleGAN experiment**: 39 | - ![cyckeGAN exp result](https://www.dropbox.com/s/rj7gi5yft6yw7ng/cycleGAN_exp.JPG?raw=1) 40 | - Top row: input images; bottom row: output images. 41 | - CycleGAN produces artifacts on output faces. Also, features are not consistent before/after transformation, e.g., bangs and skin tone. 42 | - ~~**CycleGAN with masking**: To be updated.~~ 43 | 44 | ### 10. **(Towards) One Model to Swap Them All** 45 | - Objective: Train a model that is capable of swapping any given face to Emma Watson. 46 | - The `faceA` folder contains ~2k images of Emma Watson. 47 | - The `faceB` folder contains ~200k images from the celebA dataset. 48 | - Hacks: Add a **domain adversarial loss** on embeddings (from [XGAN](https://arxiv.org/abs/1711.05139) and [this ICCV GAN tutorial](https://youtu.be/uUUvieVxCMs?t=18m59s)). It encourages the encoder to generate embeddings from two different domains that lie in the same subspace (assuming the celebA dataset almost covers the true face image distribution). Also, heavy data augmentation (random channel shifting, random downsampling, etc.) is applied on face A to prevent overfitting. 49 | - Result: The model performed poorly on hard samples, e.g., a man with a beard. 50 | 51 | ### 11. **Face parts swapping as data augmentation**
52 | - ![](https://www.dropbox.com/s/1l9n1ple6ymxy8b/data_augm_flowchart.jpg?raw=1) 53 | - Swap only part of the source face (mouth/nose/eyes) onto the target face, treating the swapped face as augmented training data for the source face. 54 | - For each source face image, a look-alike target face is retrieved using kNN (taking an averaged feature map as input) for face part swapping. 55 | - Result: Unfortunately, the model also learns to generate the artifacts that appear in the augmented data, e.g., sharp edges around the eyes/nose and weirdly warped faces. The artifacts of the augmented data are caused by imperfect blending (due to false landmarks and bad perspective warping). 56 | 57 | ### 12. Neural style transfer as output refinement 58 | - Problem: The 64x64 output resolution is blurry and sometimes the skin tone does not match the target face. 59 | - Question: Is there any other way to refine the 64x64 output face so that it looks natural in, say, a 256x256 input image, other than increasing the output resolution (which leads to much longer training time) or training a super-resolution model? 60 | - Attempts: **Applied neural style transfer techniques as output refinement**, hoping they can improve output quality and solve the color mismatch without additionally training a super-resolution model or increasing the model resolution. 61 | - Method: We used implementations of neural style transfer from [titu1994/Neural-Style-Transfer](https://github.com/titu1994/Neural-Style-Transfer), [eridgd/WCT-TF](https://github.com/eridgd/WCT-TF), and [jonrei/tf-AdaIN](https://github.com/jonrei/tf-AdaIN). All repos provide pre-trained models. We fed the swapped face (i.e., the output image of the GAN model) as the content image and the input face as the style image. 62 | - Results: The style transfer of Gatys et al. gave decent results but requires a long execution time (~1.5 min per 256x256 image on a K80), thus it is not applicable for video conversion. The "Universal Style Transfer via Feature Transforms" (WCT) and "Arbitrary Style Transfer in Real-time with Adaptive Instance Normalization" (AdaIN) approaches somehow failed to preserve the content information (perhaps I did not tune the params well). 63 | - Conclusion: **Using neural style transfer to improve output quality seems promising**, but we are not sure whether it will benefit video quality w/o introducing jitter. Also, the execution time is a problem; we should experiment with more arbitrary style transfer networks to see if there is any model that can do a good job on face refinement within one (or several) forward pass(es). 64 | - ![style_transfer_exp](https://www.dropbox.com/s/r00q5zxojxjofde/style_transfer_comp.png?raw=1) 65 | 66 | ### 13. Model evaluation on Trump/Cage dataset 67 | - Problem: GANs are hard to evaluate. Generally, Inception Score (IS) and Fréchet Inception Distance (FID) are the most common metrics for evaluating the output "reality" (i.e., how close the outputs are to real samples). However, in the face-swapping task, we care more about the "quality" of the outputs, such as how similar the transformed output face is to its target face. Thus we want to find an objective approach to evaluate the model performance as a counterpart to subjectively judging by output visualization. 68 | - **Evaluation method 1: Compare the predicted identities of VGGFace-ResNet50.** 69 | - We look at the predictions of ResNet50 and check whether it spits out similar predictions on real/fake images.
70 | - There are 8631 identities in VGGFace (but unfortunately neither Donald Trump nor Nicolas Cage is in this dataset) 71 | - Top 3 most look-alike identities of "real Trump" are: Alan_Mulally, Jon_Voight, and Tom_Berenger 72 | - Top 3 most look-alike identities of "fake Trump" are: Alan_Mulally, Franjo_Pooth, and Jon_Voight 73 | - 74 | - Top 3 most look-alike identities of "real Cage" are: Jimmy_Stewart, Nick_Grimshaw, and Sylvester_Stallone 75 | - Top 3 most look-alike identities of "fake Cage" are: Franjo_Pooth, Jimmy_Stewart, and Bob_Beckel 76 | - 77 | - **Observation:** Overall, the top-1 look-alike identity of the real Trump/Cage also appears in the top 3 of the fake one. (Notice that the face-swapping only changes the facial attributes, not the chin and face shape. Thus the fake faces will not look exactly the same as their target faces.) 78 | - **Evaluation method 2: Compare the cosine similarity of extracted VGGFace-ResNet50 features.** 79 | - Features (embeddings) are extracted from the global average pooling layer (the last layer before the fully-connected layer) of ResNet50, which have a dimension of 2048. 80 | - 81 | - 82 | - The definition of cosine distance can be found [here](https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cosine.html). The cosine similarity is just the cosine distance without the one-minus part. 83 | - The following 2 heatmaps depict the within-class cosine similarity of real Trump images and real Cage images. 84 | - 85 | - The following 2 heatmaps illustrate the cosine similarity between real/fake Trump images and between real/fake Cage images. It is obvious that the similarity is not as high as for real samples but is still close enough. (Note that the low similarity between real and fake Cage is caused by profile faces and heavily occluded faces in the real Trump samples, which are hard for the faceswap model to transform.) 86 | - 87 | - We also checked the cosine similarity between real Trump and real Cage. The result was not surprising: it shows low similarity between the two identities. This also supports the above observation that the swapped face is very similar to its target face. 88 | - 89 | - **Observation:** Evaluation using ResNet50 features gives a clear indication that the swapped faces are very similar to their target faces. 90 | - **Conclusion:** Cosine similarity seems to be a good way to compare performance among different models on the same dataset. Hopefully this can accelerate our iterations when searching for optimal hyper-parameters and exploring model architectures. 91 | 92 | ### 14. 3D face reconstruction for output refinement 93 | - Using [PRNet](https://github.com/YadiraF/PRNet) and its accompanying [face-swapping script](https://github.com/YadiraF/PRNet/blob/master/demo_texture.py) to refine the output image. 94 | - **Result:** For extreme facial expressions, the mouth shape becomes more consistent after face texture editing. (The missing details can be restored through style transfer as shown in exp. 12 above.) 95 | - Left to right: Input, output, refined output 96 | - ![3dface01](https://www.dropbox.com/s/dwsj57za9tj127y/3dmodel_refine01.jpg?raw=1) 97 | - ![3dface02](https://www.dropbox.com/s/fn3sli0gtlb4y78/3dmodel_refine02.jpg?raw=1) 98 | - For occluded faces, the pose might not be correctly estimated, thus the refined outputs are likely to be distorted, e.g., the displaced microphone in the figure below.
99 | - ![3dface03](https://www.dropbox.com/s/oaui3vaavv7c9zw/3dmodel_refine03.jpg?raw=1) 100 | -------------------------------------------------------------------------------- /networks/faceswap_gan_model.py: -------------------------------------------------------------------------------- 1 | from keras.models import Model 2 | from keras.layers import * 3 | from keras.optimizers import Adam 4 | from .nn_blocks import * 5 | from .losses import * 6 | 7 | class FaceswapGANModel(): 8 | """ 9 | faceswap-GAN v2.2 model 10 | 11 | Attributes: 12 | arch_config: A dictionary that contains architecture configurations (details are described in train notebook). 13 | nc_G_inp: int, number of generator input channels 14 | nc_D_inp: int, number of discriminator input channels 15 | lrG: float, learning rate of the generator 16 | lrD: float, learning rate of the discriminator 17 | """ 18 | def __init__(self, **arch_config): 19 | self.nc_G_inp = 3 20 | self.nc_D_inp = 6 21 | self.IMAGE_SHAPE = arch_config['IMAGE_SHAPE'] 22 | self.lrD = 2e-4 23 | self.lrG = 1e-4 24 | self.use_self_attn = arch_config['use_self_attn'] 25 | self.norm = arch_config['norm'] 26 | self.model_capacity = arch_config['model_capacity'] 27 | self.enc_nc_out = 256 if self.model_capacity == "lite" else 512 28 | 29 | # define networks 30 | self.encoder = self.build_encoder(nc_in=self.nc_G_inp, 31 | input_size=self.IMAGE_SHAPE[0], 32 | use_self_attn=self.use_self_attn, 33 | norm=self.norm, 34 | model_capacity=self.model_capacity 35 | ) 36 | self.decoder_A = self.build_decoder(nc_in=self.enc_nc_out, 37 | input_size=8, 38 | output_size=self.IMAGE_SHAPE[0], 39 | use_self_attn=self.use_self_attn, 40 | norm=self.norm, 41 | model_capacity=self.model_capacity 42 | ) 43 | self.decoder_B = self.build_decoder(nc_in=self.enc_nc_out, 44 | input_size=8, 45 | output_size=self.IMAGE_SHAPE[0], 46 | use_self_attn=self.use_self_attn, 47 | norm=self.norm, 48 | model_capacity=self.model_capacity 49 | ) 50 | self.netDA = self.build_discriminator(nc_in=self.nc_D_inp, 51 | input_size=self.IMAGE_SHAPE[0], 52 | use_self_attn=self.use_self_attn, 53 | norm=self.norm 54 | ) 55 | self.netDB = self.build_discriminator(nc_in=self.nc_D_inp, 56 | input_size=self.IMAGE_SHAPE[0], 57 | use_self_attn=self.use_self_attn, 58 | norm=self.norm 59 | ) 60 | x = Input(shape=self.IMAGE_SHAPE) # dummy input tensor 61 | self.netGA = Model(x, self.decoder_A(self.encoder(x))) 62 | self.netGB = Model(x, self.decoder_B(self.encoder(x))) 63 | 64 | # define variables 65 | self.distorted_A, self.fake_A, self.mask_A, \ 66 | self.path_A, self.path_mask_A, self.path_abgr_A, self.path_bgr_A = self.define_variables(netG=self.netGA) 67 | self.distorted_B, self.fake_B, self.mask_B, \ 68 | self.path_B, self.path_mask_B, self.path_abgr_B, self.path_bgr_B = self.define_variables(netG=self.netGB) 69 | self.real_A = Input(shape=self.IMAGE_SHAPE) 70 | self.real_B = Input(shape=self.IMAGE_SHAPE) 71 | self.mask_eyes_A = Input(shape=self.IMAGE_SHAPE) 72 | self.mask_eyes_B = Input(shape=self.IMAGE_SHAPE) 73 | 74 | @staticmethod 75 | def build_encoder(nc_in=3, 76 | input_size=64, 77 | use_self_attn=True, 78 | norm='none', 79 | model_capacity='standard'): 80 | coef = 2 if model_capacity == "lite" else 1 81 | latent_dim = 2048 if (model_capacity == "lite" and input_size > 64) else 1024 82 | upscale_block = upscale_nn if model_capacity == "lite" else upscale_ps 83 | activ_map_size = input_size 84 | use_norm = False if (norm == 'none') else True 85 | 86 | inp = Input(shape=(input_size, input_size, nc_in)) 87 | x = 
Conv2D(64//coef, kernel_size=5, use_bias=False, padding="same")(inp) # use_bias should be True 88 | x = conv_block(x, 128//coef) 89 | x = conv_block(x, 256//coef, use_norm, norm=norm) 90 | x = self_attn_block(x, 256//coef) if use_self_attn else x 91 | x = conv_block(x, 512//coef, use_norm, norm=norm) 92 | x = self_attn_block(x, 512//coef) if use_self_attn else x 93 | x = conv_block(x, 1024//(coef**2), use_norm, norm=norm) 94 | 95 | activ_map_size = activ_map_size//16 96 | while (activ_map_size > 4): 97 | x = conv_block(x, 1024//(coef**2), use_norm, norm=norm) 98 | activ_map_size = activ_map_size//2 99 | 100 | x = Dense(latent_dim)(Flatten()(x)) 101 | x = Dense(4*4*1024//(coef**2))(x) 102 | x = Reshape((4, 4, 1024//(coef**2)))(x) 103 | out = upscale_block(x, 512//coef, use_norm, norm=norm) 104 | return Model(inputs=inp, outputs=out) 105 | 106 | @staticmethod 107 | def build_decoder(nc_in=512, 108 | input_size=8, 109 | output_size=64, 110 | use_self_attn=True, 111 | norm='none', 112 | model_capacity='standard'): 113 | coef = 2 if model_capacity == "lite" else 1 114 | upscale_block = upscale_nn if model_capacity == "lite" else upscale_ps 115 | activ_map_size = input_size 116 | use_norm = False if (norm == 'none') else True 117 | 118 | inp = Input(shape=(input_size, input_size, nc_in)) 119 | x = inp 120 | x = upscale_block(x, 256//coef, use_norm, norm=norm) 121 | x = upscale_block(x, 128//coef, use_norm, norm=norm) 122 | x = self_attn_block(x, 128//coef) if use_self_attn else x 123 | x = upscale_block(x, 64//coef, use_norm, norm=norm) 124 | x = res_block(x, 64//coef, norm=norm) 125 | x = self_attn_block(x, 64//coef) if use_self_attn else conv_block(x, 64//coef, strides=1) 126 | 127 | outputs = [] 128 | activ_map_size = activ_map_size * 8 129 | while (activ_map_size < output_size): 130 | outputs.append(Conv2D(3, kernel_size=5, padding='same', activation="tanh")(x)) 131 | x = upscale_block(x, 64//coef, use_norm, norm=norm) 132 | x = conv_block(x, 64//coef, strides=1) 133 | activ_map_size *= 2 134 | 135 | alpha = Conv2D(1, kernel_size=5, padding='same', activation="sigmoid")(x) 136 | bgr = Conv2D(3, kernel_size=5, padding='same', activation="tanh")(x) 137 | out = concatenate([alpha, bgr]) 138 | outputs.append(out) 139 | return Model(inp, outputs) 140 | 141 | @staticmethod 142 | def build_discriminator(nc_in, 143 | input_size=64, 144 | use_self_attn=True, 145 | norm='none'): 146 | activ_map_size = input_size 147 | use_norm = False if (norm == 'none') else True 148 | 149 | inp = Input(shape=(input_size, input_size, nc_in)) 150 | x = conv_block_d(inp, 64, False) 151 | x = conv_block_d(x, 128, use_norm, norm=norm) 152 | x = conv_block_d(x, 256, use_norm, norm=norm) 153 | x = self_attn_block(x, 256) if use_self_attn else x 154 | 155 | activ_map_size = activ_map_size//8 156 | while (activ_map_size > 8): 157 | x = conv_block_d(x, 256, use_norm, norm=norm) 158 | x = self_attn_block(x, 256) if use_self_attn else x 159 | activ_map_size = activ_map_size//2 160 | 161 | out = Conv2D(1, kernel_size=4, use_bias=False, padding="same")(x) # use_bias should be True 162 | return Model(inputs=[inp], outputs=out) 163 | 164 | @staticmethod 165 | def define_variables(netG): 166 | distorted_input = netG.inputs[0] 167 | fake_output = netG.outputs[-1] 168 | alpha = Lambda(lambda x: x[:,:,:, :1])(fake_output) 169 | bgr = Lambda(lambda x: x[:,:,:, 1:])(fake_output) 170 | 171 | masked_fake_output = alpha * bgr + (1-alpha) * distorted_input 172 | 173 | fn_generate = K.function([distorted_input], [masked_fake_output]) 174 | 
fn_mask = K.function([distorted_input], [concatenate([alpha, alpha, alpha])]) 175 | fn_abgr = K.function([distorted_input], [concatenate([alpha, bgr])]) 176 | fn_bgr = K.function([distorted_input], [bgr]) 177 | return distorted_input, fake_output, alpha, fn_generate, fn_mask, fn_abgr, fn_bgr 178 | 179 | def build_train_functions(self, loss_weights=None, **loss_config): 180 | assert loss_weights is not None, "loss weights are not provided." 181 | # Adversarial loss 182 | loss_DA, loss_adv_GA = adversarial_loss(self.netDA, self.real_A, self.fake_A, 183 | self.distorted_A, 184 | loss_config["gan_training"], 185 | **loss_weights) 186 | loss_DB, loss_adv_GB = adversarial_loss(self.netDB, self.real_B, self.fake_B, 187 | self.distorted_B, 188 | loss_config["gan_training"], 189 | **loss_weights) 190 | 191 | # Reconstruction loss 192 | loss_recon_GA = reconstruction_loss(self.real_A, self.fake_A, 193 | self.mask_eyes_A, self.netGA.outputs, 194 | **loss_weights) 195 | loss_recon_GB = reconstruction_loss(self.real_B, self.fake_B, 196 | self.mask_eyes_B, self.netGB.outputs, 197 | **loss_weights) 198 | 199 | # Edge loss 200 | loss_edge_GA = edge_loss(self.real_A, self.fake_A, self.mask_eyes_A, **loss_weights) 201 | loss_edge_GB = edge_loss(self.real_B, self.fake_B, self.mask_eyes_B, **loss_weights) 202 | 203 | if loss_config['use_PL']: 204 | loss_pl_GA = perceptual_loss(self.real_A, self.fake_A, self.distorted_A, 205 | self.mask_eyes_A, self.vggface_feats, **loss_weights) 206 | loss_pl_GB = perceptual_loss(self.real_B, self.fake_B, self.distorted_B, 207 | self.mask_eyes_B, self.vggface_feats, **loss_weights) 208 | else: 209 | loss_pl_GA = loss_pl_GB = K.zeros(1) 210 | 211 | loss_GA = loss_adv_GA + loss_recon_GA + loss_edge_GA + loss_pl_GA 212 | loss_GB = loss_adv_GB + loss_recon_GB + loss_edge_GB + loss_pl_GB 213 | 214 | # The following losses are rather trivial, thus their wegihts are fixed. 215 | # Cycle consistency loss 216 | if loss_config['use_cyclic_loss']: 217 | loss_GA += 10 * cyclic_loss(self.netGA, self.netGB, self.real_A) 218 | loss_GB += 10 * cyclic_loss(self.netGB, self.netGA, self.real_B) 219 | 220 | # Alpha mask loss 221 | if not loss_config['use_mask_hinge_loss']: 222 | loss_GA += 1e-2 * K.mean(K.abs(self.mask_A)) 223 | loss_GB += 1e-2 * K.mean(K.abs(self.mask_B)) 224 | else: 225 | loss_GA += 0.1 * K.mean(K.maximum(0., loss_config['m_mask'] - self.mask_A)) 226 | loss_GB += 0.1 * K.mean(K.maximum(0., loss_config['m_mask'] - self.mask_B)) 227 | 228 | # Alpha mask total variation loss 229 | loss_GA += 0.1 * K.mean(first_order(self.mask_A, axis=1)) 230 | loss_GA += 0.1 * K.mean(first_order(self.mask_A, axis=2)) 231 | loss_GB += 0.1 * K.mean(first_order(self.mask_B, axis=1)) 232 | loss_GB += 0.1 * K.mean(first_order(self.mask_B, axis=2)) 233 | 234 | # L2 weight decay 235 | # https://github.com/keras-team/keras/issues/2662 236 | for loss_tensor in self.netGA.losses: 237 | loss_GA += loss_tensor 238 | for loss_tensor in self.netGB.losses: 239 | loss_GB += loss_tensor 240 | for loss_tensor in self.netDA.losses: 241 | loss_DA += loss_tensor 242 | for loss_tensor in self.netDB.losses: 243 | loss_DB += loss_tensor 244 | 245 | weightsDA = self.netDA.trainable_weights 246 | weightsGA = self.netGA.trainable_weights 247 | weightsDB = self.netDB.trainable_weights 248 | weightsGB = self.netGB.trainable_weights 249 | 250 | # Define training functions 251 | # Adam(...).get_updates(...) 
252 | training_updates = Adam(lr=self.lrD*loss_config['lr_factor'], beta_1=0.5).get_updates(weightsDA,[],loss_DA) 253 | self.netDA_train = K.function([self.distorted_A, self.real_A],[loss_DA], training_updates) 254 | training_updates = Adam(lr=self.lrG*loss_config['lr_factor'], beta_1=0.5).get_updates(weightsGA,[], loss_GA) 255 | self.netGA_train = K.function([self.distorted_A, self.real_A, self.mask_eyes_A], 256 | [loss_GA, loss_adv_GA, loss_recon_GA, loss_edge_GA, loss_pl_GA], 257 | training_updates) 258 | 259 | training_updates = Adam(lr=self.lrD*loss_config['lr_factor'], beta_1=0.5).get_updates(weightsDB,[],loss_DB) 260 | self.netDB_train = K.function([self.distorted_B, self.real_B],[loss_DB], training_updates) 261 | training_updates = Adam(lr=self.lrG*loss_config['lr_factor'], beta_1=0.5).get_updates(weightsGB,[], loss_GB) 262 | self.netGB_train = K.function([self.distorted_B, self.real_B, self.mask_eyes_B], 263 | [loss_GB, loss_adv_GB, loss_recon_GB, loss_edge_GB, loss_pl_GB], 264 | training_updates) 265 | 266 | def build_pl_model(self, vggface_model, before_activ=False): 267 | # Define Perceptual Loss Model 268 | vggface_model.trainable = False 269 | if before_activ == False: 270 | out_size112 = vggface_model.layers[1].output 271 | out_size55 = vggface_model.layers[36].output 272 | out_size28 = vggface_model.layers[78].output 273 | out_size7 = vggface_model.layers[-2].output 274 | else: 275 | out_size112 = vggface_model.layers[15].output # misnamed: the output size is 55 276 | out_size55 = vggface_model.layers[35].output 277 | out_size28 = vggface_model.layers[77].output 278 | out_size7 = vggface_model.layers[-3].output 279 | self.vggface_feats = Model(vggface_model.input, [out_size112, out_size55, out_size28, out_size7]) 280 | self.vggface_feats.trainable = False 281 | 282 | def load_weights(self, path="./models"): 283 | try: 284 | self.encoder.load_weights(f"{path}/encoder.h5") 285 | self.decoder_A.load_weights(f"{path}/decoder_A.h5") 286 | self.decoder_B.load_weights(f"{path}/decoder_B.h5") 287 | self.netDA.load_weights(f"{path}/netDA.h5") 288 | self.netDB.load_weights(f"{path}/netDB.h5") 289 | print ("Model weights files are successfully loaded.") 290 | except: 291 | print ("Error occurs during loading weights files.") 292 | pass 293 | 294 | def save_weights(self, path="./models"): 295 | try: 296 | self.encoder.save_weights(f"{path}/encoder.h5") 297 | self.decoder_A.save_weights(f"{path}/decoder_A.h5") 298 | self.decoder_B.save_weights(f"{path}/decoder_B.h5") 299 | self.netDA.save_weights(f"{path}/netDA.h5") 300 | self.netDB.save_weights(f"{path}/netDB.h5") 301 | print (f"Model weights files have been saved to {path}.") 302 | except: 303 | print ("Error occurs during saving weights.") 304 | pass 305 | 306 | def train_one_batch_G(self, data_A, data_B): 307 | if len(data_A) == 4 and len(data_B) == 4: 308 | _, warped_A, target_A, bm_eyes_A = data_A 309 | _, warped_B, target_B, bm_eyes_B = data_B 310 | elif len(data_A) == 3 and len(data_B) == 3: 311 | warped_A, target_A, bm_eyes_A = data_A 312 | warped_B, target_B, bm_eyes_B = data_B 313 | else: 314 | raise ValueError("Something's wrong with the input data generator.") 315 | errGA = self.netGA_train([warped_A, target_A, bm_eyes_A]) 316 | errGB = self.netGB_train([warped_B, target_B, bm_eyes_B]) 317 | return errGA, errGB 318 | 319 | def train_one_batch_D(self, data_A, data_B): 320 | if len(data_A) == 4 and len(data_B) == 4: 321 | _, warped_A, target_A, _ = data_A 322 | _, warped_B, target_B, _ = data_B 323 | elif len(data_A) == 3 and 
len(data_B) == 3: 324 | warped_A, target_A, _ = data_A 325 | warped_B, target_B, _ = data_B 326 | else: 327 | raise ValueError("Something's wrong with the input data generator.") 328 | errDA = self.netDA_train([warped_A, target_A]) 329 | errDB = self.netDB_train([warped_B, target_B]) 330 | return errDA, errDB 331 | 332 | def transform_A2B(self, img): 333 | return self.path_abgr_B([[img]]) 334 | 335 | def transform_B2A(self, img): 336 | return self.path_abgr_A([[img]]) -------------------------------------------------------------------------------- /legacy/FaceSwap_GAN_v2_test_img.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "# 1. Import packages" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [ 16 | { 17 | "name": "stderr", 18 | "output_type": "stream", 19 | "text": [ 20 | "Using TensorFlow backend.\n" 21 | ] 22 | } 23 | ], 24 | "source": [ 25 | "from keras.models import Sequential, Model\n", 26 | "from keras.layers import *\n", 27 | "from keras.layers.advanced_activations import LeakyReLU\n", 28 | "from keras.activations import relu\n", 29 | "from keras.initializers import RandomNormal\n", 30 | "from keras.applications import *\n", 31 | "import keras.backend as K\n", 32 | "from tensorflow.contrib.distributions import Beta\n", 33 | "import tensorflow as tf\n", 34 | "from keras.optimizers import Adam" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "from image_augmentation import random_transform\n", 44 | "from image_augmentation import random_warp\n", 45 | "from utils import get_image_paths, load_images, stack_images\n", 46 | "from pixel_shuffler import PixelShuffler" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 3, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "import time\n", 56 | "import numpy as np\n", 57 | "from PIL import Image\n", 58 | "import cv2\n", 59 | "import glob\n", 60 | "from random import randint, shuffle\n", 61 | "from IPython.display import clear_output\n", 62 | "from IPython.display import display\n", 63 | "import matplotlib.pyplot as plt\n", 64 | "%matplotlib inline" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "\n", 72 | "# 4. 
Config\n", 73 | "\n", 74 | "mixup paper: https://arxiv.org/abs/1710.09412\n", 75 | "\n", 76 | "Default training data directories: `./faceA/` and `./faceB/`" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 4, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "K.set_learning_phase(0)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 5, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "channel_axis=-1\n", 95 | "channel_first = False" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 6, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "IMAGE_SHAPE = (64, 64, 3)\n", 105 | "nc_in = 3 # number of input channels of generators\n", 106 | "nc_D_inp = 6 # number of input channels of discriminators\n", 107 | "\n", 108 | "use_self_attn = False\n", 109 | "w_l2 = 1e-4 # weight decay\n", 110 | "\n", 111 | "batchSize = 8\n", 112 | "\n", 113 | "# Path of training images\n", 114 | "img_dirA = './faceA/*.*'\n", 115 | "img_dirB = './faceB/*.*'" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "\n", 123 | "# 5. Define models" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 7, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "class Scale(Layer):\n", 133 | " '''\n", 134 | " Code borrows from https://github.com/flyyufelix/cnn_finetune\n", 135 | " '''\n", 136 | " def __init__(self, weights=None, axis=-1, gamma_init='zero', **kwargs):\n", 137 | " self.axis = axis\n", 138 | " self.gamma_init = initializers.get(gamma_init)\n", 139 | " self.initial_weights = weights\n", 140 | " super(Scale, self).__init__(**kwargs)\n", 141 | "\n", 142 | " def build(self, input_shape):\n", 143 | " self.input_spec = [InputSpec(shape=input_shape)]\n", 144 | "\n", 145 | " # Compatibility with TensorFlow >= 1.0.0\n", 146 | " self.gamma = K.variable(self.gamma_init((1,)), name='{}_gamma'.format(self.name))\n", 147 | " self.trainable_weights = [self.gamma]\n", 148 | "\n", 149 | " if self.initial_weights is not None:\n", 150 | " self.set_weights(self.initial_weights)\n", 151 | " del self.initial_weights\n", 152 | "\n", 153 | " def call(self, x, mask=None):\n", 154 | " return self.gamma * x\n", 155 | "\n", 156 | " def get_config(self):\n", 157 | " config = {\"axis\": self.axis}\n", 158 | " base_config = super(Scale, self).get_config()\n", 159 | " return dict(list(base_config.items()) + list(config.items()))\n", 160 | "\n", 161 | "\n", 162 | "def self_attn_block(inp, nc):\n", 163 | " '''\n", 164 | " Code borrows from https://github.com/taki0112/Self-Attention-GAN-Tensorflow\n", 165 | " '''\n", 166 | " assert nc//8 > 0, f\"Input channels must be >= 8, but got nc={nc}\"\n", 167 | " x = inp\n", 168 | " shape_x = x.get_shape().as_list()\n", 169 | " \n", 170 | " f = Conv2D(nc//8, 1, kernel_initializer=conv_init)(x)\n", 171 | " g = Conv2D(nc//8, 1, kernel_initializer=conv_init)(x)\n", 172 | " h = Conv2D(nc, 1, kernel_initializer=conv_init)(x)\n", 173 | " \n", 174 | " shape_f = f.get_shape().as_list()\n", 175 | " shape_g = g.get_shape().as_list()\n", 176 | " shape_h = h.get_shape().as_list()\n", 177 | " flat_f = Reshape((-1, shape_f[-1]))(f)\n", 178 | " flat_g = Reshape((-1, shape_g[-1]))(g)\n", 179 | " flat_h = Reshape((-1, shape_h[-1]))(h) \n", 180 | " \n", 181 | " s = Lambda(lambda x: tf.matmul(x[0], x[1], transpose_b=True))([flat_g, flat_f])\n", 182 | "\n", 183 | " beta = Softmax(axis=-1)(s)\n", 184 | " o = Lambda(lambda x: 
tf.matmul(x[0], x[1]))([beta, flat_h])\n", 185 | " o = Reshape(shape_x[1:])(o)\n", 186 | " o = Scale()(o)\n", 187 | " \n", 188 | " out = add([o, inp])\n", 189 | " return out\n", 190 | "\n", 191 | "def conv_block(input_tensor, f):\n", 192 | " x = input_tensor\n", 193 | " x = Conv2D(f, kernel_size=3, strides=2, kernel_regularizer=regularizers.l2(w_l2), \n", 194 | " kernel_initializer=conv_init, use_bias=False, padding=\"same\")(x)\n", 195 | " x = Activation(\"relu\")(x)\n", 196 | " return x\n", 197 | "\n", 198 | "def conv_block_d(input_tensor, f, use_instance_norm=False):\n", 199 | " x = input_tensor\n", 200 | " x = Conv2D(f, kernel_size=4, strides=2, kernel_regularizer=regularizers.l2(w_l2), \n", 201 | " kernel_initializer=conv_init, use_bias=False, padding=\"same\")(x)\n", 202 | " x = LeakyReLU(alpha=0.2)(x)\n", 203 | " return x\n", 204 | "\n", 205 | "def res_block(input_tensor, f):\n", 206 | " x = input_tensor\n", 207 | " x = Conv2D(f, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2), \n", 208 | " kernel_initializer=conv_init, use_bias=False, padding=\"same\")(x)\n", 209 | " x = LeakyReLU(alpha=0.2)(x)\n", 210 | " x = Conv2D(f, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2), \n", 211 | " kernel_initializer=conv_init, use_bias=False, padding=\"same\")(x)\n", 212 | " x = add([x, input_tensor])\n", 213 | " x = LeakyReLU(alpha=0.2)(x)\n", 214 | " return x\n", 215 | "\n", 216 | "def upscale_ps(filters, use_norm=True):\n", 217 | " def block(x):\n", 218 | " x = Conv2D(filters*4, kernel_size=3, kernel_regularizer=regularizers.l2(w_l2), \n", 219 | " kernel_initializer=RandomNormal(0, 0.02), padding='same')(x)\n", 220 | " x = LeakyReLU(0.2)(x)\n", 221 | " x = PixelShuffler()(x)\n", 222 | " return x\n", 223 | " return block\n", 224 | "\n", 225 | "def Discriminator(nc_in, input_size=64):\n", 226 | " inp = Input(shape=(input_size, input_size, nc_in))\n", 227 | " #x = GaussianNoise(0.05)(inp)\n", 228 | " x = conv_block_d(inp, 64, False)\n", 229 | " x = conv_block_d(x, 128, False)\n", 230 | " x = self_attn_block(x, 128) if use_self_attn else x\n", 231 | " x = conv_block_d(x, 256, False)\n", 232 | " x = self_attn_block(x, 256) if use_self_attn else x\n", 233 | " out = Conv2D(1, kernel_size=4, kernel_initializer=conv_init, use_bias=False, padding=\"same\")(x) \n", 234 | " return Model(inputs=[inp], outputs=out)\n", 235 | "\n", 236 | "def Encoder(nc_in=3, input_size=64):\n", 237 | " inp = Input(shape=(input_size, input_size, nc_in))\n", 238 | " x = Conv2D(64, kernel_size=5, kernel_initializer=conv_init, use_bias=False, padding=\"same\")(inp)\n", 239 | " x = conv_block(x,128)\n", 240 | " x = conv_block(x,256)\n", 241 | " x = self_attn_block(x, 256) if use_self_attn else x\n", 242 | " x = conv_block(x,512) \n", 243 | " x = self_attn_block(x, 512) if use_self_attn else x\n", 244 | " x = conv_block(x,1024)\n", 245 | " x = Dense(1024)(Flatten()(x))\n", 246 | " x = Dense(4*4*1024)(x)\n", 247 | " x = Reshape((4, 4, 1024))(x)\n", 248 | " out = upscale_ps(512)(x)\n", 249 | " return Model(inputs=inp, outputs=out)\n", 250 | "\n", 251 | "def Decoder_ps(nc_in=512, input_size=8):\n", 252 | " input_ = Input(shape=(input_size, input_size, nc_in))\n", 253 | " x = input_\n", 254 | " x = upscale_ps(256)(x)\n", 255 | " x = upscale_ps(128)(x)\n", 256 | " x = self_attn_block(x, 128) if use_self_attn else x\n", 257 | " x = upscale_ps(64)(x)\n", 258 | " x = res_block(x, 64)\n", 259 | " x = self_attn_block(x, 64) if use_self_attn else x\n", 260 | " #x = Conv2D(4, kernel_size=5, padding='same')(x) \n", 261 | " alpha 
= Conv2D(1, kernel_size=5, padding='same', activation=\"sigmoid\")(x)\n", 262 | " rgb = Conv2D(3, kernel_size=5, padding='same', activation=\"tanh\")(x)\n", 263 | " out = concatenate([alpha, rgb])\n", 264 | " return Model(input_, out) " 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": 8, 270 | "metadata": { 271 | "scrolled": true 272 | }, 273 | "outputs": [], 274 | "source": [ 275 | "encoder = Encoder()\n", 276 | "decoder_A = Decoder_ps()\n", 277 | "decoder_B = Decoder_ps()\n", 278 | "\n", 279 | "x = Input(shape=IMAGE_SHAPE)\n", 280 | "\n", 281 | "netGA = Model(x, decoder_A(encoder(x)))\n", 282 | "netGB = Model(x, decoder_B(encoder(x)))" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 9, 288 | "metadata": {}, 289 | "outputs": [], 290 | "source": [ 291 | "netDA = Discriminator(nc_D_inp)\n", 292 | "netDB = Discriminator(nc_D_inp)" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "\n", 300 | "# 6. Load Models" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 10, 306 | "metadata": {}, 307 | "outputs": [ 308 | { 309 | "name": "stdout", 310 | "output_type": "stream", 311 | "text": [ 312 | "model loaded.\n" 313 | ] 314 | } 315 | ], 316 | "source": [ 317 | "try:\n", 318 | " encoder.load_weights(\"models/encoder.h5\")\n", 319 | " decoder_A.load_weights(\"models/decoder_A.h5\")\n", 320 | " decoder_B.load_weights(\"models/decoder_B.h5\")\n", 321 | " #netDA.load_weights(\"models/netDA.h5\") \n", 322 | " #netDB.load_weights(\"models/netDB.h5\") \n", 323 | " print (\"model loaded.\")\n", 324 | "except:\n", 325 | " print (\"Weights file not found.\")\n", 326 | " pass" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "\n", 334 | "# 7. Define Inputs/Outputs Variables\n", 335 | "\n", 336 | " distorted_A: A (batch_size, 64, 64, 3) tensor, input of generator_A (netGA).\n", 337 | " distorted_B: A (batch_size, 64, 64, 3) tensor, input of generator_B (netGB).\n", 338 | " fake_A: (batch_size, 64, 64, 3) tensor, output of generator_A (netGA).\n", 339 | " fake_B: (batch_size, 64, 64, 3) tensor, output of generator_B (netGB).\n", 340 | " mask_A: (batch_size, 64, 64, 1) tensor, mask output of generator_A (netGA).\n", 341 | " mask_B: (batch_size, 64, 64, 1) tensor, mask output of generator_B (netGB).\n", 342 | " path_A: A function that takes distorted_A as input and outputs fake_A.\n", 343 | " path_B: A function that takes distorted_B as input and outputs fake_B.\n", 344 | " path_mask_A: A function that takes distorted_A as input and outputs mask_A.\n", 345 | " path_mask_B: A function that takes distorted_B as input and outputs mask_B.\n", 346 | " path_abgr_A: A function that takes distorted_A as input and outputs concat([mask_A, fake_A]).\n", 347 | " path_abgr_B: A function that takes distorted_B as input and outputs concat([mask_B, fake_B]).\n", 348 | " real_A: A (batch_size, 64, 64, 3) tensor, target images for generator_A given input distorted_A.\n", 349 | " real_B: A (batch_size, 64, 64, 3) tensor, target images for generator_B given input distorted_B." 
350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": 11, 355 | "metadata": {}, 356 | "outputs": [], 357 | "source": [ 358 | "def cycle_variables(netG):\n", 359 | " distorted_input = netG.inputs[0]\n", 360 | " fake_output = netG.outputs[0]\n", 361 | " alpha = Lambda(lambda x: x[:,:,:, :1])(fake_output)\n", 362 | " rgb = Lambda(lambda x: x[:,:,:, 1:])(fake_output)\n", 363 | " \n", 364 | " masked_fake_output = alpha * rgb + (1-alpha) * distorted_input \n", 365 | "\n", 366 | " fn_generate = K.function([distorted_input], [masked_fake_output])\n", 367 | " fn_mask = K.function([distorted_input], [concatenate([alpha, alpha, alpha])])\n", 368 | " fn_abgr = K.function([distorted_input], [concatenate([alpha, rgb])])\n", 369 | " return distorted_input, fake_output, alpha, fn_generate, fn_mask, fn_abgr" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": 12, 375 | "metadata": {}, 376 | "outputs": [], 377 | "source": [ 378 | "distorted_A, fake_A, mask_A, path_A, path_mask_A, path_abgr_A = cycle_variables(netGA)\n", 379 | "distorted_B, fake_B, mask_B, path_B, path_mask_B, path_abgr_B = cycle_variables(netGB)\n", 380 | "real_A = Input(shape=IMAGE_SHAPE)\n", 381 | "real_B = Input(shape=IMAGE_SHAPE)" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": null, 387 | "metadata": {}, 388 | "outputs": [], 389 | "source": [] 390 | }, 391 | { 392 | "cell_type": "markdown", 393 | "metadata": {}, 394 | "source": [ 395 | "\n", 396 | "# 11. Helper Function: swap_face()\n", 397 | "This function is provided for those who don't have enough VRAM to run dlib's CNN face detector and the GAN model at the same time.\n", 398 | "\n", 399 | " INPUTS:\n", 400 | " img: An RGB face image of any size.\n", 401 | " path_func: a function that is either path_abgr_A or path_abgr_B.\n", 402 | " OUTPUTS:\n", 403 | " result_img: An RGB swapped face image after masking.\n", 404 | " result_mask: A single-channel uint8 mask image." 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": 33, 410 | "metadata": {}, 411 | "outputs": [], 412 | "source": [ 413 | "def swap_face(img, path_func):\n", 414 | " input_size = img.shape\n", 415 | " img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) # generator expects BGR input \n", 416 | " ae_input = cv2.resize(img, (64,64))/255. 
* 2 - 1 \n", 417 | " \n", 418 | " result = np.squeeze(np.array([path_func([[ae_input]])]))\n", 419 | " result_a = result[:,:,0] * 255\n", 420 | " result_a = cv2.resize(result_a, (input_size[1],input_size[0]))[...,np.newaxis]\n", 421 | " result_bgr = np.clip( (result[:,:,1:] + 1) * 255 / 2, 0, 255)\n", 422 | " result_bgr = cv2.resize(result_bgr, (input_size[1],input_size[0]))\n", 423 | " result = (result_a/255 * result_bgr + (1 - result_a/255) * img).astype('uint8')\n", 424 | " \n", 425 | " result = cv2.cvtColor(result, cv2.COLOR_BGR2RGB) \n", 426 | " result = cv2.resize(result, (input_size[1],input_size[0]))\n", 427 | " result_a = np.expand_dims(cv2.resize(result_a, (input_size[1],input_size[0])), axis=2)\n", 428 | " return result, result_a" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": 34, 434 | "metadata": {}, 435 | "outputs": [], 436 | "source": [ 437 | "whom2whom = \"BtoA\" # default: transforming faceB to faceA\n", 438 | "\n", 439 | "if whom2whom == \"AtoB\":\n", 440 | " path_func = path_abgr_B\n", 441 | "elif whom2whom == \"BtoA\":\n", 442 | " path_func = path_abgr_A\n", 443 | "else:\n", 444 | " print (\"whom2whom should be either AtoB or BtoA\")" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": 35, 450 | "metadata": {}, 451 | "outputs": [], 452 | "source": [ 453 | "input_img = plt.imread(\"./IMAGE_FILENAME.jpg\")" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": null, 459 | "metadata": {}, 460 | "outputs": [], 461 | "source": [ 462 | "plt.imshow(input_img)" 463 | ] 464 | }, 465 | { 466 | "cell_type": "code", 467 | "execution_count": 37, 468 | "metadata": {}, 469 | "outputs": [], 470 | "source": [ 471 | "result_img, result_mask = swap_face(input_img, path_func)" 472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": null, 477 | "metadata": {}, 478 | "outputs": [], 479 | "source": [ 480 | "plt.imshow(result_img)" 481 | ] 482 | }, 483 | { 484 | "cell_type": "code", 485 | "execution_count": null, 486 | "metadata": {}, 487 | "outputs": [], 488 | "source": [ 489 | "plt.imshow(result_mask[:, :, 0]) # cmap='gray'" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": null, 495 | "metadata": {}, 496 | "outputs": [], 497 | "source": [] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": null, 502 | "metadata": {}, 503 | "outputs": [], 504 | "source": [] 505 | } 506 | ], 507 | "metadata": { 508 | "kernelspec": { 509 | "display_name": "Python 3", 510 | "language": "python", 511 | "name": "python3" 512 | }, 513 | "language_info": { 514 | "codemirror_mode": { 515 | "name": "ipython", 516 | "version": 3 517 | }, 518 | "file_extension": ".py", 519 | "mimetype": "text/x-python", 520 | "name": "python", 521 | "nbconvert_exporter": "python", 522 | "pygments_lexer": "ipython3", 523 | "version": "3.6.4" 524 | } 525 | }, 526 | "nbformat": 4, 527 | "nbformat_minor": 2 528 | } 529 | -------------------------------------------------------------------------------- /prep_binary_masks.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Install face-alignment package\n", 8 | "Environment: Google colab" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": { 15 | "colab": { 16 | "autoexec": { 17 | "startup": false, 18 | "wait_interval": 0 19 | }, 20 | "base_uri": "https://localhost:8080/", 21 | 
"height": 139 22 | }, 23 | "colab_type": "code", 24 | "executionInfo": { 25 | "elapsed": 46673, 26 | "status": "ok", 27 | "timestamp": 1529151506883, 28 | "user": { 29 | "displayName": "Lu SA", 30 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128", 31 | "userId": "109275333798683015269" 32 | }, 33 | "user_tz": -480 34 | }, 35 | "id": "NK7_yFjNV-wY", 36 | "outputId": "091fda26-6e40-4c9d-fd39-2d48cdd7d14f" 37 | }, 38 | "outputs": [ 39 | { 40 | "name": "stdout", 41 | "output_type": "stream", 42 | "text": [ 43 | "Collecting torch\n", 44 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/69/43/380514bd9663f1bf708abeb359b8b48d3fabb1c8e95bb3427a980a064c57/torch-0.4.0-cp36-cp36m-manylinux1_x86_64.whl (484.0MB)\n", 45 | "\u001b[K 100% |████████████████████████████████| 484.0MB 24kB/s \n", 46 | "tcmalloc: large alloc 1073750016 bytes == 0x5c3f8000 @ 0x7fe09f1ac1c4 0x46d6a4 0x5fcbcc 0x4c494d 0x54f3c4 0x553aaf 0x54e4c8 0x54f4f6 0x553aaf 0x54efc1 0x54f24d 0x553aaf 0x54efc1 0x54f24d 0x553aaf 0x54efc1 0x54f24d 0x551ee0 0x54e4c8 0x54f4f6 0x553aaf 0x54efc1 0x54f24d 0x551ee0 0x54efc1 0x54f24d 0x551ee0 0x54e4c8 0x54f4f6 0x553aaf 0x54e4c8\n", 47 | "\u001b[?25hInstalling collected packages: torch\n", 48 | "Successfully installed torch-0.4.0\n" 49 | ] 50 | } 51 | ], 52 | "source": [ 53 | "# Update pyorch to 0.4\n", 54 | "#!pip install --upgrade torch" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 2, 60 | "metadata": { 61 | "colab": { 62 | "autoexec": { 63 | "startup": false, 64 | "wait_interval": 0 65 | }, 66 | "base_uri": "https://localhost:8080/", 67 | "height": 204 68 | }, 69 | "colab_type": "code", 70 | "executionInfo": { 71 | "elapsed": 6783, 72 | "status": "ok", 73 | "timestamp": 1529151513699, 74 | "user": { 75 | "displayName": "Lu SA", 76 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128", 77 | "userId": "109275333798683015269" 78 | }, 79 | "user_tz": -480 80 | }, 81 | "id": "n79ip5KbWH0s", 82 | "outputId": "34fd0b04-52d3-45a9-a4e8-feb48013d5b2" 83 | }, 84 | "outputs": [ 85 | { 86 | "name": "stdout", 87 | "output_type": "stream", 88 | "text": [ 89 | "Initialized empty Git repository in /content/.git/\n", 90 | "remote: Counting objects: 277, done.\u001b[K\n", 91 | "remote: Compressing objects: 100% (14/14), done.\u001b[K\n", 92 | "remote: Total 277 (delta 1), reused 3 (delta 0), pack-reused 263\u001b[K\n", 93 | "Receiving objects: 100% (277/277), 3.46 MiB | 13.68 MiB/s, done.\n", 94 | "Resolving deltas: 100% (156/156), done.\n", 95 | "From https://github.com/1adrianb/face-alignment\n", 96 | " * [new branch] master -> origin/master\n", 97 | " * [new tag] v1.0.0 -> v1.0.0\n", 98 | "Branch master set up to track remote branch master from origin.\n", 99 | "Already on 'master'\n" 100 | ] 101 | } 102 | ], 103 | "source": [ 104 | "#!git init .\n", 105 | "#!git remote add origin https://github.com/1adrianb/face-alignment.git\n", 106 | "#!git fetch origin\n", 107 | "#!git checkout master" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 0, 113 | "metadata": { 114 | "colab": { 115 | "autoexec": { 116 | "startup": false, 117 | "wait_interval": 0 118 | } 119 | }, 120 | "colab_type": "code", 121 | "id": "48vu2IoAXNcU" 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "#%%capture\n", 126 | "#!apt update\n", 127 | "#!apt install -y cmake" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "**Install dlib (CUDA enabled or CPU version)**\n", 135 | "\n", 136 | 
"CUDA enabled dlib" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "#%%capture\n", 146 | "#!git clone https://github.com/davisking/dlib.git dlib/\n", 147 | "#cd dlib/\n", 148 | "#!python setup.py install –yes USE_AVX_INSTRUCTIONS –yes DLIB_USE_CUDA" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "dlib w/o CUDA" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "# CPU dlib\n", 165 | "#!pip install dlib" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 13, 171 | "metadata": { 172 | "colab": { 173 | "autoexec": { 174 | "startup": false, 175 | "wait_interval": 0 176 | }, 177 | "base_uri": "https://localhost:8080/", 178 | "height": 71 179 | }, 180 | "colab_type": "code", 181 | "executionInfo": { 182 | "elapsed": 1784, 183 | "status": "ok", 184 | "timestamp": 1529131895182, 185 | "user": { 186 | "displayName": "Lu SA", 187 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128", 188 | "userId": "109275333798683015269" 189 | }, 190 | "user_tz": -480 191 | }, 192 | "id": "c8b776O8WH5_", 193 | "outputId": "1adbfb1a-1399-4396-c05c-8685573ee25a" 194 | }, 195 | "outputs": [ 196 | { 197 | "name": "stdout", 198 | "output_type": "stream", 199 | "text": [ 200 | "datalab examples\t README.md\t setup.cfg tox.ini\r\n", 201 | "Dockerfile face_alignment README.rst\t setup.py\r\n", 202 | "docs\t LICENSE\t requirements.txt test\r\n" 203 | ] 204 | } 205 | ], 206 | "source": [ 207 | "#!ls" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 3, 213 | "metadata": { 214 | "colab": { 215 | "autoexec": { 216 | "startup": false, 217 | "wait_interval": 0 218 | }, 219 | "base_uri": "https://localhost:8080/", 220 | "height": 35 221 | }, 222 | "colab_type": "code", 223 | "executionInfo": { 224 | "elapsed": 7848, 225 | "status": "ok", 226 | "timestamp": 1529131778373, 227 | "user": { 228 | "displayName": "Lu SA", 229 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128", 230 | "userId": "109275333798683015269" 231 | }, 232 | "user_tz": -480 233 | }, 234 | "id": "HLqaUVyQWH3V", 235 | "outputId": "3c814038-afca-4934-bd40-c2e7b6a344e5" 236 | }, 237 | "outputs": [ 238 | { 239 | "data": { 240 | "text/plain": [ 241 | "'0.4.0'" 242 | ] 243 | }, 244 | "execution_count": 3, 245 | "metadata": { 246 | "tags": [] 247 | }, 248 | "output_type": "execute_result" 249 | } 250 | ], 251 | "source": [ 252 | "#import torch\n", 253 | "#torch.__version__" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 0, 259 | "metadata": { 260 | "colab": { 261 | "autoexec": { 262 | "startup": false, 263 | "wait_interval": 0 264 | } 265 | }, 266 | "colab_type": "code", 267 | "id": "QNdDVGxkblKX" 268 | }, 269 | "outputs": [], 270 | "source": [] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": { 275 | "colab_type": "text", 276 | "id": "RT_PlazfbldI" 277 | }, 278 | "source": [ 279 | "# Generate binary masks\n", 280 | "\n", 281 | "```bash\n", 282 | " Inputs:\n", 283 | " Images from ./faceA and ./faceB\n", 284 | " Outputs:\n", 285 | " Eyes binary masks, which are saved to ./binary_masks/faceA_eyes and ./binary_masks/faceB_eyes respectively\n", 286 | "```" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 0, 292 | "metadata": { 293 | "colab": { 294 | "autoexec": 
{ 295 | "startup": false, 296 | "wait_interval": 0 297 | } 298 | }, 299 | "colab_type": "code", 300 | "id": "zD5XgAEJbkjS" 301 | }, 302 | "outputs": [], 303 | "source": [ 304 | "import face_alignment" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 0, 310 | "metadata": { 311 | "colab": { 312 | "autoexec": { 313 | "startup": false, 314 | "wait_interval": 0 315 | } 316 | }, 317 | "colab_type": "code", 318 | "id": "hmSj4zaXXz5W" 319 | }, 320 | "outputs": [], 321 | "source": [ 322 | "import cv2\n", 323 | "import numpy as np\n", 324 | "from glob import glob\n", 325 | "from pathlib import PurePath, Path\n", 326 | "from matplotlib import pyplot as plt\n", 327 | "%matplotlib inline" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 0, 333 | "metadata": { 334 | "colab": { 335 | "autoexec": { 336 | "startup": false, 337 | "wait_interval": 0 338 | } 339 | }, 340 | "colab_type": "code", 341 | "id": "34x3mp7wb2bR" 342 | }, 343 | "outputs": [], 344 | "source": [ 345 | "dir_faceA = \"./faceA\"\n", 346 | "dir_faceB = \"./faceB\"\n", 347 | "dir_bm_faceA_eyes = \"./binary_masks/faceA_eyes\"\n", 348 | "dir_bm_faceB_eyes = \"./binary_masks/faceB_eyes\"" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": 0, 354 | "metadata": { 355 | "colab": { 356 | "autoexec": { 357 | "startup": false, 358 | "wait_interval": 0 359 | } 360 | }, 361 | "colab_type": "code", 362 | "id": "17GYV20FXz78" 363 | }, 364 | "outputs": [], 365 | "source": [ 366 | "fns_faceA = glob(f\"{dir_faceA}/*.*\")\n", 367 | "fns_faceB = glob(f\"{dir_faceB}/*.*\")" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": 11, 373 | "metadata": { 374 | "colab": { 375 | "autoexec": { 376 | "startup": false, 377 | "wait_interval": 0 378 | }, 379 | "base_uri": "https://localhost:8080/", 380 | "height": 34 381 | }, 382 | "colab_type": "code", 383 | "executionInfo": { 384 | "elapsed": 11767, 385 | "status": "ok", 386 | "timestamp": 1529152245581, 387 | "user": { 388 | "displayName": "Lu SA", 389 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128", 390 | "userId": "109275333798683015269" 391 | }, 392 | "user_tz": -480 393 | }, 394 | "id": "fwD2DVvQcEMw", 395 | "outputId": "698867a7-2b85-4703-cdd2-fa7531bc6b52" 396 | }, 397 | "outputs": [ 398 | { 399 | "name": "stdout", 400 | "output_type": "stream", 401 | "text": [ 402 | "Downloading the Face Alignment Network(FAN). 
Please wait...\n" 403 | ] 404 | } 405 | ], 406 | "source": [ 407 | "fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, enable_cuda=True, flip_input=False)" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": 0, 413 | "metadata": { 414 | "colab": { 415 | "autoexec": { 416 | "startup": false, 417 | "wait_interval": 0 418 | } 419 | }, 420 | "colab_type": "code", 421 | "id": "cYCCJK1ehpNi" 422 | }, 423 | "outputs": [], 424 | "source": [ 425 | "# !mkdir -p binary_masks/faceA_eyes\n", 426 | "Path(f\"binary_masks/faceA_eyes\").mkdir(parents=True, exist_ok=True)\n", 427 | "# !mkdir -p binary_masks/faceB_eyes\n", 428 | "Path(f\"binary_masks/faceB_eyes\").mkdir(parents=True, exist_ok=True)" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": null, 434 | "metadata": { 435 | "colab": { 436 | "autoexec": { 437 | "startup": false, 438 | "wait_interval": 0 439 | }, 440 | "base_uri": "https://localhost:8080/", 441 | "height": 2397 442 | }, 443 | "colab_type": "code", 444 | "executionInfo": { 445 | "elapsed": 121564, 446 | "status": "ok", 447 | "timestamp": 1529152370522, 448 | "user": { 449 | "displayName": "Lu SA", 450 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128", 451 | "userId": "109275333798683015269" 452 | }, 453 | "user_tz": -480 454 | }, 455 | "id": "iMZHXBmzcEUY", 456 | "outputId": "2dc0737f-6990-40db-adb9-c5e2baf49ae9" 457 | }, 458 | "outputs": [], 459 | "source": [ 460 | "fns_face_not_detected = []\n", 461 | "\n", 462 | "for idx, fns in enumerate([fns_faceA, fns_faceB]):\n", 463 | " if idx == 0:\n", 464 | " save_path = dir_bm_faceA_eyes\n", 465 | " elif idx == 1:\n", 466 | " save_path = dir_bm_faceB_eyes \n", 467 | " \n", 468 | " # create binary mask for each training image\n", 469 | " for fn in fns:\n", 470 | " raw_fn = PurePath(fn).parts[-1]\n", 471 | "\n", 472 | " x = plt.imread(fn)\n", 473 | " x = cv2.resize(x, (256,256))\n", 474 | " preds = fa.get_landmarks(x)\n", 475 | " \n", 476 | " if preds is not None:\n", 477 | " preds = preds[0]\n", 478 | " mask = np.zeros_like(x)\n", 479 | " \n", 480 | " # Draw right eye binary mask\n", 481 | " pnts_right = [(preds[i,0],preds[i,1]) for i in range(36,42)]\n", 482 | " hull = cv2.convexHull(np.array(pnts_right)).astype(np.int32)\n", 483 | " mask = cv2.drawContours(mask,[hull],0,(255,255,255),-1)\n", 484 | "\n", 485 | " # Draw left eye binary mask\n", 486 | " pnts_left = [(preds[i,0],preds[i,1]) for i in range(42,48)]\n", 487 | " hull = cv2.convexHull(np.array(pnts_left)).astype(np.int32)\n", 488 | " mask = cv2.drawContours(mask,[hull],0,(255,255,255),-1)\n", 489 | "\n", 490 | " # Draw mouth binary mask\n", 491 | " #pnts_mouth = [(preds[i,0],preds[i,1]) for i in range(48,60)]\n", 492 | " #hull = cv2.convexHull(np.array(pnts_mouth)).astype(np.int32)\n", 493 | " #mask = cv2.drawContours(mask,[hull],0,(255,255,255),-1)\n", 494 | " \n", 495 | " mask = cv2.dilate(mask, np.ones((13,13), np.uint8), iterations=1)\n", 496 | " mask = cv2.GaussianBlur(mask, (7,7), 0)\n", 497 | " \n", 498 | " else:\n", 499 | " mask = np.zeros_like(x)\n", 500 | " print(f\"No faces were detected in image '{fn}''\")\n", 501 | " fns_face_not_detected.append(fn)\n", 502 | " \n", 503 | " plt.imsave(fname=f\"{save_path}/{raw_fn}\", arr=mask, format=\"jpg\")" 504 | ] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "execution_count": 14, 509 | "metadata": { 510 | "colab": { 511 | "autoexec": { 512 | "startup": false, 513 | "wait_interval": 0 514 | }, 515 | "base_uri": "https://localhost:8080/", 516 | 
"height": 51 517 | }, 518 | "colab_type": "code", 519 | "executionInfo": { 520 | "elapsed": 559, 521 | "status": "ok", 522 | "timestamp": 1529152371122, 523 | "user": { 524 | "displayName": "Lu SA", 525 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128", 526 | "userId": "109275333798683015269" 527 | }, 528 | "user_tz": -480 529 | }, 530 | "id": "uXgooPybq7PG", 531 | "outputId": "362246e2-9b58-487c-cb4e-99ce69136eda" 532 | }, 533 | "outputs": [ 534 | { 535 | "name": "stdout", 536 | "output_type": "stream", 537 | "text": [ 538 | "Nuber of processed images: 694\n", 539 | "Number of image(s) with no face detected: 70\n" 540 | ] 541 | } 542 | ], 543 | "source": [ 544 | "num_faceA = len(glob(dir_faceA+\"/*.*\"))\n", 545 | "num_faceB = len(glob(dir_faceB+\"/*.*\"))\n", 546 | "\n", 547 | "print(\"Nuber of processed images: \"+ str(num_faceA + num_faceB))\n", 548 | "print(\"Number of image(s) with no face detected: \" + str(len(fns_face_not_detected)))" 549 | ] 550 | }, 551 | { 552 | "cell_type": "markdown", 553 | "metadata": { 554 | "colab_type": "text", 555 | "id": "uRKzF42Wy0Ba" 556 | }, 557 | "source": [ 558 | "# Randomly diaplay a face image and its result binary mask" 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": null, 564 | "metadata": { 565 | "colab": { 566 | "autoexec": { 567 | "startup": false, 568 | "wait_interval": 0 569 | }, 570 | "base_uri": "https://localhost:8080/", 571 | "height": 316 572 | }, 573 | "colab_type": "code", 574 | "executionInfo": { 575 | "elapsed": 1080, 576 | "status": "ok", 577 | "timestamp": 1529139911130, 578 | "user": { 579 | "displayName": "Lu SA", 580 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128", 581 | "userId": "109275333798683015269" 582 | }, 583 | "user_tz": -480 584 | }, 585 | "id": "Ja4aOHNay0HH", 586 | "outputId": "c6e1b3d4-d427-4267-a33b-efc8882373e3" 587 | }, 588 | "outputs": [], 589 | "source": [ 590 | "face = np.random.choice([\"A\",\"B\"])\n", 591 | "\n", 592 | "dir_face = dir_faceA if face == \"A\" else dir_faceB\n", 593 | "fns_face = fns_faceA if face == \"A\" else fns_faceB\n", 594 | "num_face = len(glob(dir_face+\"/*.*\"))\n", 595 | "rand_idx = np.random.randint(num_face)\n", 596 | "rand_fn = fns_face[rand_idx]\n", 597 | "raw_fn = PurePath(rand_fn).parts[-1]\n", 598 | "mask_fn = f\"{dir_bm_faceA_eyes}/{raw_fn}\" if face == \"A\" else f\"{dir_bm_faceB_eyes}/{raw_fn}\"\n", 599 | "im = plt.imread(rand_fn)\n", 600 | "mask = plt.imread(mask_fn)\n", 601 | "\n", 602 | "if rand_fn in fns_face_not_detected:\n", 603 | " print(\"========== No faces were detected in this image! 
==========\")\n", 604 | "\n", 605 | "fig = plt.figure(figsize=(15,6))\n", 606 | "plt.subplot(1,3,1)\n", 607 | "plt.grid('off')\n", 608 | "plt.imshow(im)\n", 609 | "plt.subplot(1,3,2)\n", 610 | "plt.grid('off')\n", 611 | "plt.imshow(mask)\n", 612 | "plt.subplot(1,3,3)\n", 613 | "plt.grid('off')\n", 614 | "plt.imshow((mask/255*im).astype(np.uint8))\n", 615 | "\n", 616 | "#fa.get_landmarks(x)" 617 | ] 618 | }, 619 | { 620 | "cell_type": "markdown", 621 | "metadata": { 622 | "colab_type": "text", 623 | "id": "X6c22xxGsVQR" 624 | }, 625 | "source": [ 626 | "# Randomly diaplay an image that has no face detected" 627 | ] 628 | }, 629 | { 630 | "cell_type": "code", 631 | "execution_count": null, 632 | "metadata": { 633 | "colab": { 634 | "autoexec": { 635 | "startup": false, 636 | "wait_interval": 0 637 | }, 638 | "base_uri": "https://localhost:8080/", 639 | "height": 287 640 | }, 641 | "colab_type": "code", 642 | "executionInfo": { 643 | "elapsed": 697, 644 | "status": "ok", 645 | "timestamp": 1529139782680, 646 | "user": { 647 | "displayName": "Lu SA", 648 | "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128", 649 | "userId": "109275333798683015269" 650 | }, 651 | "user_tz": -480 652 | }, 653 | "id": "aAgOLJ_WowlK", 654 | "outputId": "00138071-ab9e-4c91-aeb6-67d253c81cf5" 655 | }, 656 | "outputs": [], 657 | "source": [ 658 | "num_no_face_img = len(fns_face_not_detected)\n", 659 | "rand_idx = np.random.randint(num_no_face_img)\n", 660 | "x = plt.imread(fns_face_not_detected[rand_idx])\n", 661 | "#x = cv2.resize(x, (256,256))\n", 662 | "\n", 663 | "plt.grid('off')\n", 664 | "plt.imshow(x)\n", 665 | "\n", 666 | "#fa.get_landmarks(x)" 667 | ] 668 | }, 669 | { 670 | "cell_type": "code", 671 | "execution_count": null, 672 | "metadata": {}, 673 | "outputs": [], 674 | "source": [] 675 | } 676 | ], 677 | "metadata": { 678 | "accelerator": "GPU", 679 | "colab": { 680 | "collapsed_sections": [], 681 | "default_view": {}, 682 | "name": "make_binary_masks.ipynb", 683 | "provenance": [], 684 | "version": "0.3.2", 685 | "views": {} 686 | }, 687 | "kernelspec": { 688 | "display_name": "Python 3", 689 | "language": "python", 690 | "name": "python3" 691 | }, 692 | "language_info": { 693 | "codemirror_mode": { 694 | "name": "ipython", 695 | "version": 3 696 | }, 697 | "file_extension": ".py", 698 | "mimetype": "text/x-python", 699 | "name": "python", 700 | "nbconvert_exporter": "python", 701 | "pygments_lexer": "ipython3", 702 | "version": "3.6.4" 703 | } 704 | }, 705 | "nbformat": 4, 706 | "nbformat_minor": 1 707 | } 708 | --------------------------------------------------------------------------------