├── _config.yml
├── output_compressed.avi
├── README.md
└── autoencoder.py

/_config.yml:
--------------------------------------------------------------------------------
theme: jekyll-theme-cayman
--------------------------------------------------------------------------------
/output_compressed.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ParmuSingh/face_swap/HEAD/output_compressed.avi
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# face_swap
Face swap for Siraj Raval's coding challenge.

Sorry, everything is in one file. To test without training, just set `n_epochs = 0`.

Please see the output video. I cannot provide the saved model or the data, because I cannot disclose Siraj's face images.

- Someone please add de-convolution layers (there is a rough sketch in `autoencoder.py` to start from).
- It needs more training and a much more diverse dataset. I'll keep trying to upgrade it; for now, I've uploaded this for Siraj Raval's coding challenge.
- Most outputs look nearly identical. I don't know why this is happening; it probably just needs a more diverse dataset.
--------------------------------------------------------------------------------
/autoencoder.py:
--------------------------------------------------------------------------------
'''
Epochs completed so far: 150 + 50 + 150 + 50 + 100.

Please substitute your own paths where necessary.
'''

import tensorflow as tf
import numpy as np
from PIL import Image

base_path1 = "E:/workspace_py/datasets/Siraj images/processed/"
base_path2 = "E:/workspace_py/datasets/Parmu images/"

data = []        # output (Siraj) faces
parmu_data = []  # my (subject) faces

# Load the data: `data` stores the output faces and `parmu_data` stores my
# (subject) images, each flattened to a 112*112 vector.
for i in range(503):
    path = base_path1 + str(i) + '.png'
    try:
        img = Image.open(path)
        img = np.asarray(img)
        img = np.resize(img, [112 * 112])
        data.append(img)
    except OSError:
        print("could not read " + path)
        continue

    path = base_path2 + "Parmu" + str(i) + '.png'
    try:
        img = Image.open(path)
        img = np.asarray(img)
        img = np.resize(img, [112 * 112])
        parmu_data.append(img)
    except OSError:
        continue
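
# NOTE: np.resize() does not resample an image; it just repeats or truncates
# the flat pixel buffer, so the loader above only round-trips correctly when
# the inputs are already 112x112 single-channel images. A safer loader would
# look like this (a sketch; `load_face` is hypothetical and not used below):
def load_face(path):
    img = Image.open(path).convert('L').resize((112, 112))  # grayscale, resampled to 112x112
    return np.asarray(img, dtype=np.float32).reshape(112 * 112)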

# Model

n_epochs = 0  # set to 0 to skip training and only run inference
n_examples = 475
batch_size = 1

data_ph = tf.placeholder('float', [None, 112 * 112], name='data_ph')
output_ph = tf.placeholder('float', [None, 112 * 112], name='output_ph')
learning_rate = tf.placeholder('float', [], name='learning_rate_ph')  # for a variable learning rate

def conv2d(x, W):  # convolution layer
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def maxpool2d(x):  # max-pooling layer
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Weights and biases:
weights_encoder = {
    'w_conv1': tf.Variable(tf.random_normal([5, 5, 1, 32])),   # [filter_h, filter_w, in_channels, n_filters]
    'w_conv2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
    'w_conv3': tf.Variable(tf.random_normal([5, 5, 64, 128])),
    'w_fc': tf.Variable(tf.random_normal([25088, 500])),       # 14 * 14 * 128 = 25088 after three 2x2 pools
    'w_out': tf.Variable(tf.random_normal([500, 200]))
}

biases_encoder = {
    'b_conv1': tf.Variable(tf.random_normal([32])),
    'b_conv2': tf.Variable(tf.random_normal([64])),
    'b_conv3': tf.Variable(tf.random_normal([128])),
    'b_fc': tf.Variable(tf.random_normal([500])),
    'b_out': tf.Variable(tf.random_normal([200]))
}

weights_decoder_A = {
    'hl1': tf.Variable(tf.random_normal([200, 300])),
    'hl2': tf.Variable(tf.random_normal([300, 500])),
    'hl3': tf.Variable(tf.random_normal([500, 700])),
    'ol': tf.Variable(tf.random_normal([700, 112 * 112]))
}

biases_decoder_A = {
    'hl1': tf.Variable(tf.random_normal([300])),
    'hl2': tf.Variable(tf.random_normal([500])),
    'hl3': tf.Variable(tf.random_normal([700])),
    'ol': tf.Variable(tf.random_normal([112 * 112]))
}

weights_decoder_B = {
    'hl1': tf.Variable(tf.random_normal([200, 300])),
    'hl2': tf.Variable(tf.random_normal([300, 500])),
    'hl3': tf.Variable(tf.random_normal([500, 700])),
    'ol': tf.Variable(tf.random_normal([700, 112 * 112]))
}

biases_decoder_B = {
    'hl1': tf.Variable(tf.random_normal([300])),
    'hl2': tf.Variable(tf.random_normal([500])),
    'hl3': tf.Variable(tf.random_normal([700])),
    'ol': tf.Variable(tf.random_normal([112 * 112]))
}

def encoder(x):  # shared encoder

    global weights_encoder
    global biases_encoder
    x = tf.reshape(x, [-1, 112, 112, 1])

    # Three conv + 2x2 max-pool blocks: 112 -> 56 -> 28 -> 14.
    conv1 = maxpool2d(tf.nn.relu(conv2d(x, weights_encoder['w_conv1']) + biases_encoder['b_conv1']))
    conv2 = maxpool2d(tf.nn.relu(conv2d(conv1, weights_encoder['w_conv2']) + biases_encoder['b_conv2']))
    conv3 = maxpool2d(tf.nn.relu(conv2d(conv2, weights_encoder['w_conv3']) + biases_encoder['b_conv3']))
    fc = tf.reshape(conv3, [-1, 25088])
    fc = tf.nn.relu(tf.add(tf.matmul(fc, weights_encoder['w_fc']), biases_encoder['b_fc']))
    ol = tf.nn.relu(tf.add(tf.matmul(fc, weights_encoder['w_out']), biases_encoder['b_out']))

    return ol
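
# The README asks for de-convolution layers in the decoders. Below is a
# minimal sketch of what one could look like, using tf.nn.conv2d_transpose
# to upsample 14x14x128 feature maps back to a 112x112 image. Everything in
# this block (weights_deconv, decoder_deconv) is hypothetical and is not
# wired into the model below; it assumes a [batch, 25088] code taken from
# the conv stack before the fully connected layers.
weights_deconv = {
    'w_dc1': tf.Variable(tf.random_normal([5, 5, 64, 128])),  # [h, w, out_channels, in_channels]
    'w_dc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
    'w_dc3': tf.Variable(tf.random_normal([5, 5, 1, 32]))
}

def decoder_deconv(code):
    x = tf.reshape(code, [-1, 14, 14, 128])
    # Each transpose-conv doubles the spatial size: 14 -> 28 -> 56 -> 112.
    x = tf.nn.relu(tf.nn.conv2d_transpose(x, weights_deconv['w_dc1'],
        output_shape=[batch_size, 28, 28, 64], strides=[1, 2, 2, 1], padding='SAME'))
    x = tf.nn.relu(tf.nn.conv2d_transpose(x, weights_deconv['w_dc2'],
        output_shape=[batch_size, 56, 56, 32], strides=[1, 2, 2, 1], padding='SAME'))
    x = tf.nn.conv2d_transpose(x, weights_deconv['w_dc3'],
        output_shape=[batch_size, 112, 112, 1], strides=[1, 2, 2, 1], padding='SAME')
    return tf.reshape(x, [-1, 112 * 112])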

def decoder_A(x):  # decoder A: generates the output (Siraj) faces

    global weights_decoder_A
    global biases_decoder_A

    hl1 = tf.nn.relu(tf.add(tf.matmul(x, weights_decoder_A['hl1']), biases_decoder_A['hl1']), name='hl1')
    hl2 = tf.nn.relu(tf.add(tf.matmul(hl1, weights_decoder_A['hl2']), biases_decoder_A['hl2']), name='hl2')
    hl3 = tf.nn.relu(tf.add(tf.matmul(hl2, weights_decoder_A['hl3']), biases_decoder_A['hl3']), name='hl3')
    ol = tf.nn.relu(tf.add(tf.matmul(hl3, weights_decoder_A['ol']), biases_decoder_A['ol']), name='ol')

    return ol

def decoder_B(x):  # decoder B: generates the subject (Parmu) faces

    global weights_decoder_B
    global biases_decoder_B

    hl1 = tf.nn.relu(tf.add(tf.matmul(x, weights_decoder_B['hl1']), biases_decoder_B['hl1']), name='hl1')
    hl2 = tf.nn.relu(tf.add(tf.matmul(hl1, weights_decoder_B['hl2']), biases_decoder_B['hl2']), name='hl2')
    hl3 = tf.nn.relu(tf.add(tf.matmul(hl2, weights_decoder_B['hl3']), biases_decoder_B['hl3']), name='hl3')
    ol = tf.nn.relu(tf.add(tf.matmul(hl3, weights_decoder_B['ol']), biases_decoder_B['ol']), name='ol')

    return ol


# Build each output once and reuse the tensors; calling decoder_A(encoder(...))
# inside a loop would keep adding new nodes to the graph.
outputA = decoder_A(encoder(data_ph))
outputB = decoder_B(encoder(data_ph))

lossA = tf.reduce_mean((outputA - output_ph) ** 2, name='lossA')
lossB = tf.reduce_mean((outputB - output_ph) ** 2, name='lossB')
trainA = tf.train.AdamOptimizer(learning_rate).minimize(lossA)
trainB = tf.train.AdamOptimizer(learning_rate).minimize(lossB)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()

########################### UN-COMMENT THIS TO RESUME FROM A SAVED MODEL.
# saver = tf.train.import_meta_graph("E:/workspace_py/saved_models/face_swap/autoencoder/autoencoder-1.ckpt.meta")
# saver.restore(sess, tf.train.latest_checkpoint('E:/workspace_py/saved_models/face_swap/autoencoder/'))
########################### UN-COMMENT THIS TO RESUME FROM A SAVED MODEL.
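
# The loop below visits the training pairs in the same fixed order every
# epoch. A per-epoch shuffle is a common improvement; a sketch, assuming
# `data` and `parmu_data` are index-aligned pairs:
#
#   perm = np.random.permutation(n_examples)
#   data = [data[i] for i in perm]
#   parmu_data = [parmu_data[i] for i in perm]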

errA = float('inf')
errB = float('inf')
for epoch in range(n_epochs):
    ptr = 0
    for iteration in range(int(n_examples / batch_size)):
        epoch_x = data[ptr : ptr + batch_size]
        epoch_y = parmu_data[ptr : ptr + batch_size]
        ptr += batch_size
        # Decoder A is trained as a plain autoencoder on the output faces;
        # decoder B is trained to map output faces to subject faces.
        # if errA < 9250:
        #     _, errA = sess.run([trainA, lossA], feed_dict={data_ph: epoch_x, output_ph: epoch_x, learning_rate: 0.001})
        # else:
        _, errA = sess.run([trainA, lossA], feed_dict={data_ph: epoch_x, output_ph: epoch_x, learning_rate: 0.005})
        # if errB < 1687:
        #     _, errB = sess.run([trainB, lossB], feed_dict={data_ph: epoch_x, output_ph: epoch_y, learning_rate: 0.01})
        # else:
        _, errB = sess.run([trainB, lossB], feed_dict={data_ph: epoch_x, output_ph: epoch_y, learning_rate: 0.001})

    print("Loss @ epoch", epoch, "=", errA, "and", errB)
    if (epoch + 1) % 50 == 0:
        save_path = saver.save(sess, "E:/workspace_py/saved_models/face_swap/autoencoder/autoencoder-1.ckpt")

prediction = sess.run(outputA, feed_dict={data_ph: [parmu_data[0]]})
print("prediction:", prediction)

import matplotlib.pyplot as plt
plt.subplot(1, 2, 1)
plt.imshow(np.reshape(parmu_data[0], [112, 112]), cmap='gray')
plt.subplot(1, 2, 2)
plt.imshow(np.reshape(prediction, [112, 112]), cmap='gray')
plt.show()

import cv2 as c

face_cascade = c.CascadeClassifier(r'E:\workspace_py\OpenCV Cascades\haarcascades\haarcascades\haarcascade_frontalface_default.xml')
eye_cascade = c.CascadeClassifier(r'E:\workspace_py\OpenCV Cascades\haarcascades\haarcascades\haarcascade_eye.xml')

cap = c.VideoCapture(0)
out = c.VideoWriter('output.avi', -1, 5.0, (640, 480), isColor=False)  # -1 prompts for a codec on Windows

while True:
    # Capture frame by frame.
    ret, frame = cap.read()  # ret is False when no frame could be read
    if not ret:
        break

    gray = c.cvtColor(frame, c.COLOR_BGR2GRAY)

    faces = face_cascade.detectMultiScale(gray, 1.3, 5)
    for (x, y, w, h) in faces:
        img = gray[y:y + h, x:x + w]
        face_size = (h, w)
        img = np.resize(img, [112 * 112])

        prediction = np.reshape(sess.run(outputA, feed_dict={data_ph: [img]}), [112, 112])

        # Resize the swapped face back to the detected box (cv2.resize takes
        # (width, height)) and paste it into the frame as 8-bit pixels.
        prediction = c.resize(prediction, (face_size[1], face_size[0]))
        gray[y:y + h, x:x + w] = np.clip(prediction, 0, 255).astype(np.uint8)

    # Display the resulting frame.
    c.imshow('frame', gray)
    out.write(gray)
    if c.waitKey(1) == ord('q'):
        break

cap.release()
out.release()
c.destroyAllWindows()
sess.close()
--------------------------------------------------------------------------------