├── _config.yml
├── output_compressed.avi
├── README.md
└── autoencoder.py

/_config.yml:
--------------------------------------------------------------------------------
theme: jekyll-theme-cayman
--------------------------------------------------------------------------------
/output_compressed.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ParmuSingh/face_swap/HEAD/output_compressed.avi
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# face_swap
Face swap for Siraj Raval's coding challenge.

Sorry, everything is in one file. To test without training, just set `n_epochs = 0`.

Please see the output video. I cannot provide the saved model or the data, because I cannot disclose Siraj's face images.

- Someone please add de-convolution layers (there is a rough sketch in `autoencoder.py` to start from).
- It needs more training and a much more diverse dataset. I'll keep trying to upgrade it; for now, I've uploaded this for Siraj Raval's coding challenge.
- Most outputs look nearly identical. I don't know why this is happening; it probably just needs a more diverse dataset.
--------------------------------------------------------------------------------
/autoencoder.py:
--------------------------------------------------------------------------------
'''
Epochs completed so far: 150 + 50 + 150 + 50 + 100.

Please substitute your own paths where necessary.
'''

import tensorflow as tf
import numpy as np
from PIL import Image

base_path1 = "E:/workspace_py/datasets/Siraj images/processed/"
base_path2 = "E:/workspace_py/datasets/Parmu images/"

data = []        # output (Siraj) faces
parmu_data = []  # my (subject) faces

# Load the data: `data` stores the output faces and `parmu_data` stores my
# (subject) images, each flattened to a 112*112 vector.
for i in range(503):
    path = base_path1 + str(i) + '.png'
    try:
        img = Image.open(path)
        img = np.asarray(img)
        img = np.resize(img, [112 * 112])
        data.append(img)
    except OSError:
        print("could not read " + path)
        continue

    path = base_path2 + "Parmu" + str(i) + '.png'
    try:
        img = Image.open(path)
        img = np.asarray(img)
        img = np.resize(img, [112 * 112])
        parmu_data.append(img)
    except OSError:
        continue
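
# NOTE: np.resize() does not resample an image; it just repeats or truncates
# the flat pixel buffer, so the loader above only round-trips correctly when
# the inputs are already 112x112 single-channel images. A safer loader would
# look like this (a sketch; `load_face` is hypothetical and not used below):
def load_face(path):
    img = Image.open(path).convert('L').resize((112, 112))  # grayscale, resampled to 112x112
    return np.asarray(img, dtype=np.float32).reshape(112 * 112)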

# Model

n_epochs = 0  # set to 0 to skip training and only run inference
n_examples = 475
batch_size = 1

data_ph = tf.placeholder('float', [None, 112 * 112], name='data_ph')
output_ph = tf.placeholder('float', [None, 112 * 112], name='output_ph')
learning_rate = tf.placeholder('float', [], name='learning_rate_ph')  # for a variable learning rate

def conv2d(x, W):  # convolution layer
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def maxpool2d(x):  # max-pooling layer
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Weights and biases:
weights_encoder = {
    'w_conv1': tf.Variable(tf.random_normal([5, 5, 1, 32])),   # [filter_h, filter_w, in_channels, n_filters]
    'w_conv2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
    'w_conv3': tf.Variable(tf.random_normal([5, 5, 64, 128])),
    'w_fc': tf.Variable(tf.random_normal([25088, 500])),       # 14 * 14 * 128 = 25088 after three 2x2 pools
    'w_out': tf.Variable(tf.random_normal([500, 200]))
}

biases_encoder = {
    'b_conv1': tf.Variable(tf.random_normal([32])),
    'b_conv2': tf.Variable(tf.random_normal([64])),
    'b_conv3': tf.Variable(tf.random_normal([128])),
    'b_fc': tf.Variable(tf.random_normal([500])),
    'b_out': tf.Variable(tf.random_normal([200]))
}

weights_decoder_A = {
    'hl1': tf.Variable(tf.random_normal([200, 300])),
    'hl2': tf.Variable(tf.random_normal([300, 500])),
    'hl3': tf.Variable(tf.random_normal([500, 700])),
    'ol': tf.Variable(tf.random_normal([700, 112 * 112]))
}

biases_decoder_A = {
    'hl1': tf.Variable(tf.random_normal([300])),
    'hl2': tf.Variable(tf.random_normal([500])),
    'hl3': tf.Variable(tf.random_normal([700])),
    'ol': tf.Variable(tf.random_normal([112 * 112]))
}

weights_decoder_B = {
    'hl1': tf.Variable(tf.random_normal([200, 300])),
    'hl2': tf.Variable(tf.random_normal([300, 500])),
    'hl3': tf.Variable(tf.random_normal([500, 700])),
    'ol': tf.Variable(tf.random_normal([700, 112 * 112]))
}

biases_decoder_B = {
    'hl1': tf.Variable(tf.random_normal([300])),
    'hl2': tf.Variable(tf.random_normal([500])),
    'hl3': tf.Variable(tf.random_normal([700])),
    'ol': tf.Variable(tf.random_normal([112 * 112]))
}

def encoder(x):  # shared encoder

    global weights_encoder
    global biases_encoder
    x = tf.reshape(x, [-1, 112, 112, 1])

    # Three conv + 2x2 max-pool blocks: 112 -> 56 -> 28 -> 14.
    conv1 = maxpool2d(tf.nn.relu(conv2d(x, weights_encoder['w_conv1']) + biases_encoder['b_conv1']))
    conv2 = maxpool2d(tf.nn.relu(conv2d(conv1, weights_encoder['w_conv2']) + biases_encoder['b_conv2']))
    conv3 = maxpool2d(tf.nn.relu(conv2d(conv2, weights_encoder['w_conv3']) + biases_encoder['b_conv3']))
    fc = tf.reshape(conv3, [-1, 25088])
    fc = tf.nn.relu(tf.add(tf.matmul(fc, weights_encoder['w_fc']), biases_encoder['b_fc']))
    ol = tf.nn.relu(tf.add(tf.matmul(fc, weights_encoder['w_out']), biases_encoder['b_out']))

    return ol
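
# The README asks for de-convolution layers in the decoders. Below is a
# minimal sketch of what one could look like, using tf.nn.conv2d_transpose
# to upsample 14x14x128 feature maps back to a 112x112 image. Everything in
# this block (weights_deconv, decoder_deconv) is hypothetical and is not
# wired into the model below; it assumes a [batch, 25088] code taken from
# the conv stack before the fully connected layers.
weights_deconv = {
    'w_dc1': tf.Variable(tf.random_normal([5, 5, 64, 128])),  # [h, w, out_channels, in_channels]
    'w_dc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
    'w_dc3': tf.Variable(tf.random_normal([5, 5, 1, 32]))
}

def decoder_deconv(code):
    x = tf.reshape(code, [-1, 14, 14, 128])
    # Each transpose-conv doubles the spatial size: 14 -> 28 -> 56 -> 112.
    x = tf.nn.relu(tf.nn.conv2d_transpose(x, weights_deconv['w_dc1'],
        output_shape=[batch_size, 28, 28, 64], strides=[1, 2, 2, 1], padding='SAME'))
    x = tf.nn.relu(tf.nn.conv2d_transpose(x, weights_deconv['w_dc2'],
        output_shape=[batch_size, 56, 56, 32], strides=[1, 2, 2, 1], padding='SAME'))
    x = tf.nn.conv2d_transpose(x, weights_deconv['w_dc3'],
        output_shape=[batch_size, 112, 112, 1], strides=[1, 2, 2, 1], padding='SAME')
    return tf.reshape(x, [-1, 112 * 112])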

def decoder_A(x):  # decoder A: generates the output (Siraj) faces

    global weights_decoder_A
    global biases_decoder_A

    hl1 = tf.nn.relu(tf.add(tf.matmul(x, weights_decoder_A['hl1']), biases_decoder_A['hl1']), name='hl1')
    hl2 = tf.nn.relu(tf.add(tf.matmul(hl1, weights_decoder_A['hl2']), biases_decoder_A['hl2']), name='hl2')
    hl3 = tf.nn.relu(tf.add(tf.matmul(hl2, weights_decoder_A['hl3']), biases_decoder_A['hl3']), name='hl3')
    ol = tf.nn.relu(tf.add(tf.matmul(hl3, weights_decoder_A['ol']), biases_decoder_A['ol']), name='ol')

    return ol

def decoder_B(x):  # decoder B: generates the subject (Parmu) faces

    global weights_decoder_B
    global biases_decoder_B

    hl1 = tf.nn.relu(tf.add(tf.matmul(x, weights_decoder_B['hl1']), biases_decoder_B['hl1']), name='hl1')
    hl2 = tf.nn.relu(tf.add(tf.matmul(hl1, weights_decoder_B['hl2']), biases_decoder_B['hl2']), name='hl2')
    hl3 = tf.nn.relu(tf.add(tf.matmul(hl2, weights_decoder_B['hl3']), biases_decoder_B['hl3']), name='hl3')
    ol = tf.nn.relu(tf.add(tf.matmul(hl3, weights_decoder_B['ol']), biases_decoder_B['ol']), name='ol')

    return ol


# Build each output once and reuse the tensors; calling decoder_A(encoder(...))
# inside a loop would keep adding new nodes to the graph.
outputA = decoder_A(encoder(data_ph))
outputB = decoder_B(encoder(data_ph))

lossA = tf.reduce_mean((outputA - output_ph) ** 2, name='lossA')
lossB = tf.reduce_mean((outputB - output_ph) ** 2, name='lossB')
trainA = tf.train.AdamOptimizer(learning_rate).minimize(lossA)
trainB = tf.train.AdamOptimizer(learning_rate).minimize(lossB)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()

########################### UN-COMMENT THIS TO RESUME FROM A SAVED MODEL.
# saver = tf.train.import_meta_graph("E:/workspace_py/saved_models/face_swap/autoencoder/autoencoder-1.ckpt.meta")
# saver.restore(sess, tf.train.latest_checkpoint('E:/workspace_py/saved_models/face_swap/autoencoder/'))
########################### UN-COMMENT THIS TO RESUME FROM A SAVED MODEL.
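
# The loop below visits the training pairs in the same fixed order every
# epoch. A per-epoch shuffle is a common improvement; a sketch, assuming
# `data` and `parmu_data` are index-aligned pairs:
#
#   perm = np.random.permutation(n_examples)
#   data = [data[i] for i in perm]
#   parmu_data = [parmu_data[i] for i in perm]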

errA = float('inf')
errB = float('inf')
for epoch in range(n_epochs):
    ptr = 0
    for iteration in range(int(n_examples / batch_size)):
        epoch_x = data[ptr : ptr + batch_size]
        epoch_y = parmu_data[ptr : ptr + batch_size]
        ptr += batch_size
        # Decoder A is trained as a plain autoencoder on the output faces;
        # decoder B is trained to map output faces to subject faces.
        # if errA < 9250:
        #     _, errA = sess.run([trainA, lossA], feed_dict={data_ph: epoch_x, output_ph: epoch_x, learning_rate: 0.001})
        # else:
        _, errA = sess.run([trainA, lossA], feed_dict={data_ph: epoch_x, output_ph: epoch_x, learning_rate: 0.005})
        # if errB < 1687:
        #     _, errB = sess.run([trainB, lossB], feed_dict={data_ph: epoch_x, output_ph: epoch_y, learning_rate: 0.01})
        # else:
        _, errB = sess.run([trainB, lossB], feed_dict={data_ph: epoch_x, output_ph: epoch_y, learning_rate: 0.001})

    print("Loss @ epoch", epoch, "=", errA, "and", errB)
    if (epoch + 1) % 50 == 0:
        save_path = saver.save(sess, "E:/workspace_py/saved_models/face_swap/autoencoder/autoencoder-1.ckpt")

prediction = sess.run(outputA, feed_dict={data_ph: [parmu_data[0]]})
print("prediction:", prediction)

import matplotlib.pyplot as plt
plt.subplot(1, 2, 1)
plt.imshow(np.reshape(parmu_data[0], [112, 112]), cmap='gray')
plt.subplot(1, 2, 2)
plt.imshow(np.reshape(prediction, [112, 112]), cmap='gray')
plt.show()

import cv2 as c

face_cascade = c.CascadeClassifier(r'E:\workspace_py\OpenCV Cascades\haarcascades\haarcascades\haarcascade_frontalface_default.xml')
eye_cascade = c.CascadeClassifier(r'E:\workspace_py\OpenCV Cascades\haarcascades\haarcascades\haarcascade_eye.xml')

cap = c.VideoCapture(0)
out = c.VideoWriter('output.avi', -1, 5.0, (640, 480), isColor=False)  # -1 prompts for a codec on Windows

while True:
    # Capture frame by frame.
    ret, frame = cap.read()  # ret is False when no frame could be read
    if not ret:
        break

    gray = c.cvtColor(frame, c.COLOR_BGR2GRAY)

    faces = face_cascade.detectMultiScale(gray, 1.3, 5)
    for (x, y, w, h) in faces:
        img = gray[y:y + h, x:x + w]
        face_size = (h, w)
        img = np.resize(img, [112 * 112])

        prediction = np.reshape(sess.run(outputA, feed_dict={data_ph: [img]}), [112, 112])

        # Resize the swapped face back to the detected box (cv2.resize takes
        # (width, height)) and paste it into the frame as 8-bit pixels.
        prediction = c.resize(prediction, (face_size[1], face_size[0]))
        gray[y:y + h, x:x + w] = np.clip(prediction, 0, 255).astype(np.uint8)

    # Display the resulting frame.
    c.imshow('frame', gray)
    out.write(gray)
    if c.waitKey(1) == ord('q'):
        break

cap.release()
out.release()
c.destroyAllWindows()
sess.close()
--------------------------------------------------------------------------------