├── Samples
│   ├── dcgan
│   │   └── 497.png
│   ├── vae
│   │   └── 499_s.png
│   ├── wgan
│   │   └── 260.png
│   ├── began
│   │   └── 228_r.png
│   ├── ebgan
│   │   └── 109_r.png
│   └── began_n
│       └── 369_r.png
├── vae.py
├── dcgan.py
├── wgan.py
├── ebgan.py
├── utils
│   ├── datas.py
│   └── nets.py
├── began.py
└── README.md

/Samples/dcgan/497.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yfeng95/GAN_Theories/HEAD/Samples/dcgan/497.png
--------------------------------------------------------------------------------
/Samples/vae/499_s.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yfeng95/GAN_Theories/HEAD/Samples/vae/499_s.png
--------------------------------------------------------------------------------
/Samples/wgan/260.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yfeng95/GAN_Theories/HEAD/Samples/wgan/260.png
--------------------------------------------------------------------------------
/Samples/began/228_r.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yfeng95/GAN_Theories/HEAD/Samples/began/228_r.png
--------------------------------------------------------------------------------
/Samples/ebgan/109_r.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yfeng95/GAN_Theories/HEAD/Samples/ebgan/109_r.png
--------------------------------------------------------------------------------
/Samples/began_n/369_r.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yfeng95/GAN_Theories/HEAD/Samples/began_n/369_r.png
--------------------------------------------------------------------------------
/vae.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import os, sys

sys.path.append('utils')
from nets import *
from datas import *

def sample_z(m, n):
    # eps ~ N(0, I): both the reparameterization trick and the Gaussian KL
    # term below assume standard normal noise, not a uniform distribution
    return np.random.normal(0., 1., size=[m, n])

class VAE():
    def __init__(self, generator, discriminator, data):
        self.generator = generator
        self.discriminator = discriminator
        self.data = data

        # data
        self.z_dim = self.data.z_dim
        self.size = self.data.size
        self.channel = self.data.channel

        self.X = tf.placeholder(tf.float32, shape=[None, self.size, self.size, self.channel])
        self.z = tf.placeholder(tf.float32, shape=[None, self.z_dim])

        # nets: the "discriminator" acts as the encoder here and returns the
        # mean and log-variance of q(z|X)
        mu, sigma = self.discriminator(self.X)
        latent_code = mu + tf.exp(sigma / 2) * self.z  # reparameterization trick

        self.G_real = self.generator(latent_code)
        self.G_sample = self.generator(self.z)

        # loss
        # E[log P(X|z)]: Bernoulli reconstruction (cross-entropy); valid since
        # the inputs and the sigmoid outputs both lie in [0, 1]
        epsilon = 1e-8
        self.recon = tf.reduce_sum(-self.X * tf.log(self.G_real + epsilon) - (1.0 - self.X) * tf.log(1.0 - self.G_real + epsilon))

        # D_KL(Q(z|X) || P(z)); closed form since both distributions are Gaussian
        self.kl = 0.5 * tf.reduce_sum(tf.exp(sigma) + tf.square(mu) - 1. - sigma)

        self.loss = self.recon + self.kl
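        # recon + kl is the negative evidence lower bound (ELBO):
        #   -ELBO = E_q[-log P(X|z)] + D_KL(q(z|X) || N(0, I))
        # so minimizing self.loss maximizes a lower bound on log P(X).
        # Both sums run over the whole batch, so the loss scales with batch_size.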

        # solver
        self.learning_rate = tf.placeholder(tf.float32, shape=[])
        self.solver = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.loss, var_list=self.generator.vars + self.discriminator.vars)

        self.saver = tf.train.Saver()
        gpu_options = tf.GPUOptions(allow_growth=True)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        self.model_name = 'Models/vae.ckpt'

    def train(self, sample_dir, training_epoches=500000, batch_size=32):
        fig_count = 0
        self.sess.run(tf.global_variables_initializer())
        # self.saver.restore(self.sess, self.model_name)

        learning_rate_initial = 1e-4
        for epoch in range(training_epoches):
            # halve the learning rate every 50000 iterations
            learning_rate = learning_rate_initial * pow(0.5, epoch // 50000)
            X_b = self.data(batch_size)
            self.sess.run(
                self.solver,
                feed_dict={self.X: X_b, self.z: sample_z(batch_size, self.z_dim), self.learning_rate: learning_rate}
            )
            # print the loss; save images and the model periodically
            if epoch % 100 == 0 or epoch < 100:
                loss_curr = self.sess.run(
                    self.loss,
                    feed_dict={self.X: X_b, self.z: sample_z(batch_size, self.z_dim)})
                print('Iter: {}; loss: {:.4}'.format(epoch, loss_curr))

            if epoch % 1000 == 0:
                real, samples = self.sess.run([self.G_real, self.G_sample], feed_dict={self.X: X_b[:16, :, :, :], self.z: sample_z(16, self.z_dim)})

                fig = self.data.data2fig(real)
                plt.savefig('{}/{}.png'.format(sample_dir, str(fig_count).zfill(3)), bbox_inches='tight')
                plt.close(fig)

                fig = self.data.data2fig(samples)
                plt.savefig('{}/{}_s.png'.format(sample_dir, str(fig_count).zfill(3)), bbox_inches='tight')
                plt.close(fig)

                fig_count += 1

            if epoch % 5000 == 0:
                self.saver.save(self.sess, self.model_name)


if __name__ == '__main__':

    # constrain GPU usage
    os.environ['CUDA_VISIBLE_DEVICES'] = '2'

    # directories for generated images and model checkpoints
    sample_dir = 'Samples/vae'
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)
    if not os.path.exists('Models'):
        os.makedirs('Models')

    # param
    generator = G_conv()
    discriminator = D_vae()

    data = celebA()

    # run
    vae = VAE(generator, discriminator, data)
    vae.train(sample_dir)

--------------------------------------------------------------------------------
/dcgan.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import os, sys

sys.path.append('utils')
from nets import *
from datas import *

def sample_z(m, n):
    return np.random.uniform(-1., 1., size=[m, n])

class DCGAN():
    def __init__(self, generator, discriminator, data):
        self.generator = generator
        self.discriminator = discriminator
        self.data = data

        # data
        self.z_dim = self.data.z_dim
        self.size = self.data.size
        self.channel = self.data.channel

        self.X = tf.placeholder(tf.float32, shape=[None, self.size, self.size, self.channel])
        self.z = tf.placeholder(tf.float32, shape=[None, self.z_dim])

        # nets
        self.G_sample = self.generator(self.z)

        self.D_real = self.discriminator(self.X)
        self.D_fake = self.discriminator(self.G_sample, reuse=True)

        # loss
        self.D_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.D_real, labels=tf.ones_like(self.D_real))) + tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.D_fake, labels=tf.zeros_like(self.D_fake)))
        self.G_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.D_fake, labels=tf.ones_like(self.D_fake)))
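        # Non-saturating heuristic: instead of minimizing log(1 - D(G(z))),
        # G maximizes log D(G(z)) by labeling its own samples as real, which
        # gives much stronger gradients early in training.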

        # solver
        self.learning_rate = tf.placeholder(tf.float32, shape=[])
        self.D_solver = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.D_loss, var_list=self.discriminator.vars)
        self.G_solver = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.G_loss, var_list=self.generator.vars)

        self.saver = tf.train.Saver()
        gpu_options = tf.GPUOptions(allow_growth=True)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        self.model_name = 'Models/dcgan.ckpt'

    def train(self, sample_dir, training_epoches=500000, batch_size=32):
        fig_count = 0
        self.sess.run(tf.global_variables_initializer())
        # self.saver.restore(self.sess, self.model_name)
        learning_rate_initial = 1e-4
        for epoch in range(training_epoches):
            learning_rate = learning_rate_initial * pow(0.5, epoch // 50000)
            # update D
            X_b = self.data(batch_size)
            self.sess.run(
                self.D_solver,
                feed_dict={self.X: X_b, self.z: sample_z(batch_size, self.z_dim), self.learning_rate: learning_rate}
            )
            # update G
            for _ in range(1):
                self.sess.run(
                    self.G_solver,
                    feed_dict={self.z: sample_z(batch_size, self.z_dim), self.learning_rate: learning_rate}
                )

            # print the loss; save images and the model periodically
            if epoch % 100 == 0 or epoch < 100:
                D_loss_curr, G_loss_curr = self.sess.run(
                    [self.D_loss, self.G_loss],
                    feed_dict={self.X: X_b, self.z: sample_z(batch_size, self.z_dim)})
                print('Iter: {}; D loss: {:.4}; G_loss: {:.4}'.format(epoch, D_loss_curr, G_loss_curr))

            if epoch % 1000 == 0:
                samples = self.sess.run(self.G_sample, feed_dict={self.z: sample_z(16, self.z_dim)})

                fig = self.data.data2fig(samples)
                plt.savefig('{}/{}.png'.format(sample_dir, str(fig_count).zfill(3)), bbox_inches='tight')
                fig_count += 1
                plt.close(fig)

            if epoch % 5000 == 0:
                self.saver.save(self.sess, self.model_name)


if __name__ == '__main__':

    # constrain GPU usage
    os.environ['CUDA_VISIBLE_DEVICES'] = '2'

    # directories for generated images and model checkpoints
    sample_dir = 'Samples/dcgan'
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)
    if not os.path.exists('Models'):
        os.makedirs('Models')

    # param
    generator = G_conv()
    discriminator = D_conv()

    data = celebA()

    # run
    dcgan = DCGAN(generator, discriminator, data)
    dcgan.train(sample_dir)

--------------------------------------------------------------------------------
/wgan.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import os, sys

sys.path.append('utils')
from nets import *
from datas import *

def sample_z(m, n):
    return np.random.uniform(-1., 1., size=[m, n])

class WGAN():
    def __init__(self, generator, discriminator, data):
        self.generator = generator
        self.discriminator = discriminator
        self.data = data

        # data
        self.z_dim = self.data.z_dim
        self.size = self.data.size
        self.channel = self.data.channel

        self.X = tf.placeholder(tf.float32, shape=[None, self.size, self.size, self.channel])
        self.z = tf.placeholder(tf.float32, shape=[None, self.z_dim])

        # nets
        self.G_sample = self.generator(self.z)

        self.D_real = self.discriminator(self.X)
        self.D_fake = self.discriminator(self.G_sample, reuse=True)

        # loss: the critic maximizes D(real) - D(fake), an estimate of the
        # Wasserstein-1 distance; written below as minimization
        self.D_loss = -tf.reduce_mean(self.D_real) + tf.reduce_mean(self.D_fake)
        self.G_loss = -tf.reduce_mean(self.D_fake)

        # clip every critic weight into [-0.01, 0.01]
        self.clip_D = [var.assign(tf.clip_by_value(var, -0.01, 0.01)) for var in self.discriminator.vars]
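        # Weight clipping is a crude way to enforce the Lipschitz constraint
        # required by the Kantorovich-Rubinstein dual form of the Wasserstein
        # distance: each critic variable is projected back into the box
        # around each critic update.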

        # solver
        self.learning_rate = tf.placeholder(tf.float32, shape=[])
        self.D_solver = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate).minimize(self.D_loss, var_list=self.discriminator.vars)
        self.G_solver = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate).minimize(self.G_loss, var_list=self.generator.vars)

        gpu_options = tf.GPUOptions(allow_growth=True)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        self.saver = tf.train.Saver()
        self.model_name = 'Models/wgan.ckpt'

    def train(self, sample_dir, training_epoches=500000, batch_size=32):
        fig_count = 0
        self.sess.run(tf.global_variables_initializer())
        # self.saver.restore(self.sess, self.model_name)

        learning_rate_initial = 1e-4
        for epoch in range(training_epoches):
            learning_rate = learning_rate_initial * pow(0.5, epoch // 50000)
            # update D: train the critic harder at the start and periodically,
            # as in the WGAN reference implementation
            n_d = 100 if epoch < 25 or (epoch + 1) % 500 == 0 else 5
            for _ in range(n_d):
                X_b = self.data(batch_size)
                self.sess.run(
                    [self.clip_D, self.D_solver],
                    feed_dict={self.X: X_b, self.z: sample_z(batch_size, self.z_dim), self.learning_rate: learning_rate}
                )
            # update G
            for _ in range(1):
                self.sess.run(
                    self.G_solver,
                    feed_dict={self.z: sample_z(batch_size, self.z_dim), self.learning_rate: learning_rate}
                )

            # print the loss; save images and the model periodically
            if epoch % 100 == 0 or epoch < 100:
                D_loss_curr, G_loss_curr = self.sess.run(
                    [self.D_loss, self.G_loss],
                    feed_dict={self.X: X_b, self.z: sample_z(batch_size, self.z_dim)})
                print('Iter: {}; D loss: {:.4}; G_loss: {:.4}'.format(epoch, D_loss_curr, G_loss_curr))

            if epoch % 1000 == 0:
                samples = self.sess.run(self.G_sample, feed_dict={self.z: sample_z(16, self.z_dim)})

                fig = self.data.data2fig(samples)
                plt.savefig('{}/{}.png'.format(sample_dir, str(fig_count).zfill(3)), bbox_inches='tight')
                fig_count += 1
                plt.close(fig)

            if epoch % 5000 == 0:
                self.saver.save(self.sess, self.model_name)


if __name__ == '__main__':

    # constrain GPU usage
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'

    # directories for generated images and model checkpoints
    sample_dir = 'Samples/wgan'
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)
    if not os.path.exists('Models'):
        os.makedirs('Models')

    # param
    generator = G_conv()
    discriminator = D_conv()

    data = celebA()

    # run
    wgan = WGAN(generator, discriminator, data)
    wgan.train(sample_dir)

--------------------------------------------------------------------------------
/ebgan.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import os, sys

sys.path.append('utils')
from nets import *
from datas import *

def sample_z(m, n):
    return np.random.uniform(-1., 1., size=[m, n])

class EBGAN():
    def __init__(self, generator, discriminator, data):
        self.generator = generator
        self.discriminator = discriminator
        self.data = data

        # data
        self.z_dim = self.data.z_dim
        self.size = self.data.size
        self.channel = self.data.channel

        self.X = tf.placeholder(tf.float32, shape=[None, self.size, self.size, self.channel])
        self.z = tf.placeholder(tf.float32, shape=[None, self.z_dim])

        # ebgan parameters
        margin = 50.  # energy margin m in the hinge loss

        # nets
        self.G_sample = self.generator(self.z)

        self.D_real = self.discriminator(self.X)
        self.D_fake = self.discriminator(self.G_sample, reuse=True)

        # loss: D is an auto-encoder, so its "energy" is the reconstruction
        # error; tf.nn.l2_loss sums 0.5 * squared error over the whole batch
        # L_real = tf.reduce_mean((self.X - self.D_real)**2, [1, 2, 3])
        # L_fake = tf.reduce_mean((self.G_sample - self.D_fake)**2, [1, 2, 3])
        L_real = tf.nn.l2_loss(self.X - self.D_real)
        L_fake = tf.nn.l2_loss(self.G_sample - self.D_fake)

        self.D_loss = L_real + tf.maximum(0., margin - L_fake)
        self.G_loss = L_fake
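        # Hinge form of the EBGAN objective: D pushes the energy of real
        # samples down, and pushes the energy of fake samples up only while
        # it is below the margin; G simply minimizes the energy of its own
        # samples.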

        # solver
        self.learning_rate = tf.placeholder(tf.float32, shape=[])
        self.D_solver = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.D_loss, var_list=self.discriminator.vars)
        self.G_solver = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.G_loss, var_list=self.generator.vars)

        self.saver = tf.train.Saver()
        gpu_options = tf.GPUOptions(allow_growth=True)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        self.model_name = 'Models/ebgan.ckpt'

    def train(self, sample_dir, training_epoches=500000, batch_size=32):
        fig_count = 0
        self.sess.run(tf.global_variables_initializer())
        # self.saver.restore(self.sess, self.model_name)

        learning_rate_initial = 1e-4
        for epoch in range(training_epoches):
            learning_rate = learning_rate_initial * pow(0.5, epoch // 50000)
            # update D and G
            X_b = self.data(batch_size)
            self.sess.run(
                [self.D_solver, self.G_solver],
                feed_dict={self.X: X_b, self.z: sample_z(batch_size, self.z_dim), self.learning_rate: learning_rate}
            )
            # print the loss; save images and the model periodically
            if epoch % 100 == 0 or epoch < 100:
                D_loss_curr, G_loss_curr = self.sess.run(
                    [self.D_loss, self.G_loss],
                    feed_dict={self.X: X_b, self.z: sample_z(batch_size, self.z_dim)})
                print('Iter: {}; D loss: {:.4}; G_loss: {:.4};'.format(epoch, D_loss_curr, G_loss_curr))

            if epoch % 1000 == 0:
                X_s, real, samples = self.sess.run([self.X, self.D_real, self.G_sample], feed_dict={self.X: X_b[:16, :, :, :], self.z: sample_z(16, self.z_dim)})

                fig = self.data.data2fig(X_s)
                plt.savefig('{}/{}.png'.format(sample_dir, str(fig_count).zfill(3)), bbox_inches='tight')
                plt.close(fig)

                fig = self.data.data2fig(real)
                plt.savefig('{}/{}_d.png'.format(sample_dir, str(fig_count).zfill(3)), bbox_inches='tight')
                plt.close(fig)

                fig = self.data.data2fig(samples)
                plt.savefig('{}/{}_r.png'.format(sample_dir, str(fig_count).zfill(3)), bbox_inches='tight')
                plt.close(fig)

                fig_count += 1

            if epoch % 5000 == 0:
                self.saver.save(self.sess, self.model_name)

if __name__ == '__main__':

    # constrain GPU usage
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'

    # directories for generated images and model checkpoints
    sample_dir = 'Samples/ebgan'
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)
    if not os.path.exists('Models'):
        os.makedirs('Models')

    # param
    generator = G_conv()
    discriminator = D_autoencoder()

    data = celebA()

    # run
    ebgan = EBGAN(generator, discriminator, data)
    ebgan.train(sample_dir)

--------------------------------------------------------------------------------
/utils/datas.py:
--------------------------------------------------------------------------------
import os, sys
from glob import glob
import numpy as np
import scipy.misc
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

from tensorflow.examples.tutorials.mnist import input_data

prefix = './Datas/'

def get_img(img_path, is_crop=True, crop_h=256, resize_h=64):
    img = scipy.misc.imread(img_path).astype(np.float)
    resize_w = resize_h
    if is_crop:
        crop_w = crop_h
        h, w = img.shape[:2]
        j = int(round((h - crop_h) / 2.))
        i = int(round((w - crop_w) / 2.))
        cropped_image = scipy.misc.imresize(img[j:j + crop_h, i:i + crop_w], [resize_h, resize_w])
    else:
        cropped_image = scipy.misc.imresize(img, [resize_h, resize_w])
    return np.array(cropped_image) / 255.0
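
# Note: all loaders below return images scaled to [0, 1]. This matches the
# sigmoid outputs of the networks in nets.py and the Bernoulli reconstruction
# loss in vae.py.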

class celebA():
    def __init__(self):
        datapath = prefix + 'celebA'
        self.z_dim = 100
        self.size = 64
        self.channel = 3
        self.data = glob(os.path.join(datapath, '*.jpg'))

        self.batch_count = 0

    def __call__(self, batch_size):
        batch_number = len(self.data) // batch_size  # integer division under both Python 2 and 3
        if self.batch_count < batch_number - 2:
            self.batch_count += 1
        else:
            self.batch_count = 0

        path_list = self.data[self.batch_count * batch_size:(self.batch_count + 1) * batch_size]

        batch = [get_img(img_path, True, 128, self.size) for img_path in path_list]
        batch_imgs = np.array(batch).astype(np.float32)

        return batch_imgs

    def data2fig(self, samples):
        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample)
        return fig

class cifar():
    def __init__(self):
        datapath = prefix + 'cifar10'
        self.z_dim = 100
        self.size = 64
        self.channel = 3
        self.data = glob(os.path.join(datapath, '*'))

        self.batch_count = 0

    def __call__(self, batch_size):
        batch_number = len(self.data) // batch_size
        if self.batch_count < batch_number - 2:
            self.batch_count += 1
        else:
            self.batch_count = 0

        path_list = self.data[self.batch_count * batch_size:(self.batch_count + 1) * batch_size]

        batch = [get_img(img_path, False, 128, self.size) for img_path in path_list]
        batch_imgs = np.array(batch).astype(np.float32)

        return batch_imgs

    def data2fig(self, samples):
        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample)
        return fig


class mnist():
    def __init__(self):
        datapath = prefix + 'mnist'
        self.z_dim = 100
        self.size = 64
        self.channel = 1
        self.data = input_data.read_data_sets(datapath, one_hot=True)

    def __call__(self, batch_size):
        # unlike celebA/cifar, this loader also returns the one-hot labels
        batch_imgs = np.zeros([batch_size, self.size, self.size, self.channel])

        batch_x, y = self.data.train.next_batch(batch_size)
        batch_x = np.reshape(batch_x, (batch_size, 28, 28, self.channel))
        for i in range(batch_size):
            img = batch_x[i, :, :, 0]
            batch_imgs[i, :, :, 0] = scipy.misc.imresize(img, [self.size, self.size])
        batch_imgs /= 255.
        return batch_imgs, y

    def data2fig(self, samples):
        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(self.size, self.size), cmap='Greys_r')
        return fig


if __name__ == '__main__':
    data = mnist()
    imgs, _ = data(20)

    fig = data.data2fig(imgs[:16, :, :])
    plt.savefig('Samples/test.png', bbox_inches='tight')
    plt.close(fig)

--------------------------------------------------------------------------------
/began.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import os, sys

sys.path.append('utils')
from nets import *
from datas import *

def sample_z(m, n):
    return np.random.uniform(-1., 1., size=[m, n])

class BEGAN():
    def __init__(self, generator, discriminator, data):
        self.generator = generator
        self.discriminator = discriminator
        self.data = data

        # data
        self.z_dim = self.data.z_dim
        self.size = self.data.size
        self.channel = self.data.channel

        self.X = tf.placeholder(tf.float32, shape=[None, self.size, self.size, self.channel])
        self.z = tf.placeholder(tf.float32, shape=[None, self.z_dim])

        # began parameters
        self.k_t = tf.placeholder(tf.float32, shape=[])  # balancing weight, updated every iteration
        gamma = 0.75  # diversity ratio, used to control the model equilibrium
        lambda_k = 0.001  # learning rate for k; Berthelot et al. use 0.001

        # nets
        self.G_sample = self.generator(self.z)

        self.D_real = self.discriminator(self.X)
        self.D_fake = self.discriminator(self.G_sample, reuse=True)

        # loss: per-pixel L1 reconstruction errors of the auto-encoder D
        L_real = tf.reduce_mean(tf.abs(self.X - self.D_real))
        L_fake = tf.reduce_mean(tf.abs(self.G_sample - self.D_fake))

        self.D_loss = L_real - self.k_t * L_fake
        self.G_loss = L_fake

        self.k_tn = self.k_t + lambda_k * (gamma * L_real - L_fake)
        self.M_global = L_real + tf.abs(gamma * L_real - L_fake)
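        # k_tn is a proportional controller for the equilibrium
        # gamma * L_real = L_fake: k grows when gamma * L_real > L_fake,
        # putting more weight on pushing up the fake energy, and shrinks
        # otherwise. M_global is the paper's convergence measure.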

        # solver
        self.learning_rate = tf.placeholder(tf.float32, shape=[])
        self.D_solver = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.D_loss, var_list=self.discriminator.vars)
        self.G_solver = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.G_loss, var_list=self.generator.vars)

        self.saver = tf.train.Saver()
        gpu_options = tf.GPUOptions(allow_growth=True)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        self.model_name = 'Models/began.ckpt'

    def train(self, sample_dir, training_epoches=500000, batch_size=16):
        fig_count = 0
        self.sess.run(tf.global_variables_initializer())
        # self.saver.restore(self.sess, self.model_name)

        k_tn = 0
        learning_rate_initial = 1e-4
        for epoch in range(training_epoches):
            learning_rate = learning_rate_initial * pow(0.5, epoch // 50000)
            # update D and G; feed the clamped k back into the graph
            X_b = self.data(batch_size)
            _, _, k_tn = self.sess.run(
                [self.D_solver, self.G_solver, self.k_tn],
                feed_dict={self.X: X_b, self.z: sample_z(batch_size, self.z_dim), self.k_t: min(max(k_tn, 0.), 1.), self.learning_rate: learning_rate}
            )
            # print the loss; save images and the model periodically
            if epoch % 100 == 0 or epoch < 100:
                D_loss_curr, G_loss_curr, M_global_curr = self.sess.run(
                    [self.D_loss, self.G_loss, self.M_global],
                    feed_dict={self.X: X_b, self.z: sample_z(batch_size, self.z_dim), self.k_t: min(max(k_tn, 0.), 1.)})
                print('Iter: {}; D loss: {:.4}; G_loss: {:.4}; M_global: {:.4}; k_t: {:.6}; learning_rate: {:.8}'.format(epoch, D_loss_curr, G_loss_curr, M_global_curr, min(max(k_tn, 0.), 1.), learning_rate))

            if epoch % 1000 == 0:
                X_s, real, samples = self.sess.run([self.X, self.D_real, self.G_sample], feed_dict={self.X: X_b[:16, :, :, :], self.z: sample_z(16, self.z_dim)})

                fig = self.data.data2fig(X_s)
                plt.savefig('{}/{}.png'.format(sample_dir, str(fig_count).zfill(3)), bbox_inches='tight')
                plt.close(fig)

                fig = self.data.data2fig(real)
                plt.savefig('{}/{}_d.png'.format(sample_dir, str(fig_count).zfill(3)), bbox_inches='tight')
                plt.close(fig)

                fig = self.data.data2fig(samples)
                plt.savefig('{}/{}_r.png'.format(sample_dir, str(fig_count).zfill(3)), bbox_inches='tight')
                plt.close(fig)

                fig_count += 1

            if epoch % 5000 == 0:
                self.saver.save(self.sess, self.model_name)

if __name__ == '__main__':

    # constrain GPU usage
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'

    # directories for generated images and model checkpoints
    sample_dir = 'Samples/began'
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)
    if not os.path.exists('Models'):
        os.makedirs('Models')

    # param
    generator = G_conv()
    discriminator = D_autoencoder()

    data = cifar()

    # run
    began = BEGAN(generator, discriminator, data)
    began.train(sample_dir)

--------------------------------------------------------------------------------
/utils/nets.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import tensorflow.contrib.layers as tcl

def lrelu(x, leak=0.2, name="lrelu"):
    with tf.variable_scope(name):
        f1 = 0.5 * (1 + leak)
        f2 = 0.5 * (1 - leak)
        return f1 * x + f2 * abs(x)

class G_conv(object):
    def __init__(self, channel=3, name='G_conv'):
        self.name = name
        self.size = 64 // 16  # start from a 4x4 feature map and upsample x16
        self.channel = channel

    def __call__(self, z):
        with tf.variable_scope(self.name) as scope:
            g = tcl.fully_connected(z, self.size * self.size * 512, activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm)
            g = tf.reshape(g, (-1, self.size, self.size, 512))  # 4x4x512
            g = tcl.conv2d_transpose(g, 256, 3, stride=2,  # 8x8x256
                                     activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            g = tcl.conv2d_transpose(g, 128, 3, stride=2,  # 16x16x128
                                     activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            g = tcl.conv2d_transpose(g, 64, 3, stride=2,  # 32x32x64
                                     activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))

            g = tcl.conv2d_transpose(g, self.channel, 3, stride=2,  # 64x64xchannel; sigmoid keeps outputs in [0, 1]
                                     activation_fn=tf.nn.sigmoid, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            return g

    @property
    def vars(self):
        return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name)


class D_conv(object):
    def __init__(self, name='D_conv'):
        self.name = name

    def __call__(self, x, reuse=False):
        with tf.variable_scope(self.name) as scope:
            if reuse:
                scope.reuse_variables()
            size = 64
            d = tcl.conv2d(x, num_outputs=size, kernel_size=3,  # bzx64x64x3 -> bzx32x32x64
                           stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            d = tcl.conv2d(d, num_outputs=size * 2, kernel_size=3,  # 16x16x128
                           stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            d = tcl.conv2d(d, num_outputs=size * 4, kernel_size=3,  # 8x8x256
                           stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            d = tcl.conv2d(d, num_outputs=size * 8, kernel_size=3,  # 4x4x512
                           stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))

            d = tcl.fully_connected(tcl.flatten(d), 256, activation_fn=lrelu, weights_initializer=tf.random_normal_initializer(0, 0.02))
            d = tcl.fully_connected(d, 1, activation_fn=None, weights_initializer=tf.random_normal_initializer(0, 0.02))  # raw logit

            return d

    @property
    def vars(self):
        return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name)

# for ebgan and began
class D_autoencoder(object):
    def __init__(self, n_hidden=256, name='D_autoencoder'):
        self.name = name
        self.n_hidden = n_hidden

    def __call__(self, x, reuse=False):
        with tf.variable_scope(self.name) as scope:
            if reuse:
                scope.reuse_variables()
            # --- encoder (same shape as D_conv)
            size = 64
            d = tcl.conv2d(x, num_outputs=size, kernel_size=3,  # bzx64x64x3 -> bzx32x32x64
                           stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            d = tcl.conv2d(d, num_outputs=size * 2, kernel_size=3,  # 16x16x128
                           stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            d = tcl.conv2d(d, num_outputs=size * 4, kernel_size=3,  # 8x8x256
                           stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            d = tcl.conv2d(d, num_outputs=size * 8, kernel_size=3,  # 4x4x512
                           stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))

            h = tcl.fully_connected(tcl.flatten(d), self.n_hidden, activation_fn=lrelu, weights_initializer=tf.random_normal_initializer(0, 0.02))

            # --- decoder (same shape as G_conv)
            d = tcl.fully_connected(h, 4 * 4 * 512, activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm)
            d = tf.reshape(d, (-1, 4, 4, 512))  # 4x4x512
            d = tcl.conv2d_transpose(d, 256, 3, stride=2,  # 8x8x256
                                     activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            d = tcl.conv2d_transpose(d, 128, 3, stride=2,  # 16x16x128
                                     activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            d = tcl.conv2d_transpose(d, 64, 3, stride=2,  # 32x32x64
                                     activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))

            d = tcl.conv2d_transpose(d, 3, 3, stride=2,  # 64x64x3 reconstruction in [0, 1]
                                     activation_fn=tf.nn.sigmoid, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            return d

    @property
    def vars(self):
        return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name)

# for vae
class D_vae(object):
    def __init__(self, name='D_vae'):
        self.name = name

    def __call__(self, x, reuse=False):
        with tf.variable_scope(self.name) as scope:
            if reuse:
                scope.reuse_variables()
            size = 64
            d = tcl.conv2d(x, num_outputs=size, kernel_size=3,  # bzx64x64x3 -> bzx32x32x64
                           stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            d = tcl.conv2d(d, num_outputs=size * 2, kernel_size=3,  # 16x16x128
                           stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            d = tcl.conv2d(d, num_outputs=size * 4, kernel_size=3,  # 8x8x256
                           stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            d = tcl.conv2d(d, num_outputs=size * 8, kernel_size=3,  # 4x4x512
                           stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))

            d = tcl.fully_connected(tcl.flatten(d), 256, activation_fn=lrelu, weights_initializer=tf.random_normal_initializer(0, 0.02))
            # mean and log-variance of q(z|x); see vae.py
            mu = tcl.fully_connected(d, 100, activation_fn=None, weights_initializer=tf.random_normal_initializer(0, 0.02))
            sigma = tcl.fully_connected(d, 100, activation_fn=None, weights_initializer=tf.random_normal_initializer(0, 0.02))

            return mu, sigma

    @property
    def vars(self):
        return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name)

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
All models have been tested with Python 2.7+ and TensorFlow 1.0+ on Linux.

* Samples: saves generated data; each folder contains a figure showing the results.
* utils: contains 2 files
  * datas.py: data preprocessing.
  * nets.py: the Generator and Discriminator definitions.


For research purposes:
**Network architecture**: all GANs use the same network architecture (the Discriminator of EBGAN and BEGAN is a combination of the traditional D and G).
**Learning rate**: all initialized to 1e-4 and halved every 50,000 iterations (`lr = 1e-4 * 0.5 ** (iter // 50000)` in the code). This may be slightly unfair to some GANs, but the influence is small, so I ignored it.
**Dataset**: celebA, center-cropped to 128 and resized to 64. Users should copy all celebA images to `./Datas/celebA` for training.

- [x] DCGAN
- [x] EBGAN
- [x] WGAN
- [x] BEGAN

And for comparison, I added VAE here.
- [x] VAE

The generated results are shown at the end of this page.

***************


# Theories

:sparkles:DCGAN
--------
**Main idea: techniques (of architecture) to stabilize GAN training**
[Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks](https://arxiv.org/pdf/1511.06434.pdf) [2015]

**Loss Function** (the same as the vanilla GAN)

![DCGAN_loss](https://raw.githubusercontent.com/YadiraF/Images/master/GAN/GAN_loss.png)


**Architecture guidelines for stable Deep Convolutional GANs**

* Replace any pooling layers with strided convolutions (discriminator) and fractional-strided convolutions (generator).
* Use batchnorm in both the generator and the discriminator.
* Remove fully connected hidden layers for deeper architectures. Just use average pooling at the end.
* Use ReLU activation in the generator for all layers except the output, which uses Tanh.
* Use LeakyReLU activation in the discriminator for all layers.

***************

:sparkles:EBGAN
--------
**Main idea: view the discriminator as an energy function**
[Energy-based Generative Adversarial Network](https://arxiv.org/pdf/1609.03126.pdf) [2016]
(EBGAN is introduced here mainly for BEGAN; they use the same network structure.)

What is an energy function?
![EBGAN_structure](https://raw.githubusercontent.com/YadiraF/Images/master/GAN/Energy_based_model.png)
The figure is from [LeCun, Yann, et al. "A tutorial on energy-based learning."](http://yann.lecun.com/exdb/publis/pdf/lecun-06.pdf)

In EBGAN, we want the Discriminator to distinguish real images from generated (fake) ones. How? A simple idea is to set X as the real image and Y as the reconstructed image, and then minimize the energy between X and Y. So we need an auto-encoder to get Y from X, and a measure to calculate the energy (here simply the MSE).
Finally we get the structure of the Discriminator as shown below.

![EBGAN_structure](https://raw.githubusercontent.com/YadiraF/Images/master/GAN/EBGAN_structure.png)

So the task of D is to minimize the MSE between a real image and its reconstruction, and to maximize the MSE between a fake image from G and its reconstruction. G does the adversarial task: minimize the MSE of the fake images.
The loss function can then be written as:
![EBGAN_loss](https://raw.githubusercontent.com/YadiraF/Images/master/GAN/EBGAN_loss1.png)

And for comparison with BEGAN, we can treat D as just the auto-encoder and write L(.) for the MSE loss.
**Loss Function**
![EBGAN_loss](https://raw.githubusercontent.com/YadiraF/Images/master/GAN/EBGAN_loss2.png)

m is a positive margin: when L(G(z)) is close to zero, L_D is L(x) + m, which trains D harder; conversely, when L(G(z)) > m, L_D is just L(x), which means D loosens its judgement of the fake images.
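In TensorFlow this is only a few lines. The sketch below mirrors `ebgan.py`, with `X`, `G_z`, `D`, and `m` standing in for the real batch, the generator output, the auto-encoder discriminator, and the margin (50 in this repo); `tf.nn.l2_loss` plays the role of L(.):

```python
# Sketch of the EBGAN objectives (see ebgan.py for the full version).
L_real = tf.nn.l2_loss(X - D(X))              # energy of real images
L_fake = tf.nn.l2_loss(G_z - D(G_z))          # energy of generated images

D_loss = L_real + tf.maximum(0., m - L_fake)  # push fake energy up, but only to the margin m
G_loss = L_fake                               # G lowers the energy of its own samples
```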

Finally, there is a question for EBGAN: why use an auto-encoder in D instead of the traditional classifier? What are the benefits?
I have not read the paper carefully, but one reason (stated in the paper) is that auto-encoders can learn an energy manifold without supervision or negative examples. So, rather than simply judging images real or fake, the new D can capture the underlying distribution of the data and then distinguish them. The generated results shown in the EBGAN paper also illustrate this (in my understanding): the celebA images generated by DCGAN can hardly separate the face from the complex background, while the images from EBGAN focus more heavily on generating faces.

***************

:sparkles:Wasserstein GAN
--------
**Main idea: stabilize training by using the Wasserstein-1 distance instead of the Jensen-Shannon (JS) divergence**
GANs based on the JS divergence suffer from the non-overlapping-support problem, leading to mode collapse and convergence difficulties.
Using the EM (earth mover) distance, i.e. the Wasserstein-1 distance, GAN can solve these two problems without a particular architecture (like DCGAN).
[Wasserstein GAN](https://arxiv.org/pdf/1701.07875.pdf) [2017]

**Mathematical Analysis**
Why does the JS divergence cause problems? Please see [Towards Principled Methods for Training Generative Adversarial Networks](https://arxiv.org/pdf/1701.04862.pdf).

Anyway, this highlights the fact that **the KL, JS, and TV distances are not sensible cost functions** when learning distributions supported by low-dimensional manifolds.

So the authors use the Wasserstein distance:
![WGAN_loss](https://raw.githubusercontent.com/YadiraF/Images/master/GAN/WGAN_loss1.png)
The critic D is trained to maximize this objective, yielding an estimate of the distance, while G is trained to minimize the estimated distance.

However, the original formula is difficult to evaluate directly: the constraint ||f||_L <= 1 is hard to enforce. After some mathematical analysis, the authors replace it with clipping the variables of D, and the Wasserstein version of the GAN loss function becomes:
**Loss Function**
![WGAN_loss](https://raw.githubusercontent.com/YadiraF/Images/master/GAN/WGAN_loss2.png)


**Algorithm guidelines for stable GANs**

* No log in the loss. The output of D is no longer a probability, so we do not apply a sigmoid at the output of D:
  ```python
  G_loss = -tf.reduce_mean(D_fake)
  D_loss = tf.reduce_mean(D_fake) - tf.reduce_mean(D_real)
  ```
* Clip the weights of D (to [-0.01, 0.01]):
  ```python
  self.clip_D = [var.assign(tf.clip_by_value(var, -0.01, 0.01)) for var in self.discriminator.vars]
  ```
* Train D more than G (5:1); a full update-loop sketch follows this list.
* Use RMSProp instead of Adam.
* Use a lower learning rate (0.00005).
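
Putting these rules together, one WGAN update looks roughly like this (a sketch following `wgan.py`; `n_critic` is the number of critic steps per generator step, 5 in the paper, and the learning-rate feed is omitted for brevity):

```python
# One WGAN update (sketch following wgan.py).
for _ in range(n_critic):
    X_b = data(batch_size)              # a fresh batch for each critic step
    sess.run([clip_D, D_solver],        # clip the critic weights alongside the critic update
             feed_dict={X: X_b, z: sample_z(batch_size, z_dim)})
sess.run(G_solver,                      # a single generator update
         feed_dict={z: sample_z(batch_size, z_dim)})
```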

****************

:sparkles: BEGAN
--------
**Main idea: match the auto-encoder loss distributions using a loss derived from the Wasserstein distance**
[BEGAN: Boundary Equilibrium Generative Adversarial Networks](https://arxiv.org/pdf/1703.10717.pdf) [2017]

**Mathematical Analysis**
We have already introduced the structure of EBGAN, which is also used in BEGAN.
Instead of computing the Wasserstein distance between the sample distributions, as WGAN does, BEGAN computes the Wasserstein distance between the loss (reconstruction error) distributions.
(I find the mathematical analysis in BEGAN clearer and more intuitive than in WGAN.)
So, simply replacing the samples with their auto-encoder losses L, we get the loss function:
![BEGAN_loss](https://raw.githubusercontent.com/YadiraF/Images/master/GAN/BEGAN_loss1.png)

Then comes the most interesting part:
a new hyper-parameter gamma to control the trade-off between image diversity and visual quality,
![BEGAN_loss](https://raw.githubusercontent.com/YadiraF/Images/master/GAN/BEGAN_loss2.png)
Lower values of gamma lead to lower image diversity because the discriminator focuses more heavily on auto-encoding real images.

The final loss function is:
**Loss Function**
![BEGAN_loss](https://raw.githubusercontent.com/YadiraF/Images/master/GAN/BEGAN_loss3.png)

The intuition behind it is easy to understand (here is my rough reading):
(1) In the beginning, G and D are initialized randomly and k_0 = 0, so L_real is larger than L_fake, leading to a short increase of k.
(2) After several iterations, D easily learns to reconstruct the real data, so gamma * L_real - L_fake becomes negative and k decreases to 0; now D only reconstructs the real data, while G learns the real data distribution so as to minimize the reconstruction error in D.
(3) As G gets better at generating images that look like real data, L_fake gets smaller and k gets larger, so D focuses more on discriminating real from fake data, and G in turn is trained harder.
(4) In the end, k becomes a constant, which means gamma * L_real - L_fake = 0, and the optimization has reached its equilibrium.


And the global loss is defined as the sum of L_real (how well D has learned the distribution of the real data) and |gamma * L_real - L_fake| (how close the generated data is to the real data):
![BEGAN_loss](https://raw.githubusercontent.com/YadiraF/Images/master/GAN/BEGAN_loss4.png)
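
In code, k is just a running scalar clamped to [0, 1] that is fed back into the graph every iteration. A sketch following `began.py` (with `lambda_k = 0.001` and `gamma = 0.75`; the learning-rate feed is omitted, and the update is written on the Python side for clarity):

```python
# BEGAN control loop (sketch following began.py).
k = 0.
for it in range(iterations):
    _, _, L_real_v, L_fake_v = sess.run(
        [D_solver, G_solver, L_real, L_fake],
        feed_dict={X: data(batch_size), z: sample_z(batch_size, z_dim), k_t: k})
    k = min(max(k + lambda_k * (gamma * L_real_v - L_fake_v), 0.), 1.)  # k_{t+1}
    M_global = L_real_v + abs(gamma * L_real_v - L_fake_v)              # convergence measure
```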

I set gamma = 0.75 and the learning rate of k to 0.001; the learning curves of the loss and k are shown below.
![BEGAN_loss](https://raw.githubusercontent.com/YadiraF/Images/master/GAN/BEGAN_curve.png)



# Results

DCGAN
![DCGAN_samples](https://raw.githubusercontent.com/YadiraF/GAN_Theories/master/Samples/dcgan/497.png)

EBGAN (not trained enough)
![EBGAN_samples](https://raw.githubusercontent.com/YadiraF/GAN_Theories/master/Samples/ebgan/109_r.png)

WGAN (not trained enough)
![WGAN_samples](https://raw.githubusercontent.com/YadiraF/GAN_Theories/master/Samples/wgan/260.png)

BEGAN: gamma = 0.75, learning rate of k = 0.001
![BEGAN_samples](https://raw.githubusercontent.com/YadiraF/GAN_Theories/master/Samples/began_n/369_r.png)

BEGAN: gamma = 0.5, learning rate of k = 0.002
![BEGAN_samples](https://raw.githubusercontent.com/YadiraF/GAN_Theories/master/Samples/began/228_r.png)

VAE
![VAE_samples](https://raw.githubusercontent.com/YadiraF/GAN_Theories/master/Samples/vae/499_s.png)


# References
http://wiseodd.github.io/techblog/2016/12/10/variational-autoencoder/ (a good blog post introducing VAE)
https://github.com/wiseodd/generative-models/tree/master/GAN
https://github.com/artcg/BEGAN


# Others
TensorFlow style guide: https://www.tensorflow.org/community/style_guide



A good website for converting LaTeX equations to images (to insert into a README):
http://www.sciweavers.org/free-online-latex-equation-editor
--------------------------------------------------------------------------------