├── Samples
│   ├── dcgan
│   │   └── 497.png
│   ├── vae
│   │   └── 499_s.png
│   ├── wgan
│   │   └── 260.png
│   ├── began
│   │   └── 228_r.png
│   ├── ebgan
│   │   └── 109_r.png
│   └── began_n
│       └── 369_r.png
├── vae.py
├── dcgan.py
├── wgan.py
├── ebgan.py
├── utils
│   ├── datas.py
│   └── nets.py
├── began.py
└── README.md

/Samples/dcgan/497.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yfeng95/GAN_Theories/HEAD/Samples/dcgan/497.png
--------------------------------------------------------------------------------
/Samples/vae/499_s.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yfeng95/GAN_Theories/HEAD/Samples/vae/499_s.png
--------------------------------------------------------------------------------
/Samples/wgan/260.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yfeng95/GAN_Theories/HEAD/Samples/wgan/260.png
--------------------------------------------------------------------------------
/Samples/began/228_r.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yfeng95/GAN_Theories/HEAD/Samples/began/228_r.png
--------------------------------------------------------------------------------
/Samples/ebgan/109_r.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yfeng95/GAN_Theories/HEAD/Samples/ebgan/109_r.png
--------------------------------------------------------------------------------
/Samples/began_n/369_r.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yfeng95/GAN_Theories/HEAD/Samples/began_n/369_r.png
--------------------------------------------------------------------------------
/vae.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import os, sys

sys.path.append('utils')
from nets import *
from datas import *

def sample_z(m, n):
    # eps ~ N(0, I): both the reparameterization trick and the Gaussian KL
    # term below assume standard normal noise, not a uniform distribution
    return np.random.normal(0., 1., size=[m, n])

class VAE():
    def __init__(self, generator, discriminator, data):
        self.generator = generator
        self.discriminator = discriminator
        self.data = data

        # data
        self.z_dim = self.data.z_dim
        self.size = self.data.size
        self.channel = self.data.channel

        self.X = tf.placeholder(tf.float32, shape=[None, self.size, self.size, self.channel])
        self.z = tf.placeholder(tf.float32, shape=[None, self.z_dim])

        # nets: the "discriminator" acts as the encoder here and returns the
        # mean and log-variance of q(z|X)
        mu, sigma = self.discriminator(self.X)
        latent_code = mu + tf.exp(sigma / 2) * self.z  # reparameterization trick

        self.G_real = self.generator(latent_code)
        self.G_sample = self.generator(self.z)

        # loss
        # E[log P(X|z)]: Bernoulli reconstruction (cross-entropy); valid since
        # the inputs and the sigmoid outputs both lie in [0, 1]
        epsilon = 1e-8
        self.recon = tf.reduce_sum(-self.X * tf.log(self.G_real + epsilon) - (1.0 - self.X) * tf.log(1.0 - self.G_real + epsilon))

        # D_KL(Q(z|X) || P(z)); closed form since both distributions are Gaussian
        self.kl = 0.5 * tf.reduce_sum(tf.exp(sigma) + tf.square(mu) - 1. - sigma)

        self.loss = self.recon + self.kl
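        # recon + kl is the negative evidence lower bound (ELBO):
        #   -ELBO = E_q[-log P(X|z)] + D_KL(q(z|X) || N(0, I))
        # so minimizing self.loss maximizes a lower bound on log P(X).
        # Both sums run over the whole batch, so the loss scales with batch_size.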

        # solver
        self.learning_rate = tf.placeholder(tf.float32, shape=[])
        self.solver = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.loss, var_list=self.generator.vars + self.discriminator.vars)

        self.saver = tf.train.Saver()
        gpu_options = tf.GPUOptions(allow_growth=True)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        self.model_name = 'Models/vae.ckpt'

    def train(self, sample_dir, training_epoches=500000, batch_size=32):
        fig_count = 0
        self.sess.run(tf.global_variables_initializer())
        # self.saver.restore(self.sess, self.model_name)

        learning_rate_initial = 1e-4
        for epoch in range(training_epoches):
            # halve the learning rate every 50000 iterations
            learning_rate = learning_rate_initial * pow(0.5, epoch // 50000)
            X_b = self.data(batch_size)
            self.sess.run(
                self.solver,
                feed_dict={self.X: X_b, self.z: sample_z(batch_size, self.z_dim), self.learning_rate: learning_rate}
            )
            # print the loss; save images and the model periodically
            if epoch % 100 == 0 or epoch < 100:
                loss_curr = self.sess.run(
                    self.loss,
                    feed_dict={self.X: X_b, self.z: sample_z(batch_size, self.z_dim)})
                print('Iter: {}; loss: {:.4}'.format(epoch, loss_curr))

            if epoch % 1000 == 0:
                real, samples = self.sess.run([self.G_real, self.G_sample], feed_dict={self.X: X_b[:16, :, :, :], self.z: sample_z(16, self.z_dim)})

                fig = self.data.data2fig(real)
                plt.savefig('{}/{}.png'.format(sample_dir, str(fig_count).zfill(3)), bbox_inches='tight')
                plt.close(fig)

                fig = self.data.data2fig(samples)
                plt.savefig('{}/{}_s.png'.format(sample_dir, str(fig_count).zfill(3)), bbox_inches='tight')
                plt.close(fig)

                fig_count += 1

            if epoch % 5000 == 0:
                self.saver.save(self.sess, self.model_name)


if __name__ == '__main__':

    # constrain GPU usage
    os.environ['CUDA_VISIBLE_DEVICES'] = '2'

    # directories for generated images and model checkpoints
    sample_dir = 'Samples/vae'
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)
    if not os.path.exists('Models'):
        os.makedirs('Models')

    # param
    generator = G_conv()
    discriminator = D_vae()

    data = celebA()

    # run
    vae = VAE(generator, discriminator, data)
    vae.train(sample_dir)

--------------------------------------------------------------------------------
/dcgan.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import os, sys

sys.path.append('utils')
from nets import *
from datas import *

def sample_z(m, n):
    return np.random.uniform(-1., 1., size=[m, n])

class DCGAN():
    def __init__(self, generator, discriminator, data):
        self.generator = generator
        self.discriminator = discriminator
        self.data = data

        # data
        self.z_dim = self.data.z_dim
        self.size = self.data.size
        self.channel = self.data.channel

        self.X = tf.placeholder(tf.float32, shape=[None, self.size, self.size, self.channel])
        self.z = tf.placeholder(tf.float32, shape=[None, self.z_dim])

        # nets
        self.G_sample = self.generator(self.z)

        self.D_real = self.discriminator(self.X)
        self.D_fake = self.discriminator(self.G_sample, reuse=True)

        # loss
        self.D_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.D_real, labels=tf.ones_like(self.D_real))) + tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.D_fake, labels=tf.zeros_like(self.D_fake)))
        self.G_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.D_fake, labels=tf.ones_like(self.D_fake)))
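        # Non-saturating heuristic: instead of minimizing log(1 - D(G(z))),
        # G maximizes log D(G(z)) by labeling its own samples as real, which
        # gives much stronger gradients early in training.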

        # solver
        self.learning_rate = tf.placeholder(tf.float32, shape=[])
        self.D_solver = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.D_loss, var_list=self.discriminator.vars)
        self.G_solver = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.G_loss, var_list=self.generator.vars)

        self.saver = tf.train.Saver()
        gpu_options = tf.GPUOptions(allow_growth=True)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        self.model_name = 'Models/dcgan.ckpt'

    def train(self, sample_dir, training_epoches=500000, batch_size=32):
        fig_count = 0
        self.sess.run(tf.global_variables_initializer())
        # self.saver.restore(self.sess, self.model_name)
        learning_rate_initial = 1e-4
        for epoch in range(training_epoches):
            learning_rate = learning_rate_initial * pow(0.5, epoch // 50000)
            # update D
            X_b = self.data(batch_size)
            self.sess.run(
                self.D_solver,
                feed_dict={self.X: X_b, self.z: sample_z(batch_size, self.z_dim), self.learning_rate: learning_rate}
            )
            # update G
            for _ in range(1):
                self.sess.run(
                    self.G_solver,
                    feed_dict={self.z: sample_z(batch_size, self.z_dim), self.learning_rate: learning_rate}
                )

            # print the loss; save images and the model periodically
            if epoch % 100 == 0 or epoch < 100:
                D_loss_curr, G_loss_curr = self.sess.run(
                    [self.D_loss, self.G_loss],
                    feed_dict={self.X: X_b, self.z: sample_z(batch_size, self.z_dim)})
                print('Iter: {}; D loss: {:.4}; G_loss: {:.4}'.format(epoch, D_loss_curr, G_loss_curr))

            if epoch % 1000 == 0:
                samples = self.sess.run(self.G_sample, feed_dict={self.z: sample_z(16, self.z_dim)})

                fig = self.data.data2fig(samples)
                plt.savefig('{}/{}.png'.format(sample_dir, str(fig_count).zfill(3)), bbox_inches='tight')
                fig_count += 1
                plt.close(fig)

            if epoch % 5000 == 0:
                self.saver.save(self.sess, self.model_name)


if __name__ == '__main__':

    # constrain GPU usage
    os.environ['CUDA_VISIBLE_DEVICES'] = '2'

    # directories for generated images and model checkpoints
    sample_dir = 'Samples/dcgan'
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)
    if not os.path.exists('Models'):
        os.makedirs('Models')

    # param
    generator = G_conv()
    discriminator = D_conv()

    data = celebA()

    # run
    dcgan = DCGAN(generator, discriminator, data)
    dcgan.train(sample_dir)

--------------------------------------------------------------------------------
/wgan.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import os, sys

sys.path.append('utils')
from nets import *
from datas import *

def sample_z(m, n):
    return np.random.uniform(-1., 1., size=[m, n])

class WGAN():
    def __init__(self, generator, discriminator, data):
        self.generator = generator
        self.discriminator = discriminator
        self.data = data

        # data
        self.z_dim = self.data.z_dim
        self.size = self.data.size
        self.channel = self.data.channel

        self.X = tf.placeholder(tf.float32, shape=[None, self.size, self.size, self.channel])
        self.z = tf.placeholder(tf.float32, shape=[None, self.z_dim])

        # nets
        self.G_sample = self.generator(self.z)

        self.D_real = self.discriminator(self.X)
        self.D_fake = self.discriminator(self.G_sample, reuse=True)

        # loss: the critic maximizes D(real) - D(fake), an estimate of the
        # Wasserstein-1 distance; written below as minimization
        self.D_loss = -tf.reduce_mean(self.D_real) + tf.reduce_mean(self.D_fake)
        self.G_loss = -tf.reduce_mean(self.D_fake)

        # clip every critic weight into [-0.01, 0.01]
        self.clip_D = [var.assign(tf.clip_by_value(var, -0.01, 0.01)) for var in self.discriminator.vars]
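        # Weight clipping is a crude way to enforce the Lipschitz constraint
        # required by the Kantorovich-Rubinstein dual form of the Wasserstein
        # distance: each critic variable is projected back into the box
        # around each critic update.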

        # solver
        self.learning_rate = tf.placeholder(tf.float32, shape=[])
        self.D_solver = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate).minimize(self.D_loss, var_list=self.discriminator.vars)
        self.G_solver = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate).minimize(self.G_loss, var_list=self.generator.vars)

        gpu_options = tf.GPUOptions(allow_growth=True)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        self.saver = tf.train.Saver()
        self.model_name = 'Models/wgan.ckpt'

    def train(self, sample_dir, training_epoches=500000, batch_size=32):
        fig_count = 0
        self.sess.run(tf.global_variables_initializer())
        # self.saver.restore(self.sess, self.model_name)

        learning_rate_initial = 1e-4
        for epoch in range(training_epoches):
            learning_rate = learning_rate_initial * pow(0.5, epoch // 50000)
            # update D: train the critic harder at the start and periodically,
            # as in the WGAN reference implementation
            n_d = 100 if epoch < 25 or (epoch + 1) % 500 == 0 else 5
            for _ in range(n_d):
                X_b = self.data(batch_size)
                self.sess.run(
                    [self.clip_D, self.D_solver],
                    feed_dict={self.X: X_b, self.z: sample_z(batch_size, self.z_dim), self.learning_rate: learning_rate}
                )
            # update G
            for _ in range(1):
                self.sess.run(
                    self.G_solver,
                    feed_dict={self.z: sample_z(batch_size, self.z_dim), self.learning_rate: learning_rate}
                )

            # print the loss; save images and the model periodically
            if epoch % 100 == 0 or epoch < 100:
                D_loss_curr, G_loss_curr = self.sess.run(
                    [self.D_loss, self.G_loss],
                    feed_dict={self.X: X_b, self.z: sample_z(batch_size, self.z_dim)})
                print('Iter: {}; D loss: {:.4}; G_loss: {:.4}'.format(epoch, D_loss_curr, G_loss_curr))

            if epoch % 1000 == 0:
                samples = self.sess.run(self.G_sample, feed_dict={self.z: sample_z(16, self.z_dim)})

                fig = self.data.data2fig(samples)
                plt.savefig('{}/{}.png'.format(sample_dir, str(fig_count).zfill(3)), bbox_inches='tight')
                fig_count += 1
                plt.close(fig)

            if epoch % 5000 == 0:
                self.saver.save(self.sess, self.model_name)


if __name__ == '__main__':

    # constrain GPU usage
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'

    # directories for generated images and model checkpoints
    sample_dir = 'Samples/wgan'
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)
    if not os.path.exists('Models'):
        os.makedirs('Models')

    # param
    generator = G_conv()
    discriminator = D_conv()

    data = celebA()

    # run
    wgan = WGAN(generator, discriminator, data)
    wgan.train(sample_dir)

--------------------------------------------------------------------------------
/ebgan.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import os, sys

sys.path.append('utils')
from nets import *
from datas import *

def sample_z(m, n):
    return np.random.uniform(-1., 1., size=[m, n])

class EBGAN():
    def __init__(self, generator, discriminator, data):
        self.generator = generator
        self.discriminator = discriminator
        self.data = data

        # data
        self.z_dim = self.data.z_dim
        self.size = self.data.size
        self.channel = self.data.channel

        self.X = tf.placeholder(tf.float32, shape=[None, self.size, self.size, self.channel])
        self.z = tf.placeholder(tf.float32, shape=[None, self.z_dim])

        # ebgan parameters
        margin = 50.  # energy margin m in the hinge loss

        # nets
        self.G_sample = self.generator(self.z)

        self.D_real = self.discriminator(self.X)
        self.D_fake = self.discriminator(self.G_sample, reuse=True)

        # loss: D is an auto-encoder, so its "energy" is the reconstruction
        # error; tf.nn.l2_loss sums 0.5 * squared error over the whole batch
        # L_real = tf.reduce_mean((self.X - self.D_real)**2, [1, 2, 3])
        # L_fake = tf.reduce_mean((self.G_sample - self.D_fake)**2, [1, 2, 3])
        L_real = tf.nn.l2_loss(self.X - self.D_real)
        L_fake = tf.nn.l2_loss(self.G_sample - self.D_fake)

        self.D_loss = L_real + tf.maximum(0., margin - L_fake)
        self.G_loss = L_fake
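        # Hinge form of the EBGAN objective: D pushes the energy of real
        # samples down, and pushes the energy of fake samples up only while
        # it is below the margin; G simply minimizes the energy of its own
        # samples.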

        # solver
        self.learning_rate = tf.placeholder(tf.float32, shape=[])
        self.D_solver = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.D_loss, var_list=self.discriminator.vars)
        self.G_solver = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.G_loss, var_list=self.generator.vars)

        self.saver = tf.train.Saver()
        gpu_options = tf.GPUOptions(allow_growth=True)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        self.model_name = 'Models/ebgan.ckpt'

    def train(self, sample_dir, training_epoches=500000, batch_size=32):
        fig_count = 0
        self.sess.run(tf.global_variables_initializer())
        # self.saver.restore(self.sess, self.model_name)

        learning_rate_initial = 1e-4
        for epoch in range(training_epoches):
            learning_rate = learning_rate_initial * pow(0.5, epoch // 50000)
            # update D and G
            X_b = self.data(batch_size)
            self.sess.run(
                [self.D_solver, self.G_solver],
                feed_dict={self.X: X_b, self.z: sample_z(batch_size, self.z_dim), self.learning_rate: learning_rate}
            )
            # print the loss; save images and the model periodically
            if epoch % 100 == 0 or epoch < 100:
                D_loss_curr, G_loss_curr = self.sess.run(
                    [self.D_loss, self.G_loss],
                    feed_dict={self.X: X_b, self.z: sample_z(batch_size, self.z_dim)})
                print('Iter: {}; D loss: {:.4}; G_loss: {:.4};'.format(epoch, D_loss_curr, G_loss_curr))

            if epoch % 1000 == 0:
                X_s, real, samples = self.sess.run([self.X, self.D_real, self.G_sample], feed_dict={self.X: X_b[:16, :, :, :], self.z: sample_z(16, self.z_dim)})

                fig = self.data.data2fig(X_s)
                plt.savefig('{}/{}.png'.format(sample_dir, str(fig_count).zfill(3)), bbox_inches='tight')
                plt.close(fig)

                fig = self.data.data2fig(real)
                plt.savefig('{}/{}_d.png'.format(sample_dir, str(fig_count).zfill(3)), bbox_inches='tight')
                plt.close(fig)

                fig = self.data.data2fig(samples)
                plt.savefig('{}/{}_r.png'.format(sample_dir, str(fig_count).zfill(3)), bbox_inches='tight')
                plt.close(fig)

                fig_count += 1

            if epoch % 5000 == 0:
                self.saver.save(self.sess, self.model_name)

if __name__ == '__main__':

    # constrain GPU usage
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'

    # directories for generated images and model checkpoints
    sample_dir = 'Samples/ebgan'
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)
    if not os.path.exists('Models'):
        os.makedirs('Models')

    # param
    generator = G_conv()
    discriminator = D_autoencoder()

    data = celebA()

    # run
    ebgan = EBGAN(generator, discriminator, data)
    ebgan.train(sample_dir)

--------------------------------------------------------------------------------
/utils/datas.py:
--------------------------------------------------------------------------------
import os, sys
from glob import glob
import numpy as np
import scipy.misc
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

from tensorflow.examples.tutorials.mnist import input_data

prefix = './Datas/'

def get_img(img_path, is_crop=True, crop_h=256, resize_h=64):
    img = scipy.misc.imread(img_path).astype(np.float)
    resize_w = resize_h
    if is_crop:
        crop_w = crop_h
        h, w = img.shape[:2]
        j = int(round((h - crop_h) / 2.))
        i = int(round((w - crop_w) / 2.))
        cropped_image = scipy.misc.imresize(img[j:j + crop_h, i:i + crop_w], [resize_h, resize_w])
    else:
        cropped_image = scipy.misc.imresize(img, [resize_h, resize_w])
    return np.array(cropped_image) / 255.0
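
# Note: all loaders below return images scaled to [0, 1]. This matches the
# sigmoid outputs of the networks in nets.py and the Bernoulli reconstruction
# loss in vae.py.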

class celebA():
    def __init__(self):
        datapath = prefix + 'celebA'
        self.z_dim = 100
        self.size = 64
        self.channel = 3
        self.data = glob(os.path.join(datapath, '*.jpg'))

        self.batch_count = 0

    def __call__(self, batch_size):
        batch_number = len(self.data) // batch_size  # integer division under both Python 2 and 3
        if self.batch_count < batch_number - 2:
            self.batch_count += 1
        else:
            self.batch_count = 0

        path_list = self.data[self.batch_count * batch_size:(self.batch_count + 1) * batch_size]

        batch = [get_img(img_path, True, 128, self.size) for img_path in path_list]
        batch_imgs = np.array(batch).astype(np.float32)

        return batch_imgs

    def data2fig(self, samples):
        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample)
        return fig

class cifar():
    def __init__(self):
        datapath = prefix + 'cifar10'
        self.z_dim = 100
        self.size = 64
        self.channel = 3
        self.data = glob(os.path.join(datapath, '*'))

        self.batch_count = 0

    def __call__(self, batch_size):
        batch_number = len(self.data) // batch_size
        if self.batch_count < batch_number - 2:
            self.batch_count += 1
        else:
            self.batch_count = 0

        path_list = self.data[self.batch_count * batch_size:(self.batch_count + 1) * batch_size]

        batch = [get_img(img_path, False, 128, self.size) for img_path in path_list]
        batch_imgs = np.array(batch).astype(np.float32)

        return batch_imgs

    def data2fig(self, samples):
        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample)
        return fig


class mnist():
    def __init__(self):
        datapath = prefix + 'mnist'
        self.z_dim = 100
        self.size = 64
        self.channel = 1
        self.data = input_data.read_data_sets(datapath, one_hot=True)

    def __call__(self, batch_size):
        # unlike celebA/cifar, this loader also returns the one-hot labels
        batch_imgs = np.zeros([batch_size, self.size, self.size, self.channel])

        batch_x, y = self.data.train.next_batch(batch_size)
        batch_x = np.reshape(batch_x, (batch_size, 28, 28, self.channel))
        for i in range(batch_size):
            img = batch_x[i, :, :, 0]
            batch_imgs[i, :, :, 0] = scipy.misc.imresize(img, [self.size, self.size])
        batch_imgs /= 255.
        return batch_imgs, y

    def data2fig(self, samples):
        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(self.size, self.size), cmap='Greys_r')
        return fig


if __name__ == '__main__':
    data = mnist()
    imgs, _ = data(20)

    fig = data.data2fig(imgs[:16, :, :])
    plt.savefig('Samples/test.png', bbox_inches='tight')
    plt.close(fig)

--------------------------------------------------------------------------------
/began.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import os, sys

sys.path.append('utils')
from nets import *
from datas import *

def sample_z(m, n):
    return np.random.uniform(-1., 1., size=[m, n])

class BEGAN():
    def __init__(self, generator, discriminator, data):
        self.generator = generator
        self.discriminator = discriminator
        self.data = data

        # data
        self.z_dim = self.data.z_dim
        self.size = self.data.size
        self.channel = self.data.channel

        self.X = tf.placeholder(tf.float32, shape=[None, self.size, self.size, self.channel])
        self.z = tf.placeholder(tf.float32, shape=[None, self.z_dim])

        # began parameters
        self.k_t = tf.placeholder(tf.float32, shape=[])  # balancing weight, updated every iteration
        gamma = 0.75  # diversity ratio, used to control the model equilibrium
        lambda_k = 0.001  # learning rate for k; Berthelot et al. use 0.001

        # nets
        self.G_sample = self.generator(self.z)

        self.D_real = self.discriminator(self.X)
        self.D_fake = self.discriminator(self.G_sample, reuse=True)

        # loss: per-pixel L1 reconstruction errors of the auto-encoder D
        L_real = tf.reduce_mean(tf.abs(self.X - self.D_real))
        L_fake = tf.reduce_mean(tf.abs(self.G_sample - self.D_fake))

        self.D_loss = L_real - self.k_t * L_fake
        self.G_loss = L_fake

        self.k_tn = self.k_t + lambda_k * (gamma * L_real - L_fake)
        self.M_global = L_real + tf.abs(gamma * L_real - L_fake)
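        # k_tn is a proportional controller for the equilibrium
        # gamma * L_real = L_fake: k grows when gamma * L_real > L_fake,
        # putting more weight on pushing up the fake energy, and shrinks
        # otherwise. M_global is the paper's convergence measure.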

        # solver
        self.learning_rate = tf.placeholder(tf.float32, shape=[])
        self.D_solver = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.D_loss, var_list=self.discriminator.vars)
        self.G_solver = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.G_loss, var_list=self.generator.vars)

        self.saver = tf.train.Saver()
        gpu_options = tf.GPUOptions(allow_growth=True)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        self.model_name = 'Models/began.ckpt'

    def train(self, sample_dir, training_epoches=500000, batch_size=16):
        fig_count = 0
        self.sess.run(tf.global_variables_initializer())
        # self.saver.restore(self.sess, self.model_name)

        k_tn = 0
        learning_rate_initial = 1e-4
        for epoch in range(training_epoches):
            learning_rate = learning_rate_initial * pow(0.5, epoch // 50000)
            # update D and G; feed the clamped k back into the graph
            X_b = self.data(batch_size)
            _, _, k_tn = self.sess.run(
                [self.D_solver, self.G_solver, self.k_tn],
                feed_dict={self.X: X_b, self.z: sample_z(batch_size, self.z_dim), self.k_t: min(max(k_tn, 0.), 1.), self.learning_rate: learning_rate}
            )
            # print the loss; save images and the model periodically
            if epoch % 100 == 0 or epoch < 100:
                D_loss_curr, G_loss_curr, M_global_curr = self.sess.run(
                    [self.D_loss, self.G_loss, self.M_global],
                    feed_dict={self.X: X_b, self.z: sample_z(batch_size, self.z_dim), self.k_t: min(max(k_tn, 0.), 1.)})
                print('Iter: {}; D loss: {:.4}; G_loss: {:.4}; M_global: {:.4}; k_t: {:.6}; learning_rate: {:.8}'.format(epoch, D_loss_curr, G_loss_curr, M_global_curr, min(max(k_tn, 0.), 1.), learning_rate))

            if epoch % 1000 == 0:
                X_s, real, samples = self.sess.run([self.X, self.D_real, self.G_sample], feed_dict={self.X: X_b[:16, :, :, :], self.z: sample_z(16, self.z_dim)})

                fig = self.data.data2fig(X_s)
                plt.savefig('{}/{}.png'.format(sample_dir, str(fig_count).zfill(3)), bbox_inches='tight')
                plt.close(fig)

                fig = self.data.data2fig(real)
                plt.savefig('{}/{}_d.png'.format(sample_dir, str(fig_count).zfill(3)), bbox_inches='tight')
                plt.close(fig)

                fig = self.data.data2fig(samples)
                plt.savefig('{}/{}_r.png'.format(sample_dir, str(fig_count).zfill(3)), bbox_inches='tight')
                plt.close(fig)

                fig_count += 1

            if epoch % 5000 == 0:
                self.saver.save(self.sess, self.model_name)

if __name__ == '__main__':

    # constrain GPU usage
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'

    # directories for generated images and model checkpoints
    sample_dir = 'Samples/began'
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)
    if not os.path.exists('Models'):
        os.makedirs('Models')

    # param
    generator = G_conv()
    discriminator = D_autoencoder()

    data = cifar()

    # run
    began = BEGAN(generator, discriminator, data)
    began.train(sample_dir)

--------------------------------------------------------------------------------
/utils/nets.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import tensorflow.contrib.layers as tcl

def lrelu(x, leak=0.2, name="lrelu"):
    with tf.variable_scope(name):
        f1 = 0.5 * (1 + leak)
        f2 = 0.5 * (1 - leak)
        return f1 * x + f2 * abs(x)

class G_conv(object):
    def __init__(self, channel=3, name='G_conv'):
        self.name = name
        self.size = 64 // 16  # start from a 4x4 feature map and upsample x16
        self.channel = channel

    def __call__(self, z):
        with tf.variable_scope(self.name) as scope:
            g = tcl.fully_connected(z, self.size * self.size * 512, activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm)
            g = tf.reshape(g, (-1, self.size, self.size, 512))  # 4x4x512
            g = tcl.conv2d_transpose(g, 256, 3, stride=2,  # 8x8x256
                                     activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            g = tcl.conv2d_transpose(g, 128, 3, stride=2,  # 16x16x128
                                     activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            g = tcl.conv2d_transpose(g, 64, 3, stride=2,  # 32x32x64
                                     activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))

            g = tcl.conv2d_transpose(g, self.channel, 3, stride=2,  # 64x64xchannel; sigmoid keeps outputs in [0, 1]
                                     activation_fn=tf.nn.sigmoid, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            return g

    @property
    def vars(self):
        return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name)


class D_conv(object):
    def __init__(self, name='D_conv'):
        self.name = name

    def __call__(self, x, reuse=False):
        with tf.variable_scope(self.name) as scope:
            if reuse:
                scope.reuse_variables()
            size = 64
            d = tcl.conv2d(x, num_outputs=size, kernel_size=3,  # bzx64x64x3 -> bzx32x32x64
                           stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            d = tcl.conv2d(d, num_outputs=size * 2, kernel_size=3,  # 16x16x128
                           stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            d = tcl.conv2d(d, num_outputs=size * 4, kernel_size=3,  # 8x8x256
                           stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            d = tcl.conv2d(d, num_outputs=size * 8, kernel_size=3,  # 4x4x512
                           stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))

            d = tcl.fully_connected(tcl.flatten(d), 256, activation_fn=lrelu, weights_initializer=tf.random_normal_initializer(0, 0.02))
            d = tcl.fully_connected(d, 1, activation_fn=None, weights_initializer=tf.random_normal_initializer(0, 0.02))  # raw logit

            return d

    @property
    def vars(self):
        return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name)

# for ebgan and began
class D_autoencoder(object):
    def __init__(self, n_hidden=256, name='D_autoencoder'):
        self.name = name
        self.n_hidden = n_hidden

    def __call__(self, x, reuse=False):
        with tf.variable_scope(self.name) as scope:
            if reuse:
                scope.reuse_variables()
            # --- encoder (same shape as D_conv)
            size = 64
            d = tcl.conv2d(x, num_outputs=size, kernel_size=3,  # bzx64x64x3 -> bzx32x32x64
                           stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            d = tcl.conv2d(d, num_outputs=size * 2, kernel_size=3,  # 16x16x128
                           stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            d = tcl.conv2d(d, num_outputs=size * 4, kernel_size=3,  # 8x8x256
                           stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            d = tcl.conv2d(d, num_outputs=size * 8, kernel_size=3,  # 4x4x512
                           stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))

            h = tcl.fully_connected(tcl.flatten(d), self.n_hidden, activation_fn=lrelu, weights_initializer=tf.random_normal_initializer(0, 0.02))

            # --- decoder (same shape as G_conv)
            d = tcl.fully_connected(h, 4 * 4 * 512, activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm)
            d = tf.reshape(d, (-1, 4, 4, 512))  # 4x4x512
            d = tcl.conv2d_transpose(d, 256, 3, stride=2,  # 8x8x256
                                     activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            d = tcl.conv2d_transpose(d, 128, 3, stride=2,  # 16x16x128
                                     activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            d = tcl.conv2d_transpose(d, 64, 3, stride=2,  # 32x32x64
                                     activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))

            d = tcl.conv2d_transpose(d, 3, 3, stride=2,  # 64x64x3 reconstruction in [0, 1]
                                     activation_fn=tf.nn.sigmoid, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            return d

    @property
    def vars(self):
        return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name)

# for vae
class D_vae(object):
    def __init__(self, name='D_vae'):
        self.name = name

    def __call__(self, x, reuse=False):
        with tf.variable_scope(self.name) as scope:
            if reuse:
                scope.reuse_variables()
            size = 64
            d = tcl.conv2d(x, num_outputs=size, kernel_size=3,  # bzx64x64x3 -> bzx32x32x64
                           stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            d = tcl.conv2d(d, num_outputs=size * 2, kernel_size=3,  # 16x16x128
                           stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            d = tcl.conv2d(d, num_outputs=size * 4, kernel_size=3,  # 8x8x256
                           stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))
            d = tcl.conv2d(d, num_outputs=size * 8, kernel_size=3,  # 4x4x512
                           stride=2, activation_fn=lrelu, normalizer_fn=tcl.batch_norm, padding='SAME', weights_initializer=tf.random_normal_initializer(0, 0.02))

            d = tcl.fully_connected(tcl.flatten(d), 256, activation_fn=lrelu, weights_initializer=tf.random_normal_initializer(0, 0.02))
            # mean and log-variance of q(z|x); see vae.py
            mu = tcl.fully_connected(d, 100, activation_fn=None, weights_initializer=tf.random_normal_initializer(0, 0.02))
            sigma = tcl.fully_connected(d, 100, activation_fn=None, weights_initializer=tf.random_normal_initializer(0, 0.02))

            return mu, sigma

    @property
    def vars(self):
        return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name)

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
All models have been tested with Python 2.7+ and TensorFlow 1.0+ on Linux.

* Samples: saves generated data; each folder contains a figure showing the results.
* utils: contains 2 files
  * datas.py: data preprocessing.
  * nets.py: the Generator and Discriminator definitions.


For research purposes:
**Network architecture**: all GANs use the same network architecture (the Discriminator of EBGAN and BEGAN is a combination of the traditional D and G).
**Learning rate**: all initialized to 1e-4 and halved every 50,000 iterations (`lr = 1e-4 * 0.5 ** (iter // 50000)` in the code). This may be slightly unfair to some GANs, but the influence is small, so I ignored it.
**Dataset**: celebA, center-cropped to 128 and resized to 64. Users should copy all celebA images to `./Datas/celebA` for training.

- [x] DCGAN
- [x] EBGAN
- [x] WGAN
- [x] BEGAN

And for comparison, I added VAE here.
- [x] VAE

The generated results are shown at the end of this page.

***************


# Theories

:sparkles:DCGAN
--------
**Main idea: techniques (of architecture) to stabilize GAN training**
[Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks](https://arxiv.org/pdf/1511.06434.pdf) [2015]

**Loss Function** (the same as the vanilla GAN)

![DCGAN_loss](https://raw.githubusercontent.com/YadiraF/Images/master/GAN/GAN_loss.png)


**Architecture guidelines for stable Deep Convolutional GANs**

* Replace any pooling layers with strided convolutions (discriminator) and fractional-strided convolutions (generator).
* Use batchnorm in both the generator and the discriminator.
* Remove fully connected hidden layers for deeper architectures. Just use average pooling at the end.
* Use ReLU activation in the generator for all layers except the output, which uses Tanh.
* Use LeakyReLU activation in the discriminator for all layers.

***************

:sparkles:EBGAN
--------
**Main idea: view the discriminator as an energy function**
[Energy-based Generative Adversarial Network](https://arxiv.org/pdf/1609.03126.pdf) [2016]
(EBGAN is introduced here mainly for BEGAN; they use the same network structure.)

What is an energy function?
![EBGAN_structure](https://raw.githubusercontent.com/YadiraF/Images/master/GAN/Energy_based_model.png)
The figure is from [LeCun, Yann, et al. "A tutorial on energy-based learning."](http://yann.lecun.com/exdb/publis/pdf/lecun-06.pdf)

In EBGAN, we want the Discriminator to distinguish real images from generated (fake) ones. How? A simple idea is to set X as the real image and Y as the reconstructed image, and then minimize the energy between X and Y. So we need an auto-encoder to get Y from X, and a measure to calculate the energy (here simply the MSE).
Finally we get the structure of the Discriminator as shown below.

![EBGAN_structure](https://raw.githubusercontent.com/YadiraF/Images/master/GAN/EBGAN_structure.png)

So the task of D is to minimize the MSE between a real image and its reconstruction, and to maximize the MSE between a fake image from G and its reconstruction. G does the adversarial task: minimize the MSE of the fake images.
The loss function can then be written as:
![EBGAN_loss](https://raw.githubusercontent.com/YadiraF/Images/master/GAN/EBGAN_loss1.png)

And for comparison with BEGAN, we can treat D as just the auto-encoder and write L(.) for the MSE loss.
**Loss Function**
![EBGAN_loss](https://raw.githubusercontent.com/YadiraF/Images/master/GAN/EBGAN_loss2.png)

m is a positive margin: when L(G(z)) is close to zero, L_D is L(x) + m, which trains D harder; conversely, when L(G(z)) > m, L_D is just L(x), which means D loosens its judgement of the fake images.
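In TensorFlow this is only a few lines. The sketch below mirrors `ebgan.py`, with `X`, `G_z`, `D`, and `m` standing in for the real batch, the generator output, the auto-encoder discriminator, and the margin (50 in this repo); `tf.nn.l2_loss` plays the role of L(.):

```python
# Sketch of the EBGAN objectives (see ebgan.py for the full version).
L_real = tf.nn.l2_loss(X - D(X))              # energy of real images
L_fake = tf.nn.l2_loss(G_z - D(G_z))          # energy of generated images

D_loss = L_real + tf.maximum(0., m - L_fake)  # push fake energy up, but only to the margin m
G_loss = L_fake                               # G lowers the energy of its own samples
```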

Finally, there is a question for EBGAN: why use an auto-encoder in D instead of the traditional classifier? What are the benefits?
I have not read the paper carefully, but one reason (stated in the paper) is that auto-encoders can learn an energy manifold without supervision or negative examples. So, rather than simply judging images real or fake, the new D can capture the underlying distribution of the data and then distinguish them. The generated results shown in the EBGAN paper also illustrate this (in my understanding): the celebA images generated by DCGAN can hardly separate the face from the complex background, while the images from EBGAN focus more heavily on generating faces.

***************

:sparkles:Wasserstein GAN
--------
**Main idea: stabilize training by using the Wasserstein-1 distance instead of the Jensen-Shannon (JS) divergence**
GANs based on the JS divergence suffer from the non-overlapping-support problem, leading to mode collapse and convergence difficulties.
Using the EM (earth mover) distance, i.e. the Wasserstein-1 distance, GAN can solve these two problems without a particular architecture (like DCGAN).
[Wasserstein GAN](https://arxiv.org/pdf/1701.07875.pdf) [2017]

**Mathematical Analysis**
Why does the JS divergence cause problems? Please see [Towards Principled Methods for Training Generative Adversarial Networks](https://arxiv.org/pdf/1701.04862.pdf).

Anyway, this highlights the fact that **the KL, JS, and TV distances are not sensible cost functions** when learning distributions supported by low-dimensional manifolds.

So the authors use the Wasserstein distance:
![WGAN_loss](https://raw.githubusercontent.com/YadiraF/Images/master/GAN/WGAN_loss1.png)
The critic D is trained to maximize this objective, yielding an estimate of the distance, while G is trained to minimize the estimated distance.

However, the original formula is difficult to evaluate directly: the constraint ||f||_L <= 1 is hard to enforce. After some mathematical analysis, the authors replace it with clipping the variables of D, and the Wasserstein version of the GAN loss function becomes:
**Loss Function**
![WGAN_loss](https://raw.githubusercontent.com/YadiraF/Images/master/GAN/WGAN_loss2.png)


**Algorithm guidelines for stable GANs**

* No log in the loss. The output of D is no longer a probability, so we do not apply a sigmoid at the output of D:
  ```python
  G_loss = -tf.reduce_mean(D_fake)
  D_loss = tf.reduce_mean(D_fake) - tf.reduce_mean(D_real)
  ```
* Clip the weights of D (to [-0.01, 0.01]):
  ```python
  self.clip_D = [var.assign(tf.clip_by_value(var, -0.01, 0.01)) for var in self.discriminator.vars]
  ```
* Train D more than G (5:1); a full update-loop sketch follows this list.
* Use RMSProp instead of Adam.
* Use a lower learning rate (0.00005).
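
Putting these rules together, one WGAN update looks roughly like this (a sketch following `wgan.py`; `n_critic` is the number of critic steps per generator step, 5 in the paper, and the learning-rate feed is omitted for brevity):

```python
# One WGAN update (sketch following wgan.py).
for _ in range(n_critic):
    X_b = data(batch_size)              # a fresh batch for each critic step
    sess.run([clip_D, D_solver],        # clip the critic weights alongside the critic update
             feed_dict={X: X_b, z: sample_z(batch_size, z_dim)})
sess.run(G_solver,                      # a single generator update
         feed_dict={z: sample_z(batch_size, z_dim)})
```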

****************

:sparkles: BEGAN
--------
**Main idea: match the auto-encoder loss distributions using a loss derived from the Wasserstein distance**
[BEGAN: Boundary Equilibrium Generative Adversarial Networks](https://arxiv.org/pdf/1703.10717.pdf) [2017]

**Mathematical Analysis**
We have already introduced the structure of EBGAN, which is also used in BEGAN.
Instead of computing the Wasserstein distance between the sample distributions, as WGAN does, BEGAN computes the Wasserstein distance between the loss (reconstruction error) distributions.
(I find the mathematical analysis in BEGAN clearer and more intuitive than in WGAN.)
So, simply replacing the samples with their auto-encoder losses L, we get the loss function:
![BEGAN_loss](https://raw.githubusercontent.com/YadiraF/Images/master/GAN/BEGAN_loss1.png)

Then comes the most interesting part:
a new hyper-parameter gamma to control the trade-off between image diversity and visual quality,
![BEGAN_loss](https://raw.githubusercontent.com/YadiraF/Images/master/GAN/BEGAN_loss2.png)
Lower values of gamma lead to lower image diversity because the discriminator focuses more heavily on auto-encoding real images.

The final loss function is:
**Loss Function**
![BEGAN_loss](https://raw.githubusercontent.com/YadiraF/Images/master/GAN/BEGAN_loss3.png)

The intuition behind it is easy to understand (here is my rough reading):
(1) In the beginning, G and D are initialized randomly and k_0 = 0, so L_real is larger than L_fake, leading to a short increase of k.
(2) After several iterations, D easily learns to reconstruct the real data, so gamma * L_real - L_fake becomes negative and k decreases to 0; now D only reconstructs the real data, while G learns the real data distribution so as to minimize the reconstruction error in D.
(3) As G gets better at generating images that look like real data, L_fake gets smaller and k gets larger, so D focuses more on discriminating real from fake data, and G in turn is trained harder.
(4) In the end, k becomes a constant, which means gamma * L_real - L_fake = 0, and the optimization has reached its equilibrium.


And the global loss is defined as the sum of L_real (how well D has learned the distribution of the real data) and |gamma * L_real - L_fake| (how close the generated data is to the real data):
![BEGAN_loss](https://raw.githubusercontent.com/YadiraF/Images/master/GAN/BEGAN_loss4.png)
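
In code, k is just a running scalar clamped to [0, 1] that is fed back into the graph every iteration. A sketch following `began.py` (with `lambda_k = 0.001` and `gamma = 0.75`; the learning-rate feed is omitted, and the update is written on the Python side for clarity):

```python
# BEGAN control loop (sketch following began.py).
k = 0.
for it in range(iterations):
    _, _, L_real_v, L_fake_v = sess.run(
        [D_solver, G_solver, L_real, L_fake],
        feed_dict={X: data(batch_size), z: sample_z(batch_size, z_dim), k_t: k})
    k = min(max(k + lambda_k * (gamma * L_real_v - L_fake_v), 0.), 1.)  # k_{t+1}
    M_global = L_real_v + abs(gamma * L_real_v - L_fake_v)              # convergence measure
```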

I set gamma = 0.75 and the learning rate of k to 0.001; the learning curves of the loss and k are shown below.
![BEGAN_loss](https://raw.githubusercontent.com/YadiraF/Images/master/GAN/BEGAN_curve.png)



# Results

DCGAN
![DCGAN_samples](https://raw.githubusercontent.com/YadiraF/GAN_Theories/master/Samples/dcgan/497.png)

EBGAN (not trained enough)
![EBGAN_samples](https://raw.githubusercontent.com/YadiraF/GAN_Theories/master/Samples/ebgan/109_r.png)

WGAN (not trained enough)
![WGAN_samples](https://raw.githubusercontent.com/YadiraF/GAN_Theories/master/Samples/wgan/260.png)

BEGAN: gamma = 0.75, learning rate of k = 0.001
![BEGAN_samples](https://raw.githubusercontent.com/YadiraF/GAN_Theories/master/Samples/began_n/369_r.png)

BEGAN: gamma = 0.5, learning rate of k = 0.002
![BEGAN_samples](https://raw.githubusercontent.com/YadiraF/GAN_Theories/master/Samples/began/228_r.png)

VAE
![VAE_samples](https://raw.githubusercontent.com/YadiraF/GAN_Theories/master/Samples/vae/499_s.png)


# References
http://wiseodd.github.io/techblog/2016/12/10/variational-autoencoder/ (a good blog post introducing VAE)
https://github.com/wiseodd/generative-models/tree/master/GAN
https://github.com/artcg/BEGAN


# Others
TensorFlow style guide: https://www.tensorflow.org/community/style_guide



A good website for converting LaTeX equations to images (to insert into a README):
http://www.sciweavers.org/free-online-latex-equation-editor
--------------------------------------------------------------------------------