├── README.md
├── gaussian-toy-regularized.py
└── gaussian-toy-unrolled.py

/README.md:
--------------------------------------------------------------------------------
# Gradient Regularized GAN

Code for the regularizer proposed in the NIPS 2017 paper "Gradient descent GAN optimization is locally stable" (arXiv:1706.04156).

The code for the Gaussian experiments is based on https://github.com/poolio/unrolled_gan, modified to match the parameters specified in the Unrolled GANs paper (arXiv:1611.02163). The experiments in our paper were run with TensorFlow 1.1.0 and Keras 1.2.

Run ```python <script>```, where ```<script>``` is either ```gaussian-toy-unrolled.py``` or ```gaussian-toy-regularized.py```, to train the unrolled GAN or the gradient-norm-regularized GAN respectively.
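In outline, `gaussian-toy-regularized.py` trains the discriminator on the standard GAN value function `V` and penalizes the generator with the squared norm of the discriminator's gradient. The lines below are excerpted from that script (a sketch of the objective, not a self-contained snippet); `regularizer_weight` is 0.5 here:

```python
# Penalty: squared L2 norm of the discriminator's gradient of the value function V
d_gradient_norm_sq = tf.square(tf.global_norm(tf.gradients(V, disc_vars)))

# The discriminator ascends V (Keras minimizes -V) ...
d_updates = d_opt.get_updates(disc_vars, [], -V)
# ... while the generator descends V plus the weighted gradient penalty
g_updates = g_opt.get_updates(gen_vars, [], V + params['regularizer_weight'] * d_gradient_norm_sq)
```

`gaussian-toy-unrolled.py` is the baseline: instead of the penalty, it builds the generator objective from `unrolling_steps` (15) unrolled discriminator updates via `graph_replace`.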
--------------------------------------------------------------------------------
/gaussian-toy-regularized.py:
--------------------------------------------------------------------------------
from collections import OrderedDict
import tensorflow as tf
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
from matplotlib.pyplot import *
import seaborn as sns
from tqdm import tqdm
ds = tf.contrib.distributions
slim = tf.contrib.slim
graph_replace = tf.contrib.graph_editor.graph_replace

from keras.optimizers import Adam


def sample_mog(batch_size, n_mixture=8, std=0.02, radius=2.0):
    thetas = np.linspace(0, 2 * np.pi * (n_mixture - 1) / float(n_mixture), n_mixture)
    xs, ys = radius * np.sin(thetas), radius * np.cos(thetas)
    cat = ds.Categorical(tf.zeros(n_mixture))
    comps = [ds.MultivariateNormalDiag([xi, yi], [std, std]) for xi, yi in zip(xs.ravel(), ys.ravel())]
    data = ds.Mixture(cat, comps)
    return data.sample(batch_size)


params = dict(
    batch_size=512,
    disc_learning_rate=1e-4,
    gen_learning_rate=1e-4,
    beta1=0.5,
    epsilon=1e-8,
    max_iter=100001,
    viz_every=1000,
    z_dim=256,
    x_dim=2,
    unrolling_steps=0,
    regularizer_weight=0.5,
)


def extract_step_dict(update_ops):
    """Extract variables and their update steps from Assign and AssignAdd ops.

    Args:
        update_ops: list of Assign and AssignAdd ops, typically computed using Keras' opt.get_updates()

    Returns:
        dict mapping from variables to their step updates
    """
    name_to_var = {v.name: v for v in tf.global_variables()}
    updates = OrderedDict()
    for update in update_ops:
        var_name = update.op.inputs[0].name
        var = name_to_var[var_name]
        value = update.op.inputs[1]
        if update.op.type == 'Assign':
            updates[var] = value - var
        elif update.op.type == 'AssignAdd':
            updates[var] = value
        else:
            raise ValueError("Update op type (%s) must be of type Assign or AssignAdd" % update.op.type)
    return updates


def generator(z, output_dim=2, n_hidden=128, n_layer=2):
    with tf.variable_scope("generator"):
        h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.relu)
        x = slim.fully_connected(h, output_dim, activation_fn=None)
    return x


def discriminator(x, n_hidden=128, n_layer=1, reuse=False):
    with tf.variable_scope("discriminator", reuse=reuse):
        h = slim.stack(tf.divide(x, 4.0), slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.relu)
        log_d = slim.fully_connected(h, 1, activation_fn=None)
    return log_d


tf.reset_default_graph()

data = sample_mog(params['batch_size'])

noise = ds.Normal(tf.zeros(params['z_dim']),
                  tf.ones(params['z_dim'])).sample(params['batch_size'])
# Construct generator and discriminator nets
with slim.arg_scope([slim.fully_connected], weights_initializer=tf.orthogonal_initializer(gain=0.8)):
    samples = generator(noise, output_dim=params['x_dim'])
    real_score = discriminator(data)
    fake_score = discriminator(samples, reuse=True)

# D maximizes this, G minimizes this + a regularizer
V = -tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=real_score, labels=tf.ones_like(real_score)) +
    tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_score, labels=tf.zeros_like(fake_score)))

gen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "generator")
disc_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "discriminator")

# Vanilla discriminator update
d_opt = Adam(lr=params['disc_learning_rate'], beta_1=params['beta1'], epsilon=params['epsilon'])
d_updates = d_opt.get_updates(disc_vars, [], -V)
d_train_op = tf.group(*d_updates, name="d_train_op")
step_dict = extract_step_dict(d_updates)

# Penalty: squared L2 norm of the discriminator's gradient of V
d_gradient_norm_sq = tf.square(tf.global_norm(tf.gradients(V, disc_vars)))

g_opt = Adam(lr=params['gen_learning_rate'], beta_1=params['beta1'], epsilon=params['epsilon'])
g_updates = g_opt.get_updates(gen_vars, [], V + params['regularizer_weight'] * d_gradient_norm_sq)
g_train_op = tf.group(*g_updates, name="g_train_op")

# Or alternatively:
'''
d_opt = tf.train.AdamOptimizer(params['disc_learning_rate'], beta1=params['beta1'], epsilon=params['epsilon'])
g_opt = tf.train.AdamOptimizer(params['gen_learning_rate'], beta1=params['beta1'], epsilon=params['epsilon'])
d_train_op = d_opt.minimize(-V, var_list=disc_vars)
g_train_op = g_opt.minimize(V + params['regularizer_weight'] * d_gradient_norm_sq, var_list=gen_vars)
'''

norm_d = tf.global_norm(tf.gradients(V, disc_vars))
norm_g = tf.global_norm(tf.gradients(V, gen_vars))

sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True)))
sess.run(tf.global_variables_initializer())

xmax = 3
fs = []
frames = []
np_samples = []
ds = []  # discriminator gradient norms
gs = []  # generator gradient norms
n_batches_viz = 10
viz_every = params['viz_every']
for i in tqdm(range(params['max_iter'])):
    sess.run(d_train_op)
    sess.run(g_train_op)
    d, g, f = sess.run([norm_d, norm_g, V])
    fs.append(f)
    ds.append(d)
    gs.append(g)
    if i % viz_every == 0:
        np_samples.append(np.vstack([sess.run(samples) for _ in range(n_batches_viz)]))
        xx, yy = sess.run([samples, data])
        fig = figure(figsize=(5, 5))
        scatter(xx[:, 0], xx[:, 1], edgecolor='none')
        scatter(yy[:, 0], yy[:, 1], c='g', edgecolor='none')
        axis('off')
        fig.savefig('fig' + str(i) + '.pdf')
        close(fig)

np.savetxt("d_norm.out", np.array(ds))
np.savetxt("g_norm.out", np.array(gs))
fig = figure()
ax = subplot(111)
ax.set_ylabel('Discriminator Gradient L2 Norm')
ax.set_xlabel('Iteration')
plot(range(len(ds)), ds)
fig.savefig('d_norm.pdf')
fig = figure()
ax = subplot(111)
plot(range(len(gs)), gs)
ax.set_ylabel('Generator Gradient L2 Norm')
ax.set_xlabel('Iteration')
fig.savefig('g_norm.pdf')

np_samples_ = np_samples[::1]
cols = len(np_samples_)
bg_color = sns.color_palette('Greens', n_colors=256)[0]
fig = figure(figsize=(2 * cols, 2))
for i, samps in enumerate(np_samples_):
    if i == 0:
        ax = subplot(1, cols, 1)
    else:
        subplot(1, cols, i + 1, sharex=ax, sharey=ax)
    ax2 = sns.kdeplot(samps[:, 0], samps[:, 1], shade=True, cmap='Greens', n_levels=20, clip=[[-xmax, xmax]] * 2)
    ax2.set_axis_bgcolor(bg_color)
    xticks([]); yticks([])
    title('step %d' % (i * viz_every))
ax.set_ylabel('%d unrolling steps' % params['unrolling_steps'])
gcf().tight_layout()
fig.savefig('series.pdf')

np.savetxt("loss.out", np.array(fs))
fig = figure()
fs = np.array(fs)
plot(range(len(fs)), fs)
ax = subplot(111)
ax.set_ylabel('Loss')
ax.set_xlabel('Iteration')
fig.savefig('loss.pdf')
--------------------------------------------------------------------------------
/gaussian-toy-unrolled.py:
--------------------------------------------------------------------------------
import setGPU
from collections import OrderedDict
import tensorflow as tf
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
from matplotlib.pyplot import *
import seaborn as sns
from tqdm import tqdm
ds = tf.contrib.distributions
slim = tf.contrib.slim
graph_replace = tf.contrib.graph_editor.graph_replace
from setproctitle import setproctitle
setproctitle('unrolled-gan')

from keras.optimizers import Adam


def sample_mog(batch_size, n_mixture=8, std=0.02, radius=2.0):
    thetas = np.linspace(0, 2 * np.pi * (n_mixture - 1) / float(n_mixture), n_mixture)
    xs, ys = radius * np.sin(thetas), radius * np.cos(thetas)
    cat = ds.Categorical(tf.zeros(n_mixture))
    comps = [ds.MultivariateNormalDiag([xi, yi], [std, std]) for xi, yi in zip(xs.ravel(), ys.ravel())]
    data = ds.Mixture(cat, comps)
    return data.sample(batch_size)


params = dict(
    batch_size=512,
    disc_learning_rate=1e-4,
    gen_learning_rate=1e-4,
    beta1=0.5,
    epsilon=1e-8,
    max_iter=100001,
    viz_every=1000,
    z_dim=256,
    x_dim=2,
    unrolling_steps=15,
)


def extract_update_dict(update_ops):
    """Extract variables and their new values from Assign and AssignAdd ops.

    Args:
        update_ops: list of Assign and AssignAdd ops, typically computed using Keras' opt.get_updates()

    Returns:
        dict mapping from variable values to their updated value
    """
    name_to_var = {v.name: v for v in tf.global_variables()}
    updates = OrderedDict()
    for update in update_ops:
        var_name = update.op.inputs[0].name
        var = name_to_var[var_name]
        value = update.op.inputs[1]
        if update.op.type == 'Assign':
            updates[var.value()] = value
        elif update.op.type == 'AssignAdd':
            updates[var.value()] = var + value
        else:
            raise ValueError("Update op type (%s) must be of type Assign or AssignAdd" % update.op.type)
    return updates


def generator(z, output_dim=2, n_hidden=128, n_layer=2):
    with tf.variable_scope("generator"):
        h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.relu)
        x = slim.fully_connected(h, output_dim, activation_fn=None)
    return x


def discriminator(x, n_hidden=128, n_layer=1, reuse=False):
    with tf.variable_scope("discriminator", reuse=reuse):
        h = slim.stack(tf.divide(x, 4.0), slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.relu)
        log_d = slim.fully_connected(h, 1, activation_fn=None)
    return log_d


tf.reset_default_graph()

data = sample_mog(params['batch_size'])

noise = ds.Normal(tf.zeros(params['z_dim']),
                  tf.ones(params['z_dim'])).sample(params['batch_size'])
# Construct generator and discriminator nets
with slim.arg_scope([slim.fully_connected], weights_initializer=tf.orthogonal_initializer(gain=0.8)):
    samples = generator(noise, output_dim=params['x_dim'])
    real_score = discriminator(data)
    fake_score = discriminator(samples, reuse=True)

# Saddle objective: D minimizes this, G maximizes it
loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=real_score, labels=tf.ones_like(real_score)) +
    tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_score, labels=tf.zeros_like(fake_score)))

gen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "generator")
disc_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "discriminator")

# Vanilla discriminator update
d_opt = Adam(lr=params['disc_learning_rate'], beta_1=params['beta1'], epsilon=params['epsilon'])
updates = d_opt.get_updates(disc_vars, [], loss)
d_train_op = tf.group(*updates, name="d_train_op")

# Unroll optimization of the discriminator
if params['unrolling_steps'] > 0:
    # Get dictionary mapping from variables to their update value after one optimization step
    update_dict = extract_update_dict(updates)
    cur_update_dict = update_dict
    for i in range(params['unrolling_steps'] - 1):
        # Compute variable updates given the previous iteration's updated variables
        cur_update_dict = graph_replace(update_dict, cur_update_dict)
    # Final unrolled loss uses the parameters at the last time step
    unrolled_loss = graph_replace(loss, cur_update_dict)
else:
    unrolled_loss = loss

# Optimize the generator on the unrolled loss
g_train_opt = tf.train.AdamOptimizer(params['gen_learning_rate'], beta1=params['beta1'], epsilon=params['epsilon'])
g_train_op = g_train_opt.minimize(-unrolled_loss, var_list=gen_vars)


norm_d = tf.global_norm(tf.gradients(loss, disc_vars))
norm_g = tf.global_norm(tf.gradients(loss, gen_vars))

sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True)))
sess.run(tf.global_variables_initializer())

xmax = 3
fs = []
frames = []
np_samples = []
ds = []  # discriminator gradient norms
gs = []  # generator gradient norms
n_batches_viz = 10
viz_every = params['viz_every']
for i in tqdm(range(params['max_iter'])):
    sess.run(d_train_op)
    sess.run(g_train_op)
    d, g, f, _ = sess.run([norm_d, norm_g, loss, unrolled_loss])
    fs.append(f)
    ds.append(d)
    gs.append(g)
    if i % viz_every == 0:
        np_samples.append(np.vstack([sess.run(samples) for _ in range(n_batches_viz)]))
        xx, yy = sess.run([samples, data])
        fig = figure(figsize=(5, 5))
        scatter(xx[:, 0], xx[:, 1], edgecolor='none')
        scatter(yy[:, 0], yy[:, 1], c='g', edgecolor='none')
        axis('off')
        # if generate_movie:
        #     frames.append(mplfig_to_npimage(fig))
        # show()
        fig.savefig('fig' + str(i) + '.pdf')
        close(fig)

np.savetxt("d_norm.out", np.array(ds))
np.savetxt("g_norm.out", np.array(gs))
fig = figure()
ax = subplot(111)
ax.set_ylabel('Discriminator Gradient L2 Norm')
ax.set_xlabel('Iteration')
plot(range(len(ds)), ds)
fig.savefig('d_norm.pdf')
fig = figure()
ax = subplot(111)
plot(range(len(gs)), gs)
ax.set_ylabel('Generator Gradient L2 Norm')
ax.set_xlabel('Iteration')
fig.savefig('g_norm.pdf')

np_samples_ = np_samples[::1]
cols = len(np_samples_)
bg_color = sns.color_palette('Greens', n_colors=256)[0]
fig = figure(figsize=(2 * cols, 2))
for i, samps in enumerate(np_samples_):
    if i == 0:
        ax = subplot(1, cols, 1)
    else:
        subplot(1, cols, i + 1, sharex=ax, sharey=ax)
    ax2 = sns.kdeplot(samps[:, 0], samps[:, 1], shade=True, cmap='Greens', n_levels=20, clip=[[-xmax, xmax]] * 2)
    ax2.set_axis_bgcolor(bg_color)
    xticks([]); yticks([])
    title('step %d' % (i * viz_every))
ax.set_ylabel('%d unrolling steps' % params['unrolling_steps'])
gcf().tight_layout()
fig.savefig('series.pdf')

np.savetxt("loss.out", np.array(fs))
fig = figure()
fs = np.array(fs)
plot(range(len(fs)), fs)
ax = subplot(111)
ax.set_ylabel('Loss')
ax.set_xlabel('Iteration')
fig.savefig('loss.pdf')
--------------------------------------------------------------------------------