├── README.md
├── gaussian-toy-regularized.py
└── gaussian-toy-unrolled.py

/README.md:
--------------------------------------------------------------------------------
# Gradient Regularized GAN

Code for the regularizer proposed in the NIPS 2017 paper "Gradient descent GAN optimization is locally stable" (arXiv:1706.04156).

The code for the Gaussian experiments is based on https://github.com/poolio/unrolled_gan, modified to match the parameters specified in the Unrolled GANs paper (arXiv:1611.02163). The experiments in our paper were run with TensorFlow 1.1.0 and Keras 1.2.

Run ```python <script>```, where ```<script>``` is either ```gaussian-toy-unrolled.py``` or ```gaussian-toy-regularized.py```, to train the unrolled GAN or the gradient-norm-regularized GAN respectively.
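In outline, `gaussian-toy-regularized.py` trains the discriminator on the standard GAN value function `V` and penalizes the generator with the squared norm of the discriminator's gradient. The lines below are excerpted from that script (a sketch of the objective, not a self-contained snippet); `regularizer_weight` is 0.5 here:

```python
# Penalty: squared L2 norm of the discriminator's gradient of the value function V
d_gradient_norm_sq = tf.square(tf.global_norm(tf.gradients(V, disc_vars)))

# The discriminator ascends V (Keras minimizes -V) ...
d_updates = d_opt.get_updates(disc_vars, [], -V)
# ... while the generator descends V plus the weighted gradient penalty
g_updates = g_opt.get_updates(gen_vars, [], V + params['regularizer_weight'] * d_gradient_norm_sq)
```

`gaussian-toy-unrolled.py` is the baseline: instead of the penalty, it builds the generator objective from `unrolling_steps` (15) unrolled discriminator updates via `graph_replace`.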
--------------------------------------------------------------------------------
/gaussian-toy-regularized.py:
--------------------------------------------------------------------------------
from collections import OrderedDict
import tensorflow as tf
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
from matplotlib.pyplot import *
import seaborn as sns
from tqdm import tqdm
ds = tf.contrib.distributions
slim = tf.contrib.slim
graph_replace = tf.contrib.graph_editor.graph_replace

from keras.optimizers import Adam


def sample_mog(batch_size, n_mixture=8, std=0.02, radius=2.0):
    thetas = np.linspace(0, 2 * np.pi * (n_mixture - 1) / float(n_mixture), n_mixture)
    xs, ys = radius * np.sin(thetas), radius * np.cos(thetas)
    cat = ds.Categorical(tf.zeros(n_mixture))
    comps = [ds.MultivariateNormalDiag([xi, yi], [std, std]) for xi, yi in zip(xs.ravel(), ys.ravel())]
    data = ds.Mixture(cat, comps)
    return data.sample(batch_size)


params = dict(
    batch_size=512,
    disc_learning_rate=1e-4,
    gen_learning_rate=1e-4,
    beta1=0.5,
    epsilon=1e-8,
    max_iter=100001,
    viz_every=1000,
    z_dim=256,
    x_dim=2,
    unrolling_steps=0,
    regularizer_weight=0.5,
)


def extract_step_dict(update_ops):
    """Extract variables and their update steps from Assign and AssignAdd ops.

    Args:
        update_ops: list of Assign and AssignAdd ops, typically computed using Keras' opt.get_updates()

    Returns:
        dict mapping from variables to their step updates
    """
    name_to_var = {v.name: v for v in tf.global_variables()}
    updates = OrderedDict()
    for update in update_ops:
        var_name = update.op.inputs[0].name
        var = name_to_var[var_name]
        value = update.op.inputs[1]
        if update.op.type == 'Assign':
            updates[var] = value - var
        elif update.op.type == 'AssignAdd':
            updates[var] = value
        else:
            raise ValueError("Update op type (%s) must be of type Assign or AssignAdd" % update.op.type)
    return updates


def generator(z, output_dim=2, n_hidden=128, n_layer=2):
    with tf.variable_scope("generator"):
        h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.relu)
        x = slim.fully_connected(h, output_dim, activation_fn=None)
    return x


def discriminator(x, n_hidden=128, n_layer=1, reuse=False):
    with tf.variable_scope("discriminator", reuse=reuse):
        h = slim.stack(tf.divide(x, 4.0), slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.relu)
        log_d = slim.fully_connected(h, 1, activation_fn=None)
    return log_d


tf.reset_default_graph()

data = sample_mog(params['batch_size'])

noise = ds.Normal(tf.zeros(params['z_dim']),
                  tf.ones(params['z_dim'])).sample(params['batch_size'])
# Construct generator and discriminator nets
with slim.arg_scope([slim.fully_connected], weights_initializer=tf.orthogonal_initializer(gain=0.8)):
    samples = generator(noise, output_dim=params['x_dim'])
    real_score = discriminator(data)
    fake_score = discriminator(samples, reuse=True)

# D maximizes this, G minimizes this + a regularizer
V = -tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=real_score, labels=tf.ones_like(real_score)) +
    tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_score, labels=tf.zeros_like(fake_score)))

gen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "generator")
disc_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "discriminator")

# Vanilla discriminator update
d_opt = Adam(lr=params['disc_learning_rate'], beta_1=params['beta1'], epsilon=params['epsilon'])
d_updates = d_opt.get_updates(disc_vars, [], -V)
d_train_op = tf.group(*d_updates, name="d_train_op")
step_dict = extract_step_dict(d_updates)

# Penalty: squared L2 norm of the discriminator's gradient of V
d_gradient_norm_sq = tf.square(tf.global_norm(tf.gradients(V, disc_vars)))

g_opt = Adam(lr=params['gen_learning_rate'], beta_1=params['beta1'], epsilon=params['epsilon'])
g_updates = g_opt.get_updates(gen_vars, [], V + params['regularizer_weight'] * d_gradient_norm_sq)
g_train_op = tf.group(*g_updates, name="g_train_op")

# Or alternatively:
'''
d_opt = tf.train.AdamOptimizer(params['disc_learning_rate'], beta1=params['beta1'], epsilon=params['epsilon'])
g_opt = tf.train.AdamOptimizer(params['gen_learning_rate'], beta1=params['beta1'], epsilon=params['epsilon'])
d_train_op = d_opt.minimize(-V, var_list=disc_vars)
g_train_op = g_opt.minimize(V + params['regularizer_weight'] * d_gradient_norm_sq, var_list=gen_vars)
'''

norm_d = tf.global_norm(tf.gradients(V, disc_vars))
norm_g = tf.global_norm(tf.gradients(V, gen_vars))

sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True)))
sess.run(tf.global_variables_initializer())

xmax = 3
fs = []
frames = []
np_samples = []
ds = []  # discriminator gradient norms
gs = []  # generator gradient norms
n_batches_viz = 10
viz_every = params['viz_every']
for i in tqdm(range(params['max_iter'])):
    sess.run(d_train_op)
    sess.run(g_train_op)
    d, g, f = sess.run([norm_d, norm_g, V])
    fs.append(f)
    ds.append(d)
    gs.append(g)
    if i % viz_every == 0:
        np_samples.append(np.vstack([sess.run(samples) for _ in range(n_batches_viz)]))
        xx, yy = sess.run([samples, data])
        fig = figure(figsize=(5, 5))
        scatter(xx[:, 0], xx[:, 1], edgecolor='none')
        scatter(yy[:, 0], yy[:, 1], c='g', edgecolor='none')
        axis('off')
        fig.savefig('fig' + str(i) + '.pdf')
        close(fig)

np.savetxt("d_norm.out", np.array(ds))
np.savetxt("g_norm.out", np.array(gs))
fig = figure()
ax = subplot(111)
ax.set_ylabel('Discriminator Gradient L2 Norm')
ax.set_xlabel('Iteration')
plot(range(len(ds)), ds)
fig.savefig('d_norm.pdf')
fig = figure()
ax = subplot(111)
plot(range(len(gs)), gs)
ax.set_ylabel('Generator Gradient L2 Norm')
ax.set_xlabel('Iteration')
fig.savefig('g_norm.pdf')

np_samples_ = np_samples[::1]
cols = len(np_samples_)
bg_color = sns.color_palette('Greens', n_colors=256)[0]
fig = figure(figsize=(2 * cols, 2))
for i, samps in enumerate(np_samples_):
    if i == 0:
        ax = subplot(1, cols, 1)
    else:
        subplot(1, cols, i + 1, sharex=ax, sharey=ax)
    ax2 = sns.kdeplot(samps[:, 0], samps[:, 1], shade=True, cmap='Greens', n_levels=20, clip=[[-xmax, xmax]] * 2)
    ax2.set_axis_bgcolor(bg_color)
    xticks([]); yticks([])
    title('step %d' % (i * viz_every))
ax.set_ylabel('%d unrolling steps' % params['unrolling_steps'])
gcf().tight_layout()
fig.savefig('series.pdf')

np.savetxt("loss.out", np.array(fs))
fig = figure()
fs = np.array(fs)
plot(range(len(fs)), fs)
ax = subplot(111)
ax.set_ylabel('Loss')
ax.set_xlabel('Iteration')
fig.savefig('loss.pdf')
--------------------------------------------------------------------------------
/gaussian-toy-unrolled.py:
--------------------------------------------------------------------------------
import setGPU
from collections import OrderedDict
import tensorflow as tf
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
from matplotlib.pyplot import *
import seaborn as sns
from tqdm import tqdm
ds = tf.contrib.distributions
slim = tf.contrib.slim
graph_replace = tf.contrib.graph_editor.graph_replace
from setproctitle import setproctitle
setproctitle('unrolled-gan')

from keras.optimizers import Adam


def sample_mog(batch_size, n_mixture=8, std=0.02, radius=2.0):
    thetas = np.linspace(0, 2 * np.pi * (n_mixture - 1) / float(n_mixture), n_mixture)
    xs, ys = radius * np.sin(thetas), radius * np.cos(thetas)
    cat = ds.Categorical(tf.zeros(n_mixture))
    comps = [ds.MultivariateNormalDiag([xi, yi], [std, std]) for xi, yi in zip(xs.ravel(), ys.ravel())]
    data = ds.Mixture(cat, comps)
    return data.sample(batch_size)


params = dict(
    batch_size=512,
    disc_learning_rate=1e-4,
    gen_learning_rate=1e-4,
    beta1=0.5,
    epsilon=1e-8,
    max_iter=100001,
    viz_every=1000,
    z_dim=256,
    x_dim=2,
    unrolling_steps=15,
)


def extract_update_dict(update_ops):
    """Extract variables and their new values from Assign and AssignAdd ops.

    Args:
        update_ops: list of Assign and AssignAdd ops, typically computed using Keras' opt.get_updates()

    Returns:
        dict mapping from variable values to their updated value
    """
    name_to_var = {v.name: v for v in tf.global_variables()}
    updates = OrderedDict()
    for update in update_ops:
        var_name = update.op.inputs[0].name
        var = name_to_var[var_name]
        value = update.op.inputs[1]
        if update.op.type == 'Assign':
            updates[var.value()] = value
        elif update.op.type == 'AssignAdd':
            updates[var.value()] = var + value
        else:
            raise ValueError("Update op type (%s) must be of type Assign or AssignAdd" % update.op.type)
    return updates


def generator(z, output_dim=2, n_hidden=128, n_layer=2):
    with tf.variable_scope("generator"):
        h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.relu)
        x = slim.fully_connected(h, output_dim, activation_fn=None)
    return x


def discriminator(x, n_hidden=128, n_layer=1, reuse=False):
    with tf.variable_scope("discriminator", reuse=reuse):
        h = slim.stack(tf.divide(x, 4.0), slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.relu)
        log_d = slim.fully_connected(h, 1, activation_fn=None)
    return log_d


tf.reset_default_graph()

data = sample_mog(params['batch_size'])

noise = ds.Normal(tf.zeros(params['z_dim']),
                  tf.ones(params['z_dim'])).sample(params['batch_size'])
# Construct generator and discriminator nets
with slim.arg_scope([slim.fully_connected], weights_initializer=tf.orthogonal_initializer(gain=0.8)):
    samples = generator(noise, output_dim=params['x_dim'])
    real_score = discriminator(data)
    fake_score = discriminator(samples, reuse=True)

# Saddle objective: D minimizes this, G maximizes it
loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=real_score, labels=tf.ones_like(real_score)) +
    tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_score, labels=tf.zeros_like(fake_score)))

gen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "generator")
disc_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "discriminator")

# Vanilla discriminator update
d_opt = Adam(lr=params['disc_learning_rate'], beta_1=params['beta1'], epsilon=params['epsilon'])
updates = d_opt.get_updates(disc_vars, [], loss)
d_train_op = tf.group(*updates, name="d_train_op")

# Unroll optimization of the discriminator
if params['unrolling_steps'] > 0:
    # Get dictionary mapping from variables to their update value after one optimization step
    update_dict = extract_update_dict(updates)
    cur_update_dict = update_dict
    for i in range(params['unrolling_steps'] - 1):
        # Compute variable updates given the previous iteration's updated variables
        cur_update_dict = graph_replace(update_dict, cur_update_dict)
    # Final unrolled loss uses the parameters at the last time step
    unrolled_loss = graph_replace(loss, cur_update_dict)
else:
    unrolled_loss = loss

# Optimize the generator on the unrolled loss
g_train_opt = tf.train.AdamOptimizer(params['gen_learning_rate'], beta1=params['beta1'], epsilon=params['epsilon'])
g_train_op = g_train_opt.minimize(-unrolled_loss, var_list=gen_vars)


norm_d = tf.global_norm(tf.gradients(loss, disc_vars))
norm_g = tf.global_norm(tf.gradients(loss, gen_vars))

sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True)))
sess.run(tf.global_variables_initializer())

xmax = 3
fs = []
frames = []
np_samples = []
ds = []  # discriminator gradient norms
gs = []  # generator gradient norms
n_batches_viz = 10
viz_every = params['viz_every']
for i in tqdm(range(params['max_iter'])):
    sess.run(d_train_op)
    sess.run(g_train_op)
    d, g, f, _ = sess.run([norm_d, norm_g, loss, unrolled_loss])
    fs.append(f)
    ds.append(d)
    gs.append(g)
    if i % viz_every == 0:
        np_samples.append(np.vstack([sess.run(samples) for _ in range(n_batches_viz)]))
        xx, yy = sess.run([samples, data])
        fig = figure(figsize=(5, 5))
        scatter(xx[:, 0], xx[:, 1], edgecolor='none')
        scatter(yy[:, 0], yy[:, 1], c='g', edgecolor='none')
        axis('off')
        # if generate_movie:
        #     frames.append(mplfig_to_npimage(fig))
        # show()
        fig.savefig('fig' + str(i) + '.pdf')
        close(fig)

np.savetxt("d_norm.out", np.array(ds))
np.savetxt("g_norm.out", np.array(gs))
fig = figure()
ax = subplot(111)
ax.set_ylabel('Discriminator Gradient L2 Norm')
ax.set_xlabel('Iteration')
plot(range(len(ds)), ds)
fig.savefig('d_norm.pdf')
fig = figure()
ax = subplot(111)
plot(range(len(gs)), gs)
ax.set_ylabel('Generator Gradient L2 Norm')
ax.set_xlabel('Iteration')
fig.savefig('g_norm.pdf')

np_samples_ = np_samples[::1]
cols = len(np_samples_)
bg_color = sns.color_palette('Greens', n_colors=256)[0]
fig = figure(figsize=(2 * cols, 2))
for i, samps in enumerate(np_samples_):
    if i == 0:
        ax = subplot(1, cols, 1)
    else:
        subplot(1, cols, i + 1, sharex=ax, sharey=ax)
    ax2 = sns.kdeplot(samps[:, 0], samps[:, 1], shade=True, cmap='Greens', n_levels=20, clip=[[-xmax, xmax]] * 2)
    ax2.set_axis_bgcolor(bg_color)
    xticks([]); yticks([])
    title('step %d' % (i * viz_every))
ax.set_ylabel('%d unrolling steps' % params['unrolling_steps'])
gcf().tight_layout()
fig.savefig('series.pdf')

np.savetxt("loss.out", np.array(fs))
fig = figure()
fs = np.array(fs)
plot(range(len(fs)), fs)
ax = subplot(111)
ax.set_ylabel('Loss')
ax.set_xlabel('Iteration')
fig.savefig('loss.pdf')
--------------------------------------------------------------------------------