├── README.md
├── gaussian-toy-regularized.py
└── gaussian-toy-unrolled.py
/README.md:
--------------------------------------------------------------------------------
1 | # Gradient Regularized GAN
2 | Code for the regularizer proposed in the NIPS 2017 paper "Gradient descent GAN optimization is locally stable"
3 | (arXiv:1706.04156)
4 |
5 | The code for the Gaussian experiments is based on https://github.com/poolio/unrolled_gan, modified to use the parameters specified in the Unrolled GANs paper (arXiv:1611.02163). The experiments in our paper were run with TensorFlow 1.1.0 and Keras 1.2.
6 |
7 | Run ```python <script>``` where ```<script>``` is either ```gaussian-toy-unrolled.py``` or ```gaussian-toy-regularized.py``` to run the unrolled GAN or the gradient-norm-regularized GAN, respectively.
8 |
9 |
10 |
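11 | The regularizer is the squared L2 norm of the discriminator's gradient of the GAN objective, added to the generator's loss. A minimal sketch of how ```gaussian-toy-regularized.py``` builds its two training ops (TensorFlow 1.x; it assumes ```V```, ```disc_vars```, ```gen_vars```, and ```regularizer_weight``` are already defined as in that script):
12 |
13 | ```python
14 | import tensorflow as tf
15 |
16 | # Penalty: squared L2 norm of the discriminator's gradient of the objective V.
17 | d_gradient_norm_sq = tf.square(tf.global_norm(tf.gradients(V, disc_vars)))
18 |
19 | # The discriminator ascends V; the generator descends V plus the weighted penalty.
20 | d_train_op = tf.train.AdamOptimizer(1e-4, beta1=0.5).minimize(-V, var_list=disc_vars)
21 | g_train_op = tf.train.AdamOptimizer(1e-4, beta1=0.5).minimize(
22 |     V + regularizer_weight * d_gradient_norm_sq, var_list=gen_vars)
23 | ```
24 |
25 | ```gaussian-toy-regularized.py``` builds this same penalty but applies it through Keras' ```Adam.get_updates```; ```gaussian-toy-unrolled.py``` omits the penalty and instead unrolls several discriminator update steps before differentiating the generator loss.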
--------------------------------------------------------------------------------
/gaussian-toy-regularized.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 | import tensorflow as tf
3 | import numpy as np
4 | import matplotlib as mpl
5 | mpl.use('Agg')
6 | from matplotlib.pyplot import *
7 | import seaborn as sns
8 | from tqdm import tqdm
9 | ds = tf.contrib.distributions
10 | slim = tf.contrib.slim
11 | graph_replace = tf.contrib.graph_editor.graph_replace
12 |
13 | from keras.optimizers import Adam
14 |
15 |
16 |
17 |
18 | def sample_mog(batch_size, n_mixture=8, std=0.02, radius=2.0):
19 | thetas = np.linspace(0, 2 * np.pi * (n_mixture-1)/float(n_mixture), n_mixture)
20 | xs, ys = radius * np.sin(thetas), radius * np.cos(thetas)
21 | cat = ds.Categorical(tf.zeros(n_mixture))
22 | comps = [ds.MultivariateNormalDiag([xi, yi], [std, std]) for xi, yi in zip(xs.ravel(), ys.ravel())]
23 | data = ds.Mixture(cat, comps)
24 | return data.sample(batch_size)
25 |
26 |
27 |
28 | params = dict(
29 | batch_size=512,
30 | disc_learning_rate=1e-4,
31 | gen_learning_rate=1e-4,
32 | beta1=0.5,
33 | epsilon=1e-8,
34 | max_iter=100001,
35 | viz_every=1000,
36 | z_dim=256,
37 | x_dim=2,
38 | unrolling_steps=0,
39 | regularizer_weight=0.5,
40 | )
41 |
42 |
43 |
44 |
45 | def extract_step_dict(update_ops):
46 |     """Extract variables and their update steps from Assign and AssignAdd ops.
47 |
48 | Args:
49 | update_ops: list of Assign and AssignAdd ops, typically computed using Keras' opt.get_updates()
50 |
51 | Returns:
52 | dict mapping from variables to their step updates
53 | """
54 | name_to_var = {v.name: v for v in tf.global_variables()}
55 | updates = OrderedDict()
56 | for update in update_ops:
57 | var_name = update.op.inputs[0].name
58 | var = name_to_var[var_name]
59 | value = update.op.inputs[1]
60 | if update.op.type == 'Assign':
61 | updates[var] = value - var
62 | elif update.op.type == 'AssignAdd':
63 | updates[var] = value
64 | else:
65 |             raise ValueError("Update op type (%s) must be of type Assign or AssignAdd" % update.op.type)
66 | return updates
67 |
68 |
69 |
70 |
71 | def generator(z, output_dim=2, n_hidden=128, n_layer=2):
72 | with tf.variable_scope("generator"):
73 | h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.relu)
74 | x = slim.fully_connected(h, output_dim, activation_fn=None)
75 | return x
76 |
77 | def discriminator(x, n_hidden=128, n_layer=1, reuse=False):
78 | with tf.variable_scope("discriminator", reuse=reuse):
79 | h = slim.stack(tf.divide(x,4.0), slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.relu)
80 | log_d = slim.fully_connected(h, 1, activation_fn=None)
81 | return log_d
82 |
83 | tf.reset_default_graph()
84 |
85 | data = sample_mog(params['batch_size'])
86 |
87 | noise = ds.Normal(tf.zeros(params['z_dim']),
88 | tf.ones(params['z_dim'])).sample(params['batch_size'])
89 | # Construct generator and discriminator nets
90 | with slim.arg_scope([slim.fully_connected], weights_initializer=tf.orthogonal_initializer(gain=0.8)):
91 | samples = generator(noise, output_dim=params['x_dim'])
92 | real_score = discriminator(data)
93 | fake_score = discriminator(samples, reuse=True)
94 |
95 | # D maximizes this, G minimizes this + a regularizer
96 | V = -tf.reduce_mean(
97 | tf.nn.sigmoid_cross_entropy_with_logits(logits=real_score, labels=tf.ones_like(real_score)) +
98 | tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_score, labels=tf.zeros_like(fake_score)))
99 |
100 |
101 |
102 | gen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "generator")
103 | disc_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "discriminator")
104 |
105 | # Vanilla discriminator update
106 | d_opt = Adam(lr=params['disc_learning_rate'], beta_1=params['beta1'], epsilon=params['epsilon'])
107 | d_updates = d_opt.get_updates(disc_vars, [], -V)
108 | d_train_op = tf.group(*d_updates, name="d_train_op")
110 | step_dict = extract_step_dict(d_updates)  # note: not used further below
110 |
111 | # Penalty: squared L2 norm of the discriminator's gradient of V (the proposed regularizer)
112 | d_gradient_norm_sq = tf.square(tf.global_norm(tf.gradients(V, disc_vars)))
113 |
114 |
115 | g_opt = Adam(lr=params['gen_learning_rate'], beta_1=params['beta1'], epsilon=params['epsilon'])
116 | g_updates = g_opt.get_updates(gen_vars, [], V +params['regularizer_weight']*d_gradient_norm_sq)
117 | g_train_op = tf.group(*g_updates, name="g_train_op")
118 |
119 |
120 | # Or alternatively:
121 | '''
122 | d_opt = tf.train.AdamOptimizer(params['disc_learning_rate'], beta1=params['beta1'], epsilon=params['epsilon'])
123 | g_opt = tf.train.AdamOptimizer(params['gen_learning_rate'], beta1=params['beta1'], epsilon=params['epsilon'])
124 | d_train_op = d_opt.minimize(-V, var_list=disc_vars)
125 | g_train_op = g_opt.minimize(V+params['regularizer_weight']*d_gradient_norm_sq, var_list=gen_vars)
126 | '''
127 |
128 |
129 |
130 | norm_d = tf.global_norm(tf.gradients(V, disc_vars))
131 | norm_g = tf.global_norm(tf.gradients(V, gen_vars))
132 |
133 | sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True)))
134 | sess.run(tf.global_variables_initializer())
135 |
136 |
137 | xmax = 3
138 | fs = []
139 | frames = []
140 | np_samples = []
141 | ds = []  # discriminator gradient norms (note: shadows the `ds` distributions alias above)
142 | gs = []
143 | n_batches_viz = 10
144 | viz_every = params['viz_every']
145 | for i in tqdm(range(params['max_iter'])):
146 | sess.run(d_train_op)
147 | sess.run(g_train_op)
148 |     d, g, f = sess.run([norm_d, norm_g, V])
149 | fs.append(f)
150 | ds.append(d)
151 | gs.append(g)
152 | if i % viz_every == 0:
153 | np_samples.append(np.vstack([sess.run(samples) for _ in range(n_batches_viz)]))
154 | xx, yy = sess.run([samples, data])
155 | fig = figure(figsize=(5,5))
156 | scatter(xx[:, 0], xx[:, 1], edgecolor='none')
157 | scatter(yy[:, 0], yy[:, 1], c='g', edgecolor='none')
158 | axis('off')
159 |
160 | fig.savefig('fig'+str(i)+'.pdf')
161 | close(fig)
162 |
163 |
164 | np.savetxt("d_norm.out",np.array(ds))
165 | np.savetxt("g_norm.out",np.array(gs))
166 | fig = figure()
167 | ax = subplot(111)
168 | ax.set_ylabel('Discriminator Gradient L2 Norm')
169 | ax.set_xlabel('Iteration')
170 | plot(range(len(ds)), ds)
171 | fig.savefig('d_norm.pdf')
172 | fig = figure()
173 | ax = subplot(111)
174 | plot(range(len(gs)), gs)
175 | ax.set_ylabel('Generator Gradient L2 Norm')
176 | ax.set_xlabel('Iteration')
177 | fig.savefig('g_norm.pdf')
178 |
179 |
180 |
181 |
182 | np_samples_ = np_samples[::1]
183 | cols = len(np_samples_)
184 | bg_color = sns.color_palette('Greens', n_colors=256)[0]
185 | fig=figure(figsize=(2*cols, 2))
186 | for i, samps in enumerate(np_samples_):
187 | if i == 0:
188 | ax = subplot(1,cols,1)
189 | else:
190 | subplot(1,cols,i+1, sharex=ax, sharey=ax)
191 | ax2 = sns.kdeplot(samps[:, 0], samps[:, 1], shade=True, cmap='Greens', n_levels=20, clip=[[-xmax,xmax]]*2)
192 | ax2.set_axis_bgcolor(bg_color)
193 | xticks([]); yticks([])
194 | title('step %d'%(i*viz_every))
195 | ax.set_ylabel('%d unrolling steps'%params['unrolling_steps'])
196 | gcf().tight_layout()
197 | fig.savefig('series.pdf')
198 |
199 | np.savetxt("loss.out",np.array(fs))
200 | fig=figure()
201 | fs = np.array(fs)
202 | ax = subplot(111)
203 | plot(range(len(fs)), fs)
204 | ax.set_ylabel('Loss')
205 | ax.set_xlabel('Iteration')
206 | fig.savefig('loss.pdf')
207 |
--------------------------------------------------------------------------------
/gaussian-toy-unrolled.py:
--------------------------------------------------------------------------------
1 | import setGPU  # optional helper: picks an available GPU by setting CUDA_VISIBLE_DEVICES
2 | from collections import OrderedDict
3 | import tensorflow as tf
4 | import numpy as np
5 | import matplotlib as mpl
6 | mpl.use('Agg')
7 | from matplotlib.pyplot import *
8 | import seaborn as sns
9 | from tqdm import tqdm
10 | ds = tf.contrib.distributions
11 | slim = tf.contrib.slim
12 | graph_replace = tf.contrib.graph_editor.graph_replace
13 | from setproctitle import setproctitle
14 | setproctitle('unrolled-gan')
15 |
16 |
17 | from keras.optimizers import Adam
18 |
19 |
20 |
21 |
22 | def sample_mog(batch_size, n_mixture=8, std=0.02, radius=2.0):
23 | thetas = np.linspace(0, 2 * np.pi * (n_mixture-1)/float(n_mixture), n_mixture)
24 | xs, ys = radius * np.sin(thetas), radius * np.cos(thetas)
25 | cat = ds.Categorical(tf.zeros(n_mixture))
26 | comps = [ds.MultivariateNormalDiag([xi, yi], [std, std]) for xi, yi in zip(xs.ravel(), ys.ravel())]
27 | data = ds.Mixture(cat, comps)
28 | return data.sample(batch_size)
29 |
30 |
31 |
32 | params = dict(
33 | batch_size=512,
34 | disc_learning_rate=1e-4,
35 | gen_learning_rate=1e-4,
36 | beta1=0.5,
37 | epsilon=1e-8,
38 | max_iter=100001,
39 | viz_every=1000,
40 | z_dim=256,
41 | x_dim=2,
42 | unrolling_steps=15,
43 | )
44 |
45 |
46 |
47 |
48 |
49 | def extract_update_dict(update_ops):
50 | """Extract variables and their new values from Assign and AssignAdd ops.
51 |
52 | Args:
53 | update_ops: list of Assign and AssignAdd ops, typically computed using Keras' opt.get_updates()
54 |
55 | Returns:
56 | dict mapping from variable values to their updated value
57 | """
58 | name_to_var = {v.name: v for v in tf.global_variables()}
59 | updates = OrderedDict()
60 | for update in update_ops:
61 | var_name = update.op.inputs[0].name
62 | var = name_to_var[var_name]
63 | value = update.op.inputs[1]
64 | if update.op.type == 'Assign':
65 | updates[var.value()] = value
66 | elif update.op.type == 'AssignAdd':
67 | updates[var.value()] = var + value
68 | else:
69 |             raise ValueError("Update op type (%s) must be of type Assign or AssignAdd" % update.op.type)
70 | return updates
71 |
72 |
73 |
74 | def generator(z, output_dim=2, n_hidden=128, n_layer=2):
75 | with tf.variable_scope("generator"):
76 | h = slim.stack(z, slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.relu)
77 | x = slim.fully_connected(h, output_dim, activation_fn=None)
78 | return x
79 |
80 | def discriminator(x, n_hidden=128, n_layer=1, reuse=False):
81 | with tf.variable_scope("discriminator", reuse=reuse):
82 | h = slim.stack(tf.divide(x,4.0), slim.fully_connected, [n_hidden] * n_layer, activation_fn=tf.nn.relu)
83 | log_d = slim.fully_connected(h, 1, activation_fn=None)
84 | return log_d
85 |
86 | tf.reset_default_graph()
87 |
88 | data = sample_mog(params['batch_size'])
89 |
90 | noise = ds.Normal(tf.zeros(params['z_dim']),
91 | tf.ones(params['z_dim'])).sample(params['batch_size'])
92 | # Construct generator and discriminator nets
93 | with slim.arg_scope([slim.fully_connected], weights_initializer=tf.orthogonal_initializer(gain=0.8)):
94 | samples = generator(noise, output_dim=params['x_dim'])
95 | real_score = discriminator(data)
96 | fake_score = discriminator(samples, reuse=True)
97 |
98 | # Saddle objective
99 | loss = tf.reduce_mean(
100 | tf.nn.sigmoid_cross_entropy_with_logits(logits=real_score, labels=tf.ones_like(real_score)) +
101 | tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_score, labels=tf.zeros_like(fake_score)))
102 |
103 | gen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "generator")
104 | disc_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "discriminator")
105 |
106 | # Vanilla discriminator update
107 | d_opt = Adam(lr=params['disc_learning_rate'], beta_1=params['beta1'], epsilon=params['epsilon'])
108 | updates = d_opt.get_updates(disc_vars, [], loss)
109 | d_train_op = tf.group(*updates, name="d_train_op")
110 |
111 | # Unroll the optimization of the discriminator
112 | if params['unrolling_steps'] > 0:
113 | # Get dictionary mapping from variables to their update value after one optimization step
114 | update_dict = extract_update_dict(updates)
115 | cur_update_dict = update_dict
116 | for i in range(params['unrolling_steps'] - 1):
117 | # Compute variable updates given the previous iteration's updated variable
118 | cur_update_dict = graph_replace(update_dict, cur_update_dict)
119 | # Final unrolled loss uses the parameters at the last time step
120 | unrolled_loss = graph_replace(loss, cur_update_dict)
121 | else:
122 | unrolled_loss = loss
123 |
124 | # Optimize the generator on the unrolled loss
125 | g_train_opt = tf.train.AdamOptimizer(params['gen_learning_rate'], beta1=params['beta1'], epsilon=params['epsilon'])
126 | g_train_op = g_train_opt.minimize(-unrolled_loss, var_list=gen_vars)
127 |
128 |
129 |
130 |
131 | norm_d = tf.global_norm(tf.gradients(loss, disc_vars))
132 | norm_g = tf.global_norm(tf.gradients(loss, gen_vars))
133 |
134 | sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True)))
135 | sess.run(tf.global_variables_initializer())
136 |
137 |
138 | xmax = 3
139 | fs = []
140 | frames = []
141 | np_samples = []
142 | ds = []  # discriminator gradient norms (note: shadows the `ds` distributions alias above)
143 | gs = []
144 | n_batches_viz = 10
145 | viz_every = params['viz_every']
146 | for i in tqdm(range(params['max_iter'])):
147 | sess.run(d_train_op)
148 | sess.run(g_train_op)
149 | d, g, f, _ = sess.run([norm_d, norm_g, loss, unrolled_loss])
150 | fs.append(f)
151 | ds.append(d)
152 | gs.append(g)
153 | if i % viz_every == 0:
154 | np_samples.append(np.vstack([sess.run(samples) for _ in range(n_batches_viz)]))
155 | xx, yy = sess.run([samples, data])
156 | fig = figure(figsize=(5,5))
157 | scatter(xx[:, 0], xx[:, 1], edgecolor='none')
158 | scatter(yy[:, 0], yy[:, 1], c='g', edgecolor='none')
159 | axis('off')
160 | #if generate_movie:
161 | # frames.append(mplfig_to_npimage(fig))
162 | #show()
163 | fig.savefig('fig'+str(i)+'.pdf')
164 | close(fig)
165 |
166 |
167 | np.savetxt("d_norm.out",np.array(ds))
168 | np.savetxt("g_norm.out",np.array(gs))
169 | fig = figure()
170 | ax = subplot(111)
171 | ax.set_ylabel('Discriminator Gradient L2 Norm')
172 | ax.set_xlabel('Iteration')
173 | plot(range(len(ds)), ds)
174 | fig.savefig('d_norm.pdf')
175 | fig = figure()
176 | ax = subplot(111)
177 | plot(range(len(gs)), gs)
178 | ax.set_ylabel('Generator Gradient L2 Norm')
179 | ax.set_xlabel('Iteration')
180 | fig.savefig('g_norm.pdf')
181 |
182 |
183 |
184 |
185 | np_samples_ = np_samples[::1]
186 | cols = len(np_samples_)
187 | bg_color = sns.color_palette('Greens', n_colors=256)[0]
188 | fig=figure(figsize=(2*cols, 2))
189 | for i, samps in enumerate(np_samples_):
190 | if i == 0:
191 | ax = subplot(1,cols,1)
192 | else:
193 | subplot(1,cols,i+1, sharex=ax, sharey=ax)
194 | ax2 = sns.kdeplot(samps[:, 0], samps[:, 1], shade=True, cmap='Greens', n_levels=20, clip=[[-xmax,xmax]]*2)
195 | ax2.set_axis_bgcolor(bg_color)
196 | xticks([]); yticks([])
197 | title('step %d'%(i*viz_every))
198 | ax.set_ylabel('%d unrolling steps'%params['unrolling_steps'])
199 | gcf().tight_layout()
200 | fig.savefig('series.pdf')
201 |
202 | np.savetxt("loss.out",np.array(fs))
203 | fig=figure()
204 | fs = np.array(fs)
205 | ax = subplot(111)
206 | plot(range(len(fs)), fs)
207 | ax.set_ylabel('Loss')
208 | ax.set_xlabel('Iteration')
209 | fig.savefig('loss.pdf')
210 |
--------------------------------------------------------------------------------