├── README.md
├── __init__.py
├── adv_auto_mnist.py
├── dataset
│   └── download_mnist.sh
├── figure
│   ├── x_gaussian_8.png
│   └── z_uniform_2.png
├── load_data.py
├── models
│   ├── __init__.py
│   └── adv_autoencoder.py
├── source
│   ├── __init__.py
│   ├── costs
│   │   ├── __init__.py
│   │   ├── adversarial_autoenc_loss.py
│   │   ├── binary_cross_entropy_loss.py
│   │   ├── cross_entropy_loss.py
│   │   ├── quadratic_loss.py
│   │   └── weight_decay.py
│   ├── layers
│   │   ├── __init__.py
│   │   ├── batch_normalization.py
│   │   ├── layer.py
│   │   ├── linear.py
│   │   ├── lrelu.py
│   │   ├── relu.py
│   │   ├── sigmoid.py
│   │   ├── softmax.py
│   │   └── softplus.py
│   └── optimizers
│       ├── __init__.py
│       ├── adagrad.py
│       ├── adam.py
│       ├── momentum_sgd.py
│       ├── optimizer.py
│       └── sgd.py
├── train.py
└── train_adv_autoenc.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # Implementation of Adversarial Autoencoder (arXiv: http://arxiv.org/abs/1511.05644)
2 |
3 | Python code for training an Adversarial Autoencoder with Theano.
4 |
5 | You can train example Adversarial Autoencoder models on MNIST in the IPython notebook.
6 |
7 | See https://github.com/takerum/adversarial_autoencoder/blob/master/train_adv_autoenc.ipynb .
8 |
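If you prefer a script to the notebook, a minimal sketch of the same workflow is shown below (first fetch MNIST, e.g. `cd dataset && sh download_mnist.sh`, so that `dataset/mnist.pkl` exists; the parameters mirror the defaults in `train.py`, with the 2-D uniform prior used for the first figure below):

```python
# Minimal training sketch: trains the MNIST adversarial autoencoder and
# pickles (model, statuses) to ./trained_model/trained_model, as train.py does.
from train import train

model, statuses = train(latent_dim=2, z_prior='uniform', num_epochs=50)
```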
9 | - The hidden code z of the trained adversarial autoencoder with a 2-D uniform prior distribution (figure/z_uniform_2.png).
10 |
11 |
12 |
13 | - The input x sampled from the trained adversarial autoencoder with an 8-D Gaussian prior distribution (figure/x_gaussian_8.png).
14 |
15 |
16 |
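Samples like these can be regenerated from a saved model along the following lines (a sketch only; it assumes a model trained with `latent_dim=8`, `z_prior='gaussian'` and saved by `train.py` under the default `save_filename='trained_model'`):

```python
import cPickle

import numpy
import theano
import theano.tensor as T

# Load the (model, statuses) tuple pickled by train.py.
model, statuses = cPickle.load(open('./trained_model/trained_model', 'rb'))

# Compile the decoder in test mode (uses the batch-norm running statistics).
z = T.matrix()
f_decode = theano.function([z], model.decode_test(z))

# Draw latent codes from the 8-D Gaussian prior and decode them to digit images.
z_samples = numpy.random.standard_normal((100, 8)).astype(theano.config.floatX)
x_samples = f_decode(z_samples)  # shape (100, 784), pixel values in (0, 1)
```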
17 | ## Required libraries:
18 | Python 2.7, Theano 0.7.0
19 |
20 | ## References:
21 | [1] Alireza Makhzani, Jonathon Shlens, Navdeep Jaitly and Ian Goodfellow. Adversarial Autoencoders. arXiv preprint (http://arxiv.org/abs/1511.05644).
22 |
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | __author__ = 'TakeruMiyato'
3 |
4 | import source
5 | import models
--------------------------------------------------------------------------------
/adv_auto_mnist.py:
--------------------------------------------------------------------------------
1 | from models.adv_autoencoder import AdversarialAutoencoder
2 | import source.layers as L
3 | from theano.tensor.shared_randomstreams import RandomStreams
4 | import numpy,theano
5 | import theano.tensor as T
6 |
7 | def get_normalized_vector(v):
8 | v = v / (1e-20 + T.max(T.abs_(v), axis=1, keepdims=True))
9 | v_2 = T.sum(v**2,axis=1,keepdims=True)
10 | return v / T.sqrt(1e-6+v_2)
11 |
12 |
13 | class AdversarialAutoencoderMNIST(AdversarialAutoencoder):
14 |
15 | def __init__(self,latent_dim=2,z_prior='gaussian'):
16 |
17 | self.z_prior = z_prior
18 |
19 | self.enc_l1 = L.Linear((784,1000))
20 | self.enc_b1 = L.BatchNormalization(1000)
21 | self.enc_l2 = L.Linear((1000,1000))
22 | self.enc_b2 = L.BatchNormalization(1000)
23 | self.enc_l3 = L.Linear((1000,latent_dim))
24 | self.enc_b3 = L.BatchNormalization(latent_dim)
25 |
26 | self.dec_l1 = L.Linear((latent_dim,1000))
27 | self.dec_b1 = L.BatchNormalization(1000)
28 | self.dec_l2 = L.Linear((1000,1000))
29 | self.dec_b2 = L.BatchNormalization(1000)
30 | self.dec_l3 = L.Linear((1000,784))
31 |
32 | self.D_l1 = L.Linear((latent_dim,500))
33 | self.D_b1 = L.BatchNormalization(500)
34 | self.D_l2 = L.Linear((500,500))
35 | self.D_b2 = L.BatchNormalization(500)
36 | self.D_l3 = L.Linear((500,1))
37 |
38 | self.model_params = self.enc_l1.params + self.enc_l2.params + self.enc_l3.params \
39 | + self.dec_l1.params + self.dec_l2.params + self.dec_l3.params \
40 | + self.enc_b1.params + self.enc_b2.params + self.enc_b3.params \
41 | + self.dec_b1.params + self.dec_b2.params
42 | self.D_params = self.D_l1.params + self.D_l2.params + self.D_l3.params
43 | self.rng = RandomStreams(seed=numpy.random.randint(1234))
44 |
45 |
46 | def encode(self,input,train=True):
47 | h = input
48 | h = self.enc_l1(h)
49 | h = self.enc_b1(h,train=train)
50 | h = L.relu(h)
51 | h = self.enc_l2(h)
52 | h = self.enc_b2(h,train=train)
53 | h = L.relu(h)
54 | h = self.enc_l3(h)
55 | h = self.enc_b3(h,train=train)
56 | return h
57 |
58 | def decode(self,input,train=True):
59 | h = input
60 | h = self.dec_l1(h)
61 | h = self.dec_b1(h,train=train)
62 | h = L.relu(h)
63 | h = self.dec_l2(h)
64 | h = self.dec_b2(h,train=train)
65 | h = L.relu(h)
66 | h = self.dec_l3(h)
67 | h = L.sigmoid(h)
68 | return h
69 |
70 | def D(self,input,train=True):
71 | h = input
72 | h = self.D_l1(h)
73 | h = L.relu(h)
74 | h = self.D_l2(h)
75 | h = L.relu(h)
76 | h = self.D_l3(h)
77 | h = L.sigmoid(h)
78 | return h
79 |
80 | def sample_from_prior(self,z):
81 |
82 |         ###### gaussian prior: z ~ N(0, I) #######
83 |         if(self.z_prior == 'gaussian'):
84 |             return 1.0*self.rng.normal(size=z.shape,dtype=theano.config.floatX)
85 | 
86 |         ###### uniform prior: z drawn uniformly from a ball of radius 2 ########
87 |         elif(self.z_prior == 'uniform'):
88 |             v = get_normalized_vector(self.rng.normal(size=z.shape,dtype=theano.config.floatX))  # random direction (unit L2 norm per row)
89 |             r = T.power(self.rng.uniform(size=z.sum(axis=1,keepdims=True).shape,low=0,high=1.0,dtype=theano.config.floatX),1./z.shape[1])  # radius: U^(1/d) gives a uniform density inside the ball
90 |             r = T.patternbroadcast(r,[False,True])
91 |             return 2.0*r*v
92 |
93 | else:
94 | raise NotImplementedError()
95 |
96 |
--------------------------------------------------------------------------------
/dataset/download_mnist.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | wget -c http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
3 | gzip -d mnist.pkl.gz
--------------------------------------------------------------------------------
/figure/x_gaussian_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/takerum/adversarial_autoencoder/0af027e8cfa1ec90011c58edec44bca4a2b95117/figure/x_gaussian_8.png
--------------------------------------------------------------------------------
/figure/z_uniform_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/takerum/adversarial_autoencoder/0af027e8cfa1ec90011c58edec44bca4a2b95117/figure/z_uniform_2.png
--------------------------------------------------------------------------------
/load_data.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import cPickle
3 | import os
4 | import theano
5 |
6 | def load_mnist_dataset():
7 | dataset = cPickle.load(open('dataset/mnist.pkl','rb'))
8 | train_set_x = numpy.concatenate((dataset[0][0],dataset[1][0]),axis=0)
9 | train_set_y = numpy.concatenate((dataset[0][1],dataset[1][1]),axis=0)
10 | return ((train_set_x,train_set_y),(dataset[2][0],dataset[2][1]))
11 |
12 | def _shared_dataset(data_xy):
13 | data_x, data_y = data_xy
14 | shared_x = theano.shared(numpy.asarray(data_x,
15 | dtype=theano.config.floatX), borrow=True)
16 | shared_y = theano.shared(numpy.asarray(data_y,
17 | dtype='int32'), borrow=True)
18 | return shared_x, shared_y
19 |
20 | def load_mnist_full():
21 | dataset = load_mnist_dataset()
22 |
23 | train_set_x, train_set_y = dataset[0]
24 | test_set_x, test_set_y = dataset[1]
25 |
26 | train_set_x, train_set_y = _shared_dataset((train_set_x, train_set_y))
27 | test_set_x, test_set_y = _shared_dataset((test_set_x, test_set_y))
28 |
29 | return [(train_set_x, train_set_y), (test_set_x, test_set_y)]
30 |
31 | def load_mnist_for_validation(n_v = 10000):
32 | dataset = load_mnist_dataset()
33 |
34 | train_set_x, train_set_y = dataset[0]
35 |
36 | randix = numpy.random.permutation(train_set_x.shape[0])
37 |
38 | valid_set_x = train_set_x[randix[:n_v]]
39 | valid_set_y = train_set_y[randix[:n_v]]
40 | train_set_x = train_set_x[randix[n_v:]]
41 | train_set_y = train_set_y[randix[n_v:]]
42 |
43 | train_set_x, train_set_y = _shared_dataset((train_set_x, train_set_y))
44 | valid_set_x, valid_set_y = _shared_dataset((valid_set_x, valid_set_y))
45 |
46 | return [(train_set_x, train_set_y), (valid_set_x, valid_set_y)]
47 |
48 | def load_mnist_for_semi_sup(n_l=1000, n_v=1000):
49 | dataset = load_mnist_dataset()
50 |
51 | _train_set_x, _train_set_y = dataset[0]
52 |
53 | rand_ind = numpy.random.permutation(_train_set_x.shape[0])
54 | _train_set_x = _train_set_x[rand_ind]
55 | _train_set_y = _train_set_y[rand_ind]
56 |
57 |     s_c = n_l // 10  # number of labelled examples per class (integer, used as a slice index)
58 | train_set_x = numpy.zeros((n_l, 28 ** 2))
59 | train_set_y = numpy.zeros(n_l)
60 | for i in xrange(10):
61 | ind = numpy.where(_train_set_y == i)[0]
62 | train_set_x[i * s_c:(i + 1) * s_c, :] = _train_set_x[ind[0:s_c], :]
63 | train_set_y[i * s_c:(i + 1) * s_c] = _train_set_y[ind[0:s_c]]
64 | _train_set_x = numpy.delete(_train_set_x, ind[0:s_c], 0)
65 | _train_set_y = numpy.delete(_train_set_y, ind[0:s_c])
66 |
67 |
68 | rand_ind = numpy.random.permutation(train_set_x.shape[0])
69 | train_set_x = train_set_x[rand_ind]
70 | train_set_y = train_set_y[rand_ind]
71 | valid_set_x = _train_set_x[:n_v]
72 | valid_set_y = _train_set_y[:n_v]
73 | # ul_train_set_x = _train_set_x[n_v:]
74 | train_set_ul_x = numpy.concatenate((train_set_x, _train_set_x[n_v:]), axis=0)
75 | train_set_ul_x = train_set_ul_x[numpy.random.permutation(train_set_ul_x.shape[0])]
76 | ul_train_set_y = _train_set_y[n_v:] # dummy
77 |
78 | train_set_x, train_set_y = _shared_dataset((train_set_x, train_set_y))
79 | train_set_ul_x, ul_train_set_y = _shared_dataset((train_set_ul_x, ul_train_set_y))
80 | valid_set_x, valid_set_y = _shared_dataset((valid_set_x, valid_set_y))
81 |
82 | return [(train_set_x, train_set_y, train_set_ul_x), (valid_set_x, valid_set_y)]
83 |
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | import adv_autoencoder
2 | 
3 | AdversarialAutoencoder = adv_autoencoder.AdversarialAutoencoder
--------------------------------------------------------------------------------
/models/adv_autoencoder.py:
--------------------------------------------------------------------------------
1 |
2 | class AdversarialAutoencoder(object):
3 |
4 | def __init__(self):
5 | self.model_params=None
6 | self.D_params=None
7 | raise NotImplementedError()
8 |
9 | ##### define encoder function #####
10 | def encode_train(self,input):
11 | return self.encode(input=input,train=True)
12 | def encode_test(self,input):
13 | return self.encode(input=input,train=False)
14 |
15 | def encode(self,input,train=True):
16 | raise NotImplementedError()
17 |
18 | ##### define decoder function #####
19 | def decode_train(self,input):
20 | return self.decode(input=input,train=True)
21 | def decode_test(self,input):
22 | return self.decode(input=input,train=False)
23 |
24 | def decode(self,input,train=True):
25 | raise NotImplementedError()
26 |
27 | ##### define discriminator function #####
28 | def D_train(self,input):
29 | return self.D(input=input,train=True)
30 | def D_test(self,input):
31 | return self.D(input=input,train=False)
32 |
33 | def D(self,input,train=True):
34 | raise NotImplementedError()
35 |
--------------------------------------------------------------------------------
/source/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | __author__ = 'TakeruMiyato'
3 |
4 | import layers
5 | import costs
6 | import optimizers
7 |
--------------------------------------------------------------------------------
/source/costs/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | import cross_entropy_loss
3 | import quadratic_loss
4 | import weight_decay
5 | import binary_cross_entropy_loss
6 | import adversarial_autoenc_loss
7 |
8 | cross_entropy_loss = cross_entropy_loss.cross_entropy_loss
9 | binary_cross_entropy_loss = binary_cross_entropy_loss.binary_cross_entropy_loss
10 | quadratic_loss = quadratic_loss.quadratic_loss
11 | weight_decay = weight_decay.weight_decay
12 | adversarial_autoenc_loss = adversarial_autoenc_loss.adversarial_autoenc_loss
13 |
--------------------------------------------------------------------------------
/source/costs/adversarial_autoenc_loss.py:
--------------------------------------------------------------------------------
1 | import theano
2 | import theano.tensor as T
3 | import numpy
4 | from binary_cross_entropy_loss import _binary_cross_entropy_loss
5 | from quadratic_loss import _quadratic_loss
6 |
7 | def adversarial_autoenc_loss(x,enc_f,dec_f,disc_f,p_z_sampler,
8 | obj_type,
9 | lamb=numpy.asarray(1.0,dtype=theano.config.floatX)):
10 |
11 | z_q = enc_f(x)
12 | z_p = p_z_sampler(z_q)
13 |
14 | adv_loss = adversarial_loss(z_q=z_q , z_p=z_p ,disc_f=disc_f)
15 | recon_loss = reconstruction_loss(x=x ,z=z_q ,dec_f=dec_f ,obj_type=obj_type)
16 |
17 |     return recon_loss - lamb*adv_loss, recon_loss, adv_loss  # (autoencoder objective, reconstruction term, discriminator objective)
18 |
19 |
20 | def adversarial_loss(z_p,z_q,disc_f):
21 | y_q = disc_f(z_q)
22 | y_p = disc_f(z_p)
23 |     return -T.mean(T.log(y_p) + (T.log(1-y_q)))  # discriminator cross-entropy: prior samples z_p labelled 1, encoder outputs z_q labelled 0
24 |
25 | def reconstruction_loss(x,z,dec_f,obj_type='QE'):
26 | x_ = dec_f(z)
27 | if obj_type == 'QE':
28 | return _quadratic_loss(x_,x)
29 |     elif obj_type == 'CE':
30 |         return _binary_cross_entropy_loss(x_,x)
31 |     else:
32 |         raise NotImplementedError()
--------------------------------------------------------------------------------
/source/costs/binary_cross_entropy_loss.py:
--------------------------------------------------------------------------------
1 | import theano.tensor as T
2 |
3 | def binary_cross_entropy_loss(x,t,forward_func):
4 | print "costs/binary_cross_entropy_loss"
5 | y = forward_func(x)
6 | return _binary_cross_entropy_loss(y,t)
7 |
8 | def _binary_cross_entropy_loss(y,t):
9 | return -T.mean(T.sum(t*T.log(y) + (1-t)*T.log(1-y),axis=1))
10 |
--------------------------------------------------------------------------------
/source/costs/cross_entropy_loss.py:
--------------------------------------------------------------------------------
1 | import theano.tensor as T
2 |
3 | def cross_entropy_loss(x,t,forward_func):
4 | print "costs/cross_entropy_loss"
5 | y = forward_func(x)
6 | return _cross_entropy_loss(y,t)
7 |
8 | def _cross_entropy_loss(y,t):
9 | if(t.ndim==1):
10 | return -T.mean(T.log(y)[T.arange(t.shape[0]), t])
11 | elif(t.ndim==2):
12 | return -T.mean(T.sum(t*T.log(y),axis=1))
13 |
--------------------------------------------------------------------------------
/source/costs/quadratic_loss.py:
--------------------------------------------------------------------------------
1 | import theano.tensor as T
2 |
3 | def quadratic_loss(x,t,forward_func):
4 | print "costs/quadratic_loss"
5 | y = forward_func(x)
6 | return _quadratic_loss(y,t)
7 |
8 | def _quadratic_loss(y,t):
9 | return T.mean(T.sum((y-t)**2,axis=1))
10 |
--------------------------------------------------------------------------------
/source/costs/weight_decay.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import theano
3 | import theano.tensor as T
4 |
5 |
6 | def weight_decay(params,coeff):
7 | print "costs/weight_decay"
8 | cost = 0
9 | for param in params:
10 | cost += T.sum(param**2)
11 | return theano.shared(numpy.array(coeff).astype(theano.config.floatX))*cost
12 |
--------------------------------------------------------------------------------
/source/layers/__init__.py:
--------------------------------------------------------------------------------
1 | import relu,lrelu
2 | import linear
3 | import sigmoid
4 | import softmax
5 | import batch_normalization,softplus
6 |
7 | Linear = linear.Linear
8 | BatchNormalization = batch_normalization.BatchNormalization
9 |
10 | relu = relu.relu
11 | lrelu = lrelu.lrelu
12 | sigmoid = sigmoid.sigmoid
13 | softmax = softmax.softmax
14 | softplus = softplus.softplus
15 |
--------------------------------------------------------------------------------
/source/layers/batch_normalization.py:
--------------------------------------------------------------------------------
1 | import theano
2 | import theano.tensor as T
3 | import numpy
4 |
5 | from layer import Layer
6 |
7 | class BatchNormalization(Layer):
8 |
9 | def __init__(self,size,moving_avg_ratio=0.9,initial_gamma=None,initial_beta=None):
10 |
11 | self.params = []
12 | self.moving_avg_ratio = theano.shared(numpy.array(moving_avg_ratio).astype(theano.config.floatX))
13 | self.finetune_N = theano.shared(0)
14 |         if(initial_gamma is not None):
15 | assert initial_gamma.shape == (size,) or initial_gamma.shape == (1,size,1)
16 | gamma_values = initial_gamma.reshape((1,size,1))
17 | else:
18 | gamma_values = numpy.ones(shape=(1,size,1),dtype=theano.config.floatX)
19 | self.gamma = theano.shared(gamma_values)
20 | self.params.append(self.gamma)
21 |
22 |         if(initial_beta is not None):
23 | assert initial_beta.shape == (size,) or initial_beta.shape == (1,size,1)
24 | beta_values = initial_beta.reshape((1,size,1))
25 | else:
26 | beta_values = numpy.zeros(shape=(1,size,1),dtype=theano.config.floatX)
27 | self.beta = theano.shared(beta_values)
28 | self.params.append(self.beta)
29 |
30 | est_var_values = numpy.ones((1,size,1),dtype=theano.config.floatX)
31 | est_mean_values = numpy.zeros((1,size,1),dtype=theano.config.floatX)
32 | self.est_var = theano.shared(est_var_values)
33 | self.est_mean = theano.shared(est_mean_values)
34 |
35 | def __call__(self,inputs,train=True,update_batch_stat=True,finetune=False):
36 | return self.forward(inputs,train=train,update_batch_stat=update_batch_stat,finetune=finetune)
37 |
38 | def forward(self,input_org,train=True,update_batch_stat=True,finetune=False):
39 | ldim,cdim,rdim = self._internal_shape(input_org)
40 | input = input_org.reshape((ldim,cdim,rdim))
41 | if (train):
42 | mean = T.mean(input, axis=(0, 2), keepdims=True )
43 | var = T.mean((input-mean)**2, axis=(0, 2), keepdims=True)
44 |
45 |             if(update_batch_stat):  # update the running mean/var as a side effect of the forward graph (via default_update)
46 | finetune_N = theano.clone(self.finetune_N, share_inputs=False)
47 | if(finetune):
48 | finetune_N.default_update = finetune_N+1
49 | ratio = T.cast(1-1.0/(finetune_N+1),theano.config.floatX)
50 | else:
51 | finetune_N.default_update = 0
52 | ratio = self.moving_avg_ratio
53 | m = ldim*rdim
54 | scale = T.cast(m/(m-1.0),theano.config.floatX)
55 | est_mean = theano.clone(self.est_mean, share_inputs=False)
56 | est_var = theano.clone(self.est_var, share_inputs=False)
57 | est_mean.default_update = T.cast(ratio*self.est_mean + (1-ratio)*mean,theano.config.floatX)
58 | est_var.default_update = T.cast(ratio*self.est_var + (1-ratio)*scale*var,theano.config.floatX)
59 | mean += 0 * est_mean
60 | var += 0 * est_var
61 | #self.normalized_input = (input - self._pbc(mean))/T.sqrt(1e-6+self._pbc(var))
62 | #output = self._pbc(self.gamma) * self.normalized_input + self._pbc(self.beta)
63 | output = self._pbc (self.gamma) * (input - self._pbc(mean)) \
64 | / T.sqrt(1e-6+self._pbc(var)) + self._pbc(self.beta)
65 |
66 | else:
67 | output = self._pbc(self.gamma) * (input - self._pbc(self.est_mean)) \
68 | / T.sqrt(1e-6+self._pbc(self.est_var)) + self._pbc(self.beta)
69 |
70 | return output.reshape(input_org.shape)
71 |
72 | def _pbc(self,x):
73 | return T.patternbroadcast(x,(True,False,True))
74 |
75 | def _internal_shape(self, x):
76 | ldim = x.shape[0]
77 | cdim = self.gamma.size
78 | rdim = x.size // (ldim * cdim)
79 | return ldim, cdim, rdim
80 |
--------------------------------------------------------------------------------
/source/layers/layer.py:
--------------------------------------------------------------------------------
1 |
2 | class Layer(object):
3 |
4 | def __call__(self, input):
5 | return self.forward(input)
6 |
7 | def forward(self,input):
8 | raise NotImplementedError()
9 |
--------------------------------------------------------------------------------
/source/layers/linear.py:
--------------------------------------------------------------------------------
1 | import theano
2 | import theano.tensor as T
3 | import numpy
4 |
5 | from layer import Layer
6 |
7 | class Linear(Layer):
8 |
9 | def __init__(self,size,use_bias=True,initial_W=None,initial_b=None):
10 | self.use_bias = use_bias
11 | self.params = []
12 |
13 | if(initial_W is not None):
14 | assert initial_W.shape == size
15 | W_values = initial_W
16 | else:
17 | W_values = numpy.random.normal(0, numpy.sqrt(1. / size[0]), size=size).astype(theano.config.floatX)
18 | self.W = theano.shared(W_values)
19 | self.params.append(self.W)
20 |
21 | if(self.use_bias == True):
22 | if(initial_b is not None):
23 |                 assert initial_b.shape == (size[1],)
24 | b_values = initial_b
25 | else:
26 | b_values = numpy.zeros((size[1],)).astype(theano.config.floatX)
27 | self.b = theano.shared(b_values)
28 | self.params.append(self.b)
29 |
30 | def forward(self,input):
31 | input = self._as_mat(input)
32 | output = T.dot(input, self.W)
33 | if(self.use_bias == True):
34 | output += self.b
35 |
36 | return output
37 |
38 | def _as_mat(self,x):
39 | return x.reshape((x.shape[0],x.size//x.shape[0]))
40 |
--------------------------------------------------------------------------------
/source/layers/lrelu.py:
--------------------------------------------------------------------------------
1 | import theano
2 | import theano.tensor as T
3 | import numpy
4 | from layer import Layer
5 |
6 |
7 | class LReLU(Layer):
8 |
9 | def __init__(self,slope):
10 | self.slope = theano.shared(numpy.asarray(slope,theano.config.floatX))
11 |
12 | def forward(self,x):
13 | return T.maximum(self.slope*x, x)
14 |
15 | def lrelu(x,slope=0.1):
16 | return LReLU(slope)(x)
--------------------------------------------------------------------------------
/source/layers/relu.py:
--------------------------------------------------------------------------------
1 | import theano.tensor as T
2 | from layer import Layer
3 |
4 | class ReLU(Layer):
5 |
6 | def forward(self,x):
7 | return T.maximum(0.0, x)
8 |
9 | def relu(x):
10 | return ReLU()(x)
--------------------------------------------------------------------------------
/source/layers/sigmoid.py:
--------------------------------------------------------------------------------
1 | import theano.tensor as T
2 | from layer import Layer
3 |
4 | class Sigmoid(Layer):
5 |
6 | def forward(self,x):
7 | return T.nnet.sigmoid(x)
8 |
9 | def sigmoid(x):
10 | return Sigmoid()(x)
--------------------------------------------------------------------------------
/source/layers/softmax.py:
--------------------------------------------------------------------------------
1 | import theano.tensor as T
2 | from layer import Layer
3 |
4 | class Softmax(Layer):
5 |
6 | def __init__(self,stable):
7 | self.stable = stable
8 |
9 | def forward(self,x):
10 | if(self.stable):
11 | x -= x.max(axis=1,keepdims=True)
12 | e_x = T.exp(x)
13 | out = e_x / e_x.sum(axis=1, keepdims=True)
14 | return out
15 |
16 | def softmax(x,stable=False):
17 | return Softmax(stable=stable)(x)
18 |
19 |
--------------------------------------------------------------------------------
/source/layers/softplus.py:
--------------------------------------------------------------------------------
1 | import theano.tensor as T
2 | from layer import Layer
3 |
4 | class Softplus(Layer):
5 |
6 | def forward(self,x):
7 | return T.nnet.softplus(x)
8 |
9 | def softplus(x):
10 | return Softplus()(x)
--------------------------------------------------------------------------------
/source/optimizers/__init__.py:
--------------------------------------------------------------------------------
1 | import sgd
2 | import momentum_sgd
3 | import adagrad
4 | import adam
5 |
6 | SGD = sgd.SGD
7 | MomentumSGD = momentum_sgd.MomentumSGD
8 | AdaGrad = adagrad.AdaGrad
9 | ADAM = adam.ADAM
--------------------------------------------------------------------------------
/source/optimizers/adagrad.py:
--------------------------------------------------------------------------------
1 | from optimizer import Optimizer
2 | from collections import OrderedDict
3 | import theano
4 | import theano.tensor as T
5 | import numpy
6 |
7 | class AdaGrad(Optimizer):
8 | def __init__(self,cost,params,lr=0.1):
9 | self.lr = theano.shared(numpy.array(lr).astype(theano.config.floatX))
10 | super(AdaGrad,self).__init__(cost,params)
11 |
12 | def _updates(self):
13 | updates = OrderedDict()
14 | g_model_params = []
15 | model_adg_rates = []
16 | for param in self.params:
17 | gparam = T.grad(self.cost,wrt=param)
18 | g_model_params.append(gparam)
19 | adg_rate= theano.shared(numpy.ones(param.get_value(borrow=True).shape,dtype=theano.config.floatX))
20 | model_adg_rates.append(adg_rate)
21 |
22 | for param, gparam,adg_rate in zip(self.params, g_model_params,model_adg_rates):
23 | updates[adg_rate] = adg_rate + gparam*gparam
24 | stepped_param = param - (self.lr/T.sqrt(updates[adg_rate]))*gparam
25 | updates[param] = stepped_param
26 |
27 | return updates
28 |
--------------------------------------------------------------------------------
/source/optimizers/adam.py:
--------------------------------------------------------------------------------
1 | from optimizer import Optimizer
2 | from collections import OrderedDict
3 | import theano
4 | import theano.tensor as T
5 | import numpy
6 |
7 | class ADAM(Optimizer):
8 | def __init__(self,cost,params,alpha=0.001):
9 | self.alpha = theano.shared(numpy.array(alpha).astype(theano.config.floatX))
10 | super(ADAM,self).__init__(cost,params)
11 |
12 | def _updates(self):
13 | updates = OrderedDict()
14 | t = theano.shared(numpy.array(1).astype(theano.config.floatX))
15 | alpha = self.alpha
16 | beta_1 = numpy.array(0.9).astype(theano.config.floatX)
17 | beta_2 = numpy.array(0.999).astype(theano.config.floatX)
18 | epsilon = numpy.array(1.0*10**-8.0).astype(theano.config.floatX)
19 | lam = numpy.array(1.0-1.0*10**-8.0).astype(theano.config.floatX)
20 | g_model_params = []
21 | models_m = []
22 | models_v = []
23 | for param in self.params:
24 | gparam = T.grad(self.cost, wrt=param)
25 | g_model_params.append(gparam)
26 | m = theano.shared(numpy.zeros(param.get_value(borrow=True).shape, dtype=theano.config.floatX))
27 | v = theano.shared(numpy.zeros(param.get_value(borrow=True).shape, dtype=theano.config.floatX))
28 | models_m.append(m)
29 | models_v.append(v)
30 | for param, gparam, m, v in zip(self.params, g_model_params, models_m, models_v):
31 | beta_1_t = T.cast(beta_1 * lam ** (t - 1), theano.config.floatX)
32 | updates[m] = T.cast(beta_1_t * m + (1 - beta_1_t) * gparam,theano.config.floatX)
33 | updates[v] = T.cast(beta_2 * v + (1 - beta_2) * (gparam * gparam),theano.config.floatX)
34 | m_hat = T.cast(updates[m] / (1 - beta_1 ** t), theano.config.floatX)
35 | v_hat = T.cast(updates[v] / (1 - beta_2 ** t), theano.config.floatX)
36 | updates[param] = param - alpha * m_hat / (T.sqrt(v_hat) + epsilon)
37 | updates[t] = t + 1
38 | return updates
39 |
--------------------------------------------------------------------------------
/source/optimizers/momentum_sgd.py:
--------------------------------------------------------------------------------
1 | from optimizer import Optimizer
2 | from collections import OrderedDict
3 | import theano
4 | import theano.tensor as T
5 | import numpy
6 |
7 | class MomentumSGD(Optimizer):
8 | def __init__(self,cost,params,lr=0.1,momentum_ratio=0.9):
9 | self.lr = theano.shared(numpy.array(lr).astype(theano.config.floatX))
10 | self.ratio = theano.shared(numpy.array(momentum_ratio).astype(theano.config.floatX))
11 | super(MomentumSGD,self).__init__(cost,params)
12 |
13 | def _updates(self):
14 | updates = OrderedDict()
15 | g_model_params = []
16 | g_model_params_mom = []
17 | for param in self.params:
18 | gparam = T.grad(self.cost,wrt=param)
19 | g_model_params.append(gparam)
20 | gparam_mom = theano.shared(numpy.zeros(param.get_value(borrow=True).shape,dtype=theano.config.floatX))
21 | g_model_params_mom.append(gparam_mom)
22 |
23 | for param, gparam_mom, gparam in zip(self.params, g_model_params_mom, g_model_params):
24 | updates[gparam_mom] = self.ratio * gparam_mom + (1. - self.ratio) * self.lr * gparam
25 | updates[param] = param-updates[gparam_mom]
26 |
27 | return updates
28 |
--------------------------------------------------------------------------------
/source/optimizers/optimizer.py:
--------------------------------------------------------------------------------
1 | import theano
2 | import theano.tensor
3 |
4 | class Optimizer(object):
5 |
6 | def __init__(self,cost,params):
7 | self.cost = cost
8 | self.params = params
9 | self.updates = self._updates()
10 |
11 | def _updates(self):
12 | raise NotImplementedError()
--------------------------------------------------------------------------------
/source/optimizers/sgd.py:
--------------------------------------------------------------------------------
1 | from optimizer import Optimizer
2 | from collections import OrderedDict
3 | import theano
4 | import theano.tensor as T
5 | import numpy
6 |
7 | class SGD(Optimizer):
8 | def __init__(self,cost,params,lr=0.1):
9 | self.lr = theano.shared(numpy.array(lr).astype(theano.config.floatX))
10 | super(SGD,self).__init__(cost,params)
11 |
12 | def _updates(self):
13 | updates = OrderedDict()
14 | for param in self.params:
15 | gparam = T.grad(self.cost, wrt=param)
16 | updates[param] = param - self.lr*gparam
17 | return updates
18 |
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | from source import optimizers,costs
4 | import numpy
5 | import theano
6 | import theano.tensor as T
7 | from theano.tensor.shared_randomstreams import RandomStreams
8 | import cPickle
9 | import load_data
10 | from adv_auto_mnist import AdversarialAutoencoderMNIST
11 |
12 | import time
13 | import matplotlib.pyplot as plt
14 | from matplotlib.pyplot import cm
15 |
16 | import os
17 | import errno
18 |
19 | def make_sure_path_exists(path):
20 | try:
21 | os.makedirs(path)
22 | except OSError as exception:
23 | if exception.errno != errno.EEXIST:
24 | raise
25 |
26 |
27 |
28 | def train(latent_dim = 2, #dimension of latent variable z
29 | z_prior = 'gaussian', # 'gaussian' or 'uniform'
30 |           lamb = 10., #weight of the adversarial cost relative to the reconstruction cost
31 |           recon_obj_type = 'CE', #objective function on reconstruction ( 'CE'(cross ent.) or 'QE'(quadratic error) )
32 |           initial_learning_rate = 0.002,
33 | learning_rate_decay=1.0,
34 | num_epochs=50,
35 | batch_size=100,
36 | save_filename='trained_model',
37 | seed=1):
38 |
39 |
40 | numpy.random.seed(seed=seed)
41 |
42 | dataset = load_data.load_mnist_full()
43 |
44 | x_train,_ = dataset[0]
45 | x_test,_ = dataset[1]
46 |
47 | model = AdversarialAutoencoderMNIST(latent_dim=latent_dim,z_prior=z_prior)
48 |
49 | x = T.matrix()
50 |
51 | loss_for_training,_,adv_loss_for_training = costs.adversarial_autoenc_loss(x=x,
52 | enc_f=model.encode_train,
53 | dec_f=model.decode_train,
54 | disc_f=model.D_train,
55 | p_z_sampler=model.sample_from_prior,
56 | obj_type=recon_obj_type,
57 | lamb=numpy.asarray(lamb,dtype=theano.config.floatX))
58 |
59 | _,recon_loss,adv_loss = costs.adversarial_autoenc_loss(x=x,
60 | enc_f=model.encode_test,
61 | dec_f=model.decode_test,
62 | disc_f=model.D_test,
63 | p_z_sampler=model.sample_from_prior,
64 | obj_type=recon_obj_type,
65 | lamb=numpy.asarray(lamb,dtype=theano.config.floatX))
66 |
67 | optimizer_recon = optimizers.ADAM(cost=loss_for_training,
68 | params=model.model_params,
69 |                                       alpha=numpy.asarray(initial_learning_rate,dtype=theano.config.floatX))
70 | optimizer_adv = optimizers.ADAM(cost=adv_loss_for_training,
71 | params=model.D_params,
72 |                                     alpha=numpy.asarray(initial_learning_rate,dtype=theano.config.floatX))
73 |
74 | index = T.iscalar()
75 |
76 | f_training_model = theano.function(inputs=[index], outputs=loss_for_training, updates=optimizer_recon.updates,
77 | givens={
78 | x:x_train[batch_size*index:batch_size*(index+1)]})
79 | f_training_discriminator = theano.function(inputs=[index], outputs=adv_loss_for_training, updates=optimizer_adv.updates,
80 | givens={
81 | x:x_train[batch_size*index:batch_size*(index+1)]})
82 |
83 |
84 | f_recon_train = theano.function(inputs=[index], outputs=recon_loss,
85 | givens={
86 | x:x_train[batch_size*index:batch_size*(index+1)]})
87 | f_adv_train = theano.function(inputs=[index], outputs=adv_loss,
88 | givens={
89 | x:x_train[batch_size*index:batch_size*(index+1)]})
90 | f_recon_test = theano.function(inputs=[index], outputs=recon_loss,
91 | givens={
92 | x:x_test[batch_size*index:batch_size*(index+1)]})
93 | f_adv_test = theano.function(inputs=[index], outputs=adv_loss,
94 | givens={
95 | x:x_test[batch_size*index:batch_size*(index+1)]})
96 |
97 | f_lr_decay_recon = theano.function(inputs=[],outputs=optimizer_recon.alpha,
98 | updates={optimizer_recon.alpha:theano.shared(numpy.array(learning_rate_decay).astype(theano.config.floatX))*optimizer_recon.alpha})
99 | f_lr_decay_adv = theano.function(inputs=[],outputs=optimizer_adv.alpha,
100 | updates={optimizer_adv.alpha:theano.shared(numpy.array(learning_rate_decay).astype(theano.config.floatX))*optimizer_adv.alpha})
101 |
102 | randix = RandomStreams(seed=numpy.random.randint(1234)).permutation(n=x_train.shape[0])
103 | f_permute_train_set = theano.function(inputs=[],outputs=x_train,updates={x_train:x_train[randix]})
104 |
105 | statuses = {}
106 | statuses['recon_train'] = []
107 | statuses['adv_train'] = []
108 | statuses['recon_test'] = []
109 | statuses['adv_test'] = []
110 |
111 | n_train = x_train.get_value().shape[0]
112 | n_test = x_test.get_value().shape[0]
113 |
114 | sum_recon_train = numpy.sum(numpy.array([f_recon_train(i) for i in xrange(n_train/batch_size)]))*batch_size
115 | sum_adv_train = numpy.sum(numpy.array([f_adv_train(i) for i in xrange(n_train/batch_size)]))*batch_size
116 | sum_recon_test = numpy.sum(numpy.array([f_recon_test(i) for i in xrange(n_test/batch_size)]))*batch_size
117 | sum_adv_test = numpy.sum(numpy.array([f_adv_test(i) for i in xrange(n_test/batch_size)]))*batch_size
118 | statuses['recon_train'].append(sum_recon_train/n_train)
119 | statuses['adv_train'].append(sum_adv_train/n_train)
120 | statuses['recon_test'].append(sum_recon_test/n_test)
121 | statuses['adv_test'].append(sum_adv_test/n_test)
122 | print "[Epoch]",str(-1)
123 | print "recon_train : " , statuses['recon_train'][-1], "adv_train : ", statuses['adv_train'][-1], \
124 | "recon_test : " , statuses['recon_test'][-1], "adv_test : ", statuses['adv_test'][-1]
125 |
126 | z = model.encode_test(input=x)
127 | f_enc = theano.function(inputs=[],outputs=z,givens={x:dataset[1][0]})
128 | def plot_latent_variable(epoch):
129 | output = f_enc()
130 | plt.figure(figsize=(8,8))
131 | color=cm.rainbow(numpy.linspace(0,1,10))
132 | for l,c in zip(range(10),color):
133 | ix = numpy.where(dataset[1][1].get_value()==l)[0]
134 | plt.scatter(output[ix,0],output[ix,1],c=c,label=l,s=8,linewidth=0)
135 | plt.xlim([-5.0,5.0])
136 | plt.ylim([-5.0,5.0])
137 | plt.legend(fontsize=15)
138 | plt.savefig('z_epoch' + str(epoch) + '.pdf')
139 |
140 | print "training..."
141 | make_sure_path_exists("./trained_model")
142 |
143 | for epoch in xrange(num_epochs):
144 | cPickle.dump((model,statuses),open('./trained_model/'+'tmp-' + save_filename,'wb'),cPickle.HIGHEST_PROTOCOL)
145 | f_permute_train_set()
146 | ### update parameters ###
147 | for i in xrange(n_train/batch_size):
148 | ### Optimize model and discriminator alternately ###
149 | f_training_discriminator(i)
150 | f_training_model(i)
151 | #########################
152 |
153 | if(latent_dim == 2):
154 | plot_latent_variable(epoch=epoch)
155 |
156 | sum_recon_train = numpy.sum(numpy.array([f_recon_train(i) for i in xrange(n_train/batch_size)]))*batch_size
157 | sum_adv_train = numpy.sum(numpy.array([f_adv_train(i) for i in xrange(n_train/batch_size)]))*batch_size
158 | sum_recon_test = numpy.sum(numpy.array([f_recon_test(i) for i in xrange(n_test/batch_size)]))*batch_size
159 | sum_adv_test = numpy.sum(numpy.array([f_adv_test(i) for i in xrange(n_test/batch_size)]))*batch_size
160 | statuses['recon_train'].append(sum_recon_train/n_train)
161 | statuses['adv_train'].append(sum_adv_train/n_train)
162 | statuses['recon_test'].append(sum_recon_test/n_test)
163 | statuses['adv_test'].append(sum_adv_test/n_test)
164 | print "[Epoch]",str(epoch)
165 | print "recon_train : " , statuses['recon_train'][-1], "adv_train : ", statuses['adv_train'][-1], \
166 | "recon_test : " , statuses['recon_test'][-1], "adv_test : ", statuses['adv_test'][-1]
167 |
168 | f_lr_decay_recon()
169 | f_lr_decay_adv()
170 |
171 | make_sure_path_exists("./trained_model")
172 | cPickle.dump((model,statuses),open('./trained_model/'+save_filename,'wb'),cPickle.HIGHEST_PROTOCOL)
173 | return model,statuses
174 |
175 | if __name__=='__main__':
176 | train()
177 |
--------------------------------------------------------------------------------