├── README.md
├── __init__.py
├── adv_auto_mnist.py
├── dataset
│   └── download_mnist.sh
├── figure
│   ├── x_gaussian_8.png
│   └── z_uniform_2.png
├── load_data.py
├── models
│   ├── __init__.py
│   └── adv_autoencoder.py
├── source
│   ├── __init__.py
│   ├── costs
│   │   ├── __init__.py
│   │   ├── adversarial_autoenc_loss.py
│   │   ├── binary_cross_entropy_loss.py
│   │   ├── cross_entropy_loss.py
│   │   ├── quadratic_loss.py
│   │   └── weight_decay.py
│   ├── layers
│   │   ├── __init__.py
│   │   ├── batch_normalization.py
│   │   ├── layer.py
│   │   ├── linear.py
│   │   ├── lrelu.py
│   │   ├── relu.py
│   │   ├── sigmoid.py
│   │   ├── softmax.py
│   │   └── softplus.py
│   └── optimizers
│       ├── __init__.py
│       ├── adagrad.py
│       ├── adam.py
│       ├── momentum_sgd.py
│       ├── optimizer.py
│       └── sgd.py
├── train.py
└── train_adv_autoenc.ipynb
/README.md:
--------------------------------------------------------------------------------
# Implementation of Adversarial Autoencoder (arXiv: http://arxiv.org/abs/1511.05644)

Python code for training an Adversarial Autoencoder with Theano.

You can train example models of the Adversarial Autoencoder on MNIST in the IPython notebook.

See https://github.com/takerum/adversarial_autoencoder/blob/master/train_adv_autoenc.ipynb .

- The hidden code z of the trained adversarial autoencoder with a 2-D uniform prior distribution (see figure/z_uniform_2.png).

- The input x sampled from the trained adversarial autoencoder with an 8-D Gaussian prior distribution (see figure/x_gaussian_8.png).

## Required libraries:
python 2.7, theano 0.7.0

## References:
[1] Alireza Makhzani, Jonathon Shlens, Navdeep Jaitly and Ian Goodfellow. Adversarial Autoencoders. arXiv preprint (http://arxiv.org/abs/1511.05644).
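
For a quick start outside the notebook, the sketch below shows one way to launch training from the repository root. This is a minimal, illustrative sketch only: it assumes `dataset/mnist.pkl` has already been fetched with `dataset/download_mnist.sh` (run inside `dataset/`), and the keyword arguments simply mirror the defaults defined in `train.py`; the save filename is a hypothetical example.

```python
# Illustrative usage sketch -- train a model with a 2-D uniform prior.
from train import train

model, statuses = train(latent_dim=2,
                        z_prior='uniform',
                        num_epochs=50,
                        save_filename='adv_autoenc_uniform_2d')
```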
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
__author__ = 'TakeruMiyato'

import source
import models
--------------------------------------------------------------------------------
/adv_auto_mnist.py:
--------------------------------------------------------------------------------
from models.adv_autoencoder import AdversarialAutoencoder
import source.layers as L
from theano.tensor.shared_randomstreams import RandomStreams
import numpy,theano
import theano.tensor as T

def get_normalized_vector(v):
    v = v / (1e-20 + T.max(T.abs_(v), axis=1, keepdims=True))
    v_2 = T.sum(v**2,axis=1,keepdims=True)
    return v / T.sqrt(1e-6+v_2)


class AdversarialAutoencoderMNIST(AdversarialAutoencoder):

    def __init__(self,latent_dim=2,z_prior='gaussian'):

        self.z_prior = z_prior

        self.enc_l1 = L.Linear((784,1000))
        self.enc_b1 = L.BatchNormalization(1000)
        self.enc_l2 = L.Linear((1000,1000))
        self.enc_b2 = L.BatchNormalization(1000)
        self.enc_l3 = L.Linear((1000,latent_dim))
        self.enc_b3 = L.BatchNormalization(latent_dim)

        self.dec_l1 = L.Linear((latent_dim,1000))
        self.dec_b1 = L.BatchNormalization(1000)
        self.dec_l2 = L.Linear((1000,1000))
        self.dec_b2 = L.BatchNormalization(1000)
        self.dec_l3 = L.Linear((1000,784))

        self.D_l1 = L.Linear((latent_dim,500))
        self.D_b1 = L.BatchNormalization(500)
        self.D_l2 = L.Linear((500,500))
        self.D_b2 = L.BatchNormalization(500)
        self.D_l3 = L.Linear((500,1))

        self.model_params = self.enc_l1.params + self.enc_l2.params + self.enc_l3.params \
                            + self.dec_l1.params + self.dec_l2.params + self.dec_l3.params \
                            + self.enc_b1.params + self.enc_b2.params + self.enc_b3.params \
                            + self.dec_b1.params + self.dec_b2.params
        self.D_params = self.D_l1.params + self.D_l2.params + self.D_l3.params
        self.rng = RandomStreams(seed=numpy.random.randint(1234))


    def encode(self,input,train=True):
        h = input
        h = self.enc_l1(h)
        h = self.enc_b1(h,train=train)
        h = L.relu(h)
        h = self.enc_l2(h)
        h = self.enc_b2(h,train=train)
        h = L.relu(h)
        h = self.enc_l3(h)
        h = self.enc_b3(h,train=train)
        return h

    def decode(self,input,train=True):
        h = input
        h = self.dec_l1(h)
        h = self.dec_b1(h,train=train)
        h = L.relu(h)
        h = self.dec_l2(h)
        h = self.dec_b2(h,train=train)
        h = L.relu(h)
        h = self.dec_l3(h)
        h = L.sigmoid(h)
        return h

    def D(self,input,train=True):
        h = input
        h = self.D_l1(h)
        h = L.relu(h)
        h = self.D_l2(h)
        h = L.relu(h)
        h = self.D_l3(h)
        h = L.sigmoid(h)
        return h

    def sample_from_prior(self,z):

        ###### gaussian #######
        if(self.z_prior == 'gaussian'):
            return 1.0*self.rng.normal(size=z.shape,dtype=theano.config.floatX)

        ###### uniform ########
        elif(self.z_prior == 'uniform'):
            # direction uniform on the unit sphere, radius 2*U^(1/latent_dim):
            # together this draws z uniformly inside a ball of radius 2
            v = get_normalized_vector(self.rng.normal(size=z.shape,dtype=theano.config.floatX))
            r = T.power(self.rng.uniform(size=z.sum(axis=1,keepdims=True).shape,low=0,high=1.0,dtype=theano.config.floatX),1./z.shape[1])
            r = T.patternbroadcast(r,[False,True])
            return 2.0*r*v

        else:
            raise NotImplementedError()
--------------------------------------------------------------------------------
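
A note on `sample_from_prior` above: for the 'uniform' option the direction comes from a normalized Gaussian (uniform on the unit sphere) and the radius from U^(1/latent_dim), so z is distributed uniformly inside a ball of radius 2. A standalone NumPy sketch of the same construction, for illustration only (not part of the repository):

import numpy

def sample_uniform_ball(n, dim, radius=2.0, rng=numpy.random):
    v = rng.normal(size=(n, dim))
    v /= numpy.sqrt((v ** 2).sum(axis=1, keepdims=True))  # uniform direction on the sphere
    r = rng.uniform(size=(n, 1)) ** (1.0 / dim)            # radius law for a uniform ball
    return radius * r * v
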
/dataset/download_mnist.sh:
--------------------------------------------------------------------------------
#!/bin/sh
wget -c http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
gzip -d mnist.pkl.gz
--------------------------------------------------------------------------------
/figure/x_gaussian_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/takerum/adversarial_autoencoder/0af027e8cfa1ec90011c58edec44bca4a2b95117/figure/x_gaussian_8.png
--------------------------------------------------------------------------------
/figure/z_uniform_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/takerum/adversarial_autoencoder/0af027e8cfa1ec90011c58edec44bca4a2b95117/figure/z_uniform_2.png
--------------------------------------------------------------------------------
/load_data.py:
--------------------------------------------------------------------------------
import numpy
import cPickle
import os
import theano

def load_mnist_dataset():
    dataset = cPickle.load(open('dataset/mnist.pkl','rb'))
    train_set_x = numpy.concatenate((dataset[0][0],dataset[1][0]),axis=0)
    train_set_y = numpy.concatenate((dataset[0][1],dataset[1][1]),axis=0)
    return ((train_set_x,train_set_y),(dataset[2][0],dataset[2][1]))

def _shared_dataset(data_xy):
    data_x, data_y = data_xy
    shared_x = theano.shared(numpy.asarray(data_x,
                                           dtype=theano.config.floatX), borrow=True)
    shared_y = theano.shared(numpy.asarray(data_y,
                                           dtype='int32'), borrow=True)
    return shared_x, shared_y

def load_mnist_full():
    dataset = load_mnist_dataset()

    train_set_x, train_set_y = dataset[0]
    test_set_x, test_set_y = dataset[1]

    train_set_x, train_set_y = _shared_dataset((train_set_x, train_set_y))
    test_set_x, test_set_y = _shared_dataset((test_set_x, test_set_y))

    return [(train_set_x, train_set_y), (test_set_x, test_set_y)]

def load_mnist_for_validation(n_v=10000):
    dataset = load_mnist_dataset()

    train_set_x, train_set_y = dataset[0]

    randix = numpy.random.permutation(train_set_x.shape[0])

    valid_set_x = train_set_x[randix[:n_v]]
    valid_set_y = train_set_y[randix[:n_v]]
    train_set_x = train_set_x[randix[n_v:]]
    train_set_y = train_set_y[randix[n_v:]]

    train_set_x, train_set_y = _shared_dataset((train_set_x, train_set_y))
    valid_set_x, valid_set_y = _shared_dataset((valid_set_x, valid_set_y))

    return [(train_set_x, train_set_y), (valid_set_x, valid_set_y)]

def load_mnist_for_semi_sup(n_l=1000, n_v=1000):
    dataset = load_mnist_dataset()

    _train_set_x, _train_set_y = dataset[0]

    rand_ind = numpy.random.permutation(_train_set_x.shape[0])
    _train_set_x = _train_set_x[rand_ind]
    _train_set_y = _train_set_y[rand_ind]

    s_c = n_l // 10  # labeled examples kept per class (integer, used as a slice bound)
    train_set_x = numpy.zeros((n_l, 28 ** 2))
    train_set_y = numpy.zeros(n_l)
    for i in xrange(10):
        ind = numpy.where(_train_set_y == i)[0]
        train_set_x[i * s_c:(i + 1) * s_c, :] = _train_set_x[ind[0:s_c], :]
        train_set_y[i * s_c:(i + 1) * s_c] = _train_set_y[ind[0:s_c]]
        _train_set_x = numpy.delete(_train_set_x, ind[0:s_c], 0)
        _train_set_y = numpy.delete(_train_set_y, ind[0:s_c])

    print rand_ind
    rand_ind = numpy.random.permutation(train_set_x.shape[0])
    train_set_x = train_set_x[rand_ind]
    train_set_y = train_set_y[rand_ind]
    valid_set_x = _train_set_x[:n_v]
    valid_set_y = _train_set_y[:n_v]
    # ul_train_set_x = _train_set_x[n_v:]
    train_set_ul_x = numpy.concatenate((train_set_x, _train_set_x[n_v:]), axis=0)
    train_set_ul_x = train_set_ul_x[numpy.random.permutation(train_set_ul_x.shape[0])]
    ul_train_set_y = _train_set_y[n_v:]  # dummy

    train_set_x, train_set_y = _shared_dataset((train_set_x, train_set_y))
    train_set_ul_x, ul_train_set_y = _shared_dataset((train_set_ul_x, ul_train_set_y))
    valid_set_x, valid_set_y = _shared_dataset((valid_set_x, valid_set_y))

    return [(train_set_x, train_set_y, train_set_ul_x), (valid_set_x, valid_set_y)]
--------------------------------------------------------------------------------
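
A minimal usage sketch for the loader above (illustrative; it assumes `dataset/mnist.pkl` is present, e.g. after running `download_mnist.sh` inside `dataset/`). `load_mnist_full` merges the original 50,000-example training split with the 10,000-example validation split and returns Theano shared variables:

import load_data

(train_x, train_y), (test_x, test_y) = load_data.load_mnist_full()
print train_x.get_value().shape  # expected: (60000, 784)
print test_x.get_value().shape   # expected: (10000, 784)
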
/models/__init__.py:
--------------------------------------------------------------------------------
import adv_autoencoder

AdversarialAutoencoder = adv_autoencoder.AdversarialAutoencoder
--------------------------------------------------------------------------------
/models/adv_autoencoder.py:
--------------------------------------------------------------------------------

class AdversarialAutoencoder(object):

    def __init__(self):
        self.model_params = None
        self.D_params = None
        raise NotImplementedError()

    ##### define encoder function #####
    def encode_train(self,input):
        return self.encode(input=input,train=True)
    def encode_test(self,input):
        return self.encode(input=input,train=False)

    def encode(self,input,train=True):
        raise NotImplementedError()

    ##### define decoder function #####
    def decode_train(self,input):
        return self.decode(input=input,train=True)
    def decode_test(self,input):
        return self.decode(input=input,train=False)

    def decode(self,input,train=True):
        raise NotImplementedError()

    ##### define discriminator function #####
    def D_train(self,input):
        return self.D(input=input,train=True)
    def D_test(self,input):
        return self.D(input=input,train=False)

    def D(self,input,train=True):
        raise NotImplementedError()
--------------------------------------------------------------------------------
/source/__init__.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
__author__ = 'TakeruMiyato'

import layers
import costs
import optimizers
--------------------------------------------------------------------------------
/source/costs/__init__.py:
--------------------------------------------------------------------------------

import cross_entropy_loss
import quadratic_loss
import weight_decay
import binary_cross_entropy_loss
import adversarial_autoenc_loss

cross_entropy_loss = cross_entropy_loss.cross_entropy_loss
binary_cross_entropy_loss = binary_cross_entropy_loss.binary_cross_entropy_loss
quadratic_loss = quadratic_loss.quadratic_loss
weight_decay = weight_decay.weight_decay
adversarial_autoenc_loss = adversarial_autoenc_loss.adversarial_autoenc_loss
--------------------------------------------------------------------------------
/source/costs/adversarial_autoenc_loss.py:
--------------------------------------------------------------------------------
import theano
import theano.tensor as T
import numpy
from binary_cross_entropy_loss import _binary_cross_entropy_loss
from quadratic_loss import _quadratic_loss

def adversarial_autoenc_loss(x,enc_f,dec_f,disc_f,p_z_sampler,
                             obj_type,
                             lamb=numpy.asarray(1.0,dtype=theano.config.floatX)):

    z_q = enc_f(x)          # code produced by the encoder
    z_p = p_z_sampler(z_q)  # sample from the prior, matched to the shape of z_q

    adv_loss = adversarial_loss(z_q=z_q, z_p=z_p, disc_f=disc_f)
    recon_loss = reconstruction_loss(x=x, z=z_q, dec_f=dec_f, obj_type=obj_type)

    # first output: autoencoder objective (reconstruct x while fooling the discriminator),
    # third output: discriminator objective
    return recon_loss - lamb*adv_loss, recon_loss, adv_loss


def adversarial_loss(z_p,z_q,disc_f):
    y_q = disc_f(z_q)
    y_p = disc_f(z_p)
    return -T.mean(T.log(y_p) + (T.log(1-y_q)))

def reconstruction_loss(x,z,dec_f,obj_type='QE'):
    x_ = dec_f(z)
    if obj_type == 'QE':
        return _quadratic_loss(x_,x)
    elif obj_type == 'CE':
        return _binary_cross_entropy_loss(x_,x)
--------------------------------------------------------------------------------
/source/costs/binary_cross_entropy_loss.py:
--------------------------------------------------------------------------------
import theano.tensor as T

def binary_cross_entropy_loss(x,t,forward_func):
    print "costs/binary_cross_entropy_loss"
    y = forward_func(x)
    return _binary_cross_entropy_loss(y,t)

def _binary_cross_entropy_loss(y,t):
    return -T.mean(T.sum(t*T.log(y) + (1-t)*T.log(1-y),axis=1))
--------------------------------------------------------------------------------
/source/costs/cross_entropy_loss.py:
--------------------------------------------------------------------------------
import theano.tensor as T

def cross_entropy_loss(x,t,forward_func):
    print "costs/cross_entropy_loss"
    y = forward_func(x)
    return _cross_entropy_loss(y,t)

def _cross_entropy_loss(y,t):
    if(t.ndim==1):
        return -T.mean(T.log(y)[T.arange(t.shape[0]), t])
    elif(t.ndim==2):
        return -T.mean(T.sum(t*T.log(y),axis=1))
--------------------------------------------------------------------------------
/source/costs/quadratic_loss.py:
--------------------------------------------------------------------------------
import theano.tensor as T

def quadratic_loss(x,t,forward_func):
    print "costs/quadratic_loss"
    y = forward_func(x)
    return _quadratic_loss(y,t)

def _quadratic_loss(y,t):
    return T.mean(T.sum((y-t)**2,axis=1))
--------------------------------------------------------------------------------
/source/costs/weight_decay.py:
--------------------------------------------------------------------------------
import numpy
import theano
import theano.tensor as T


def weight_decay(params,coeff):
    print "costs/weight_decay"
    cost = 0
    for param in params:
        cost += T.sum(param**2)
    return theano.shared(numpy.array(coeff).astype(theano.config.floatX))*cost
--------------------------------------------------------------------------------
/source/layers/__init__.py:
--------------------------------------------------------------------------------
import relu,lrelu
import linear
import sigmoid
import softmax
import batch_normalization

Linear = linear.Linear
BatchNormalization = batch_normalization.BatchNormalization

relu = relu.relu
lrelu = lrelu.lrelu
sigmoid = sigmoid.sigmoid
softmax = softmax.softmax
--------------------------------------------------------------------------------
/source/layers/batch_normalization.py:
--------------------------------------------------------------------------------
import theano
import theano.tensor as T
import numpy

from layer import Layer

class BatchNormalization(Layer):

    def __init__(self,size,moving_avg_ratio=0.9,initial_gamma=None,initial_beta=None):

        self.params = []
        self.moving_avg_ratio = theano.shared(numpy.array(moving_avg_ratio).astype(theano.config.floatX))
        self.finetune_N = theano.shared(0)
        if(initial_gamma is not None):
            assert initial_gamma.shape == (size,) or initial_gamma.shape == (1,size,1)
            gamma_values = initial_gamma.reshape((1,size,1))
        else:
            gamma_values = numpy.ones(shape=(1,size,1),dtype=theano.config.floatX)
        self.gamma = theano.shared(gamma_values)
        self.params.append(self.gamma)

        if(initial_beta is not None):
            assert initial_beta.shape == (size,) or initial_beta.shape == (1,size,1)
            beta_values = initial_beta.reshape((1,size,1))
        else:
            beta_values = numpy.zeros(shape=(1,size,1),dtype=theano.config.floatX)
        self.beta = theano.shared(beta_values)
        self.params.append(self.beta)

        est_var_values = numpy.ones((1,size,1),dtype=theano.config.floatX)
        est_mean_values = numpy.zeros((1,size,1),dtype=theano.config.floatX)
        self.est_var = theano.shared(est_var_values)
        self.est_mean = theano.shared(est_mean_values)

    def __call__(self,inputs,train=True,update_batch_stat=True,finetune=False):
        return self.forward(inputs,train=train,update_batch_stat=update_batch_stat,finetune=finetune)

    def forward(self,input_org,train=True,update_batch_stat=True,finetune=False):
        ldim,cdim,rdim = self._internal_shape(input_org)
        input = input_org.reshape((ldim,cdim,rdim))
        if (train):
            mean = T.mean(input, axis=(0, 2), keepdims=True)
            var = T.mean((input-mean)**2, axis=(0, 2), keepdims=True)

            if(update_batch_stat):
                finetune_N = theano.clone(self.finetune_N, share_inputs=False)
                if(finetune):
                    finetune_N.default_update = finetune_N+1
                    ratio = T.cast(1-1.0/(finetune_N+1),theano.config.floatX)
                else:
                    finetune_N.default_update = 0
                    ratio = self.moving_avg_ratio
                m = ldim*rdim
                scale = T.cast(m/(m-1.0),theano.config.floatX)
                est_mean = theano.clone(self.est_mean, share_inputs=False)
                est_var = theano.clone(self.est_var, share_inputs=False)
                est_mean.default_update = T.cast(ratio*self.est_mean + (1-ratio)*mean,theano.config.floatX)
                est_var.default_update = T.cast(ratio*self.est_var + (1-ratio)*scale*var,theano.config.floatX)
                mean += 0 * est_mean
                var += 0 * est_var
            #self.normalized_input = (input - self._pbc(mean))/T.sqrt(1e-6+self._pbc(var))
            #output = self._pbc(self.gamma) * self.normalized_input + self._pbc(self.beta)
            output = self._pbc(self.gamma) * (input - self._pbc(mean)) \
                     / T.sqrt(1e-6+self._pbc(var)) + self._pbc(self.beta)

        else:
            output = self._pbc(self.gamma) * (input - self._pbc(self.est_mean)) \
                     / T.sqrt(1e-6+self._pbc(self.est_var)) + self._pbc(self.beta)

        return output.reshape(input_org.shape)

    def _pbc(self,x):
        return T.patternbroadcast(x,(True,False,True))

    def _internal_shape(self, x):
        ldim = x.shape[0]
        cdim = self.gamma.size
        rdim = x.size // (ldim * cdim)
        return ldim, cdim, rdim
--------------------------------------------------------------------------------
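
In the training branch above, each of the `cdim` features is standardized with batch statistics taken over axes (0, 2) of the internal `(ldim, cdim, rdim)` reshape, then scaled by gamma and shifted by beta. A plain NumPy sketch of that computation for a 2-D `(batch, features)` input (where `rdim` is 1), for illustration only:

import numpy

def batchnorm_train_2d(x, gamma, beta, eps=1e-6):
    # x: (batch, features); gamma, beta: (features,)
    mean = x.mean(axis=0, keepdims=True)
    var = ((x - mean) ** 2).mean(axis=0, keepdims=True)
    return gamma * (x - mean) / numpy.sqrt(eps + var) + beta
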
/source/layers/layer.py:
--------------------------------------------------------------------------------

class Layer(object):

    def __call__(self, input):
        return self.forward(input)

    def forward(self,input):
        raise NotImplementedError()
--------------------------------------------------------------------------------
/source/layers/linear.py:
--------------------------------------------------------------------------------
import theano
import theano.tensor as T
import numpy

from layer import Layer

class Linear(Layer):

    def __init__(self,size,use_bias=True,initial_W=None,initial_b=None):
        self.use_bias = use_bias
        self.params = []

        if(initial_W is not None):
            assert initial_W.shape == size
            W_values = initial_W
        else:
            W_values = numpy.random.normal(0, numpy.sqrt(1. / size[0]), size=size).astype(theano.config.floatX)
        self.W = theano.shared(W_values)
        self.params.append(self.W)

        if(self.use_bias == True):
            if(initial_b is not None):
                assert initial_b.shape == (size[1],)
                b_values = initial_b
            else:
                b_values = numpy.zeros((size[1],)).astype(theano.config.floatX)
            self.b = theano.shared(b_values)
            self.params.append(self.b)

    def forward(self,input):
        input = self._as_mat(input)
        output = T.dot(input, self.W)
        if(self.use_bias == True):
            output += self.b

        return output

    def _as_mat(self,x):
        return x.reshape((x.shape[0],x.size//x.shape[0]))
--------------------------------------------------------------------------------
/source/layers/lrelu.py:
--------------------------------------------------------------------------------
import theano
import theano.tensor as T
import numpy
from layer import Layer


class LReLU(Layer):

    def __init__(self,slope):
        self.slope = theano.shared(numpy.asarray(slope,theano.config.floatX))

    def forward(self,x):
        return T.maximum(self.slope*x, x)

def lrelu(x,slope=0.1):
    return LReLU(slope)(x)
--------------------------------------------------------------------------------
/source/layers/relu.py:
--------------------------------------------------------------------------------
import theano.tensor as T
from layer import Layer

class ReLU(Layer):

    def forward(self,x):
        return T.maximum(0.0, x)

def relu(x):
    return ReLU()(x)
--------------------------------------------------------------------------------
/source/layers/sigmoid.py:
--------------------------------------------------------------------------------
import theano.tensor as T
from layer import Layer

class Sigmoid(Layer):

    def forward(self,x):
        return T.nnet.sigmoid(x)

def sigmoid(x):
    return Sigmoid()(x)
--------------------------------------------------------------------------------
/source/layers/softmax.py:
--------------------------------------------------------------------------------
import theano.tensor as T
from layer import Layer

class Softmax(Layer):

    def __init__(self,stable):
        self.stable = stable

    def forward(self,x):
        if(self.stable):
            x -= x.max(axis=1,keepdims=True)
        e_x = T.exp(x)
        out = e_x / e_x.sum(axis=1, keepdims=True)
        return out

def softmax(x,stable=False):
    return Softmax(stable=stable)(x)
--------------------------------------------------------------------------------
/source/layers/softplus.py:
--------------------------------------------------------------------------------
import theano.tensor as T
from layer import Layer

class Softplus(Layer):

    def forward(self,x):
        return T.nnet.softplus(x)

def softplus(x):
    return Softplus()(x)
--------------------------------------------------------------------------------
/source/optimizers/__init__.py:
--------------------------------------------------------------------------------
import sgd
import momentum_sgd
import adagrad
import adam

SGD = sgd.SGD
MomentumSGD = momentum_sgd.MomentumSGD
AdaGrad = adagrad.AdaGrad
ADAM = adam.ADAM
--------------------------------------------------------------------------------
/source/optimizers/adagrad.py:
--------------------------------------------------------------------------------
from optimizer import Optimizer
from collections import OrderedDict
import theano
import theano.tensor as T
import numpy

class AdaGrad(Optimizer):
    def __init__(self,cost,params,lr=0.1):
        self.lr = theano.shared(numpy.array(lr).astype(theano.config.floatX))
        super(AdaGrad,self).__init__(cost,params)

    def _updates(self):
        updates = OrderedDict()
        g_model_params = []
        model_adg_rates = []
        for param in self.params:
            gparam = T.grad(self.cost,wrt=param)
            g_model_params.append(gparam)
            adg_rate = theano.shared(numpy.ones(param.get_value(borrow=True).shape,dtype=theano.config.floatX))
            model_adg_rates.append(adg_rate)

        for param, gparam, adg_rate in zip(self.params, g_model_params, model_adg_rates):
            updates[adg_rate] = adg_rate + gparam*gparam
            stepped_param = param - (self.lr/T.sqrt(updates[adg_rate]))*gparam
            updates[param] = stepped_param

        return updates
--------------------------------------------------------------------------------
/source/optimizers/adam.py:
--------------------------------------------------------------------------------
from optimizer import Optimizer
from collections import OrderedDict
import theano
import theano.tensor as T
import numpy

class ADAM(Optimizer):
    def __init__(self,cost,params,alpha=0.001):
        self.alpha = theano.shared(numpy.array(alpha).astype(theano.config.floatX))
        super(ADAM,self).__init__(cost,params)

    def _updates(self):
        updates = OrderedDict()
        t = theano.shared(numpy.array(1).astype(theano.config.floatX))
        alpha = self.alpha
        beta_1 = numpy.array(0.9).astype(theano.config.floatX)
        beta_2 = numpy.array(0.999).astype(theano.config.floatX)
        epsilon = numpy.array(1.0*10**-8.0).astype(theano.config.floatX)
        lam = numpy.array(1.0-1.0*10**-8.0).astype(theano.config.floatX)
        g_model_params = []
        models_m = []
        models_v = []
        for param in self.params:
            gparam = T.grad(self.cost, wrt=param)
            g_model_params.append(gparam)
            m = theano.shared(numpy.zeros(param.get_value(borrow=True).shape, dtype=theano.config.floatX))
            v = theano.shared(numpy.zeros(param.get_value(borrow=True).shape, dtype=theano.config.floatX))
            models_m.append(m)
            models_v.append(v)
        for param, gparam, m, v in zip(self.params, g_model_params, models_m, models_v):
            beta_1_t = T.cast(beta_1 * lam ** (t - 1), theano.config.floatX)
            updates[m] = T.cast(beta_1_t * m + (1 - beta_1_t) * gparam,theano.config.floatX)
            updates[v] = T.cast(beta_2 * v + (1 - beta_2) * (gparam * gparam),theano.config.floatX)
            m_hat = T.cast(updates[m] / (1 - beta_1 ** t), theano.config.floatX)
            v_hat = T.cast(updates[v] / (1 - beta_2 ** t), theano.config.floatX)
            updates[param] = param - alpha * m_hat / (T.sqrt(v_hat) + epsilon)
        updates[t] = t + 1
        return updates
--------------------------------------------------------------------------------
/source/optimizers/momentum_sgd.py:
--------------------------------------------------------------------------------
from optimizer import Optimizer
from collections import OrderedDict
import theano
import theano.tensor as T
import numpy

class MomentumSGD(Optimizer):
    def __init__(self,cost,params,lr=0.1,momentum_ratio=0.9):
        self.lr = theano.shared(numpy.array(lr).astype(theano.config.floatX))
        self.ratio = theano.shared(numpy.array(momentum_ratio).astype(theano.config.floatX))
        super(MomentumSGD,self).__init__(cost,params)

    def _updates(self):
        updates = OrderedDict()
        g_model_params = []
        g_model_params_mom = []
        for param in self.params:
            gparam = T.grad(self.cost,wrt=param)
            g_model_params.append(gparam)
            gparam_mom = theano.shared(numpy.zeros(param.get_value(borrow=True).shape,dtype=theano.config.floatX))
            g_model_params_mom.append(gparam_mom)

        for param, gparam_mom, gparam in zip(self.params, g_model_params_mom, g_model_params):
            updates[gparam_mom] = self.ratio * gparam_mom + (1. - self.ratio) * self.lr * gparam
            updates[param] = param - updates[gparam_mom]

        return updates
--------------------------------------------------------------------------------
/source/optimizers/optimizer.py:
--------------------------------------------------------------------------------
import theano
import theano.tensor

class Optimizer(object):

    def __init__(self,cost,params):
        self.cost = cost
        self.params = params
        self.updates = self._updates()

    def _updates(self):
        raise NotImplementedError()
--------------------------------------------------------------------------------
/source/optimizers/sgd.py:
--------------------------------------------------------------------------------
from optimizer import Optimizer
from collections import OrderedDict
import theano
import theano.tensor as T
import numpy

class SGD(Optimizer):
    def __init__(self,cost,params,lr=0.1):
        self.lr = theano.shared(numpy.array(lr).astype(theano.config.floatX))
        super(SGD,self).__init__(cost,params)

    def _updates(self):
        updates = OrderedDict()
        for param in self.params:
            gparam = T.grad(self.cost, wrt=param)
            updates[param] = param - self.lr*gparam
        return updates
--------------------------------------------------------------------------------
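
Each optimizer subclass above builds a Theano updates dictionary from a scalar cost and a list of shared parameters; a short sketch of the consuming pattern (the same pattern `train.py` uses below; `loss`, `model`, `x`, `x_train`, `index` and `batch_size` are assumed to be defined as they are there):

# illustrative sketch only
opt = ADAM(cost=loss, params=model.model_params, alpha=0.002)
f_step = theano.function(inputs=[index], outputs=loss, updates=opt.updates,
                         givens={x: x_train[batch_size*index:batch_size*(index+1)]})
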
/train.py:
--------------------------------------------------------------------------------


from source import optimizers,costs
import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
import cPickle
import load_data
from adv_auto_mnist import AdversarialAutoencoderMNIST

import time
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm

import os
import errno

def make_sure_path_exists(path):
    try:
        os.makedirs(path)
    except OSError as exception:
        if exception.errno != errno.EEXIST:
            raise



def train(latent_dim=2,  # dimension of latent variable z
          z_prior='gaussian',  # 'gaussian' or 'uniform'
          lamb=10.,  # ratio between reconstruction and adversarial cost
          recon_obj_type='CE',  # reconstruction objective: 'CE' (cross entropy) or 'QE' (quadratic error)
          initial_learning_rate=0.002,
          learning_rate_decay=1.0,
          num_epochs=50,
          batch_size=100,
          save_filename='trained_model',
          seed=1):


    numpy.random.seed(seed=seed)

    dataset = load_data.load_mnist_full()

    x_train,_ = dataset[0]
    x_test,_ = dataset[1]

    model = AdversarialAutoencoderMNIST(latent_dim=latent_dim,z_prior=z_prior)

    x = T.matrix()

    loss_for_training,_,adv_loss_for_training = costs.adversarial_autoenc_loss(x=x,
                                                                               enc_f=model.encode_train,
                                                                               dec_f=model.decode_train,
                                                                               disc_f=model.D_train,
                                                                               p_z_sampler=model.sample_from_prior,
                                                                               obj_type=recon_obj_type,
                                                                               lamb=numpy.asarray(lamb,dtype=theano.config.floatX))

    _,recon_loss,adv_loss = costs.adversarial_autoenc_loss(x=x,
                                                           enc_f=model.encode_test,
                                                           dec_f=model.decode_test,
                                                           disc_f=model.D_test,
                                                           p_z_sampler=model.sample_from_prior,
                                                           obj_type=recon_obj_type,
                                                           lamb=numpy.asarray(lamb,dtype=theano.config.floatX))

    optimizer_recon = optimizers.ADAM(cost=loss_for_training,
                                      params=model.model_params,
                                      alpha=numpy.asarray(initial_learning_rate,dtype=theano.config.floatX))
    optimizer_adv = optimizers.ADAM(cost=adv_loss_for_training,
                                    params=model.D_params,
                                    alpha=numpy.asarray(initial_learning_rate,dtype=theano.config.floatX))

    index = T.iscalar()

    f_training_model = theano.function(inputs=[index], outputs=loss_for_training, updates=optimizer_recon.updates,
                                       givens={
                                           x:x_train[batch_size*index:batch_size*(index+1)]})
    f_training_discriminator = theano.function(inputs=[index], outputs=adv_loss_for_training, updates=optimizer_adv.updates,
                                               givens={
                                                   x:x_train[batch_size*index:batch_size*(index+1)]})


    f_recon_train = theano.function(inputs=[index], outputs=recon_loss,
                                    givens={
                                        x:x_train[batch_size*index:batch_size*(index+1)]})
    f_adv_train = theano.function(inputs=[index], outputs=adv_loss,
                                  givens={
                                      x:x_train[batch_size*index:batch_size*(index+1)]})
    f_recon_test = theano.function(inputs=[index], outputs=recon_loss,
                                   givens={
                                       x:x_test[batch_size*index:batch_size*(index+1)]})
    f_adv_test = theano.function(inputs=[index], outputs=adv_loss,
                                 givens={
                                     x:x_test[batch_size*index:batch_size*(index+1)]})

    f_lr_decay_recon = theano.function(inputs=[],outputs=optimizer_recon.alpha,
                                       updates={optimizer_recon.alpha:theano.shared(numpy.array(learning_rate_decay).astype(theano.config.floatX))*optimizer_recon.alpha})
    f_lr_decay_adv = theano.function(inputs=[],outputs=optimizer_adv.alpha,
                                     updates={optimizer_adv.alpha:theano.shared(numpy.array(learning_rate_decay).astype(theano.config.floatX))*optimizer_adv.alpha})

    randix = RandomStreams(seed=numpy.random.randint(1234)).permutation(n=x_train.shape[0])
    f_permute_train_set = theano.function(inputs=[],outputs=x_train,updates={x_train:x_train[randix]})
    statuses = {}
    statuses['recon_train'] = []
    statuses['adv_train'] = []
    statuses['recon_test'] = []
    statuses['adv_test'] = []

    n_train = x_train.get_value().shape[0]
    n_test = x_test.get_value().shape[0]

    sum_recon_train = numpy.sum(numpy.array([f_recon_train(i) for i in xrange(n_train/batch_size)]))*batch_size
    sum_adv_train = numpy.sum(numpy.array([f_adv_train(i) for i in xrange(n_train/batch_size)]))*batch_size
    sum_recon_test = numpy.sum(numpy.array([f_recon_test(i) for i in xrange(n_test/batch_size)]))*batch_size
    sum_adv_test = numpy.sum(numpy.array([f_adv_test(i) for i in xrange(n_test/batch_size)]))*batch_size
    statuses['recon_train'].append(sum_recon_train/n_train)
    statuses['adv_train'].append(sum_adv_train/n_train)
    statuses['recon_test'].append(sum_recon_test/n_test)
    statuses['adv_test'].append(sum_adv_test/n_test)
    print "[Epoch]",str(-1)
    print "recon_train : " , statuses['recon_train'][-1], "adv_train : ", statuses['adv_train'][-1], \
        "recon_test : " , statuses['recon_test'][-1], "adv_test : ", statuses['adv_test'][-1]

    z = model.encode_test(input=x)
    f_enc = theano.function(inputs=[],outputs=z,givens={x:dataset[1][0]})

    def plot_latent_variable(epoch):
        output = f_enc()
        plt.figure(figsize=(8,8))
        color = cm.rainbow(numpy.linspace(0,1,10))
        for l,c in zip(range(10),color):
            ix = numpy.where(dataset[1][1].get_value()==l)[0]
            plt.scatter(output[ix,0],output[ix,1],c=c,label=l,s=8,linewidth=0)
        plt.xlim([-5.0,5.0])
        plt.ylim([-5.0,5.0])
        plt.legend(fontsize=15)
        plt.savefig('z_epoch' + str(epoch) + '.pdf')

    print "training..."
    make_sure_path_exists("./trained_model")

    for epoch in xrange(num_epochs):
        cPickle.dump((model,statuses),open('./trained_model/'+'tmp-' + save_filename,'wb'),cPickle.HIGHEST_PROTOCOL)
        f_permute_train_set()
        ### update parameters ###
        for i in xrange(n_train/batch_size):
            ### Optimize model and discriminator alternately ###
            f_training_discriminator(i)
            f_training_model(i)
        #########################

        if(latent_dim == 2):
            plot_latent_variable(epoch=epoch)

        sum_recon_train = numpy.sum(numpy.array([f_recon_train(i) for i in xrange(n_train/batch_size)]))*batch_size
        sum_adv_train = numpy.sum(numpy.array([f_adv_train(i) for i in xrange(n_train/batch_size)]))*batch_size
        sum_recon_test = numpy.sum(numpy.array([f_recon_test(i) for i in xrange(n_test/batch_size)]))*batch_size
        sum_adv_test = numpy.sum(numpy.array([f_adv_test(i) for i in xrange(n_test/batch_size)]))*batch_size
        statuses['recon_train'].append(sum_recon_train/n_train)
        statuses['adv_train'].append(sum_adv_train/n_train)
        statuses['recon_test'].append(sum_recon_test/n_test)
        statuses['adv_test'].append(sum_adv_test/n_test)
        print "[Epoch]",str(epoch)
        print "recon_train : " , statuses['recon_train'][-1], "adv_train : ", statuses['adv_train'][-1], \
            "recon_test : " , statuses['recon_test'][-1], "adv_test : ", statuses['adv_test'][-1]

        f_lr_decay_recon()
        f_lr_decay_adv()

    make_sure_path_exists("./trained_model")
    cPickle.dump((model,statuses),open('./trained_model/'+save_filename,'wb'),cPickle.HIGHEST_PROTOCOL)
    return model,statuses

if __name__=='__main__':
    train()
--------------------------------------------------------------------------------
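
After training, train.py pickles the (model, statuses) pair into ./trained_model/ (plus a tmp- checkpoint each epoch) and, when latent_dim == 2, writes per-epoch scatter plots of the latent code to z_epoch<N>.pdf. A minimal sketch for reloading a finished run with the default save filename (illustrative only; it assumes the modules defining the model class are importable from the repository root):

import cPickle

model, statuses = cPickle.load(open('./trained_model/trained_model', 'rb'))
print statuses['recon_test'][-1]  # reconstruction loss on the test set after the last epoch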