├── README.md
├── __init__.py
├── adv_auto_mnist.py
├── dataset
│   └── download_mnist.sh
├── figure
│   ├── x_gaussian_8.png
│   └── z_uniform_2.png
├── load_data.py
├── models
│   ├── __init__.py
│   └── adv_autoencoder.py
├── source
│   ├── __init__.py
│   ├── costs
│   │   ├── __init__.py
│   │   ├── adversarial_autoenc_loss.py
│   │   ├── binary_cross_entropy_loss.py
│   │   ├── cross_entropy_loss.py
│   │   ├── quadratic_loss.py
│   │   └── weight_decay.py
│   ├── layers
│   │   ├── __init__.py
│   │   ├── batch_normalization.py
│   │   ├── layer.py
│   │   ├── linear.py
│   │   ├── lrelu.py
│   │   ├── relu.py
│   │   ├── sigmoid.py
│   │   ├── softmax.py
│   │   └── softplus.py
│   └── optimizers
│       ├── __init__.py
│       ├── adagrad.py
│       ├── adam.py
│       ├── momentum_sgd.py
│       ├── optimizer.py
│       └── sgd.py
├── train.py
└── train_adv_autoenc.ipynb
/README.md:
--------------------------------------------------------------------------------
# Implementation of Adversarial Autoencoder (arXiv: http://arxiv.org/abs/1511.05644)

Python code for training an Adversarial Autoencoder with Theano.

You can train example models of the Adversarial Autoencoder on MNIST in the IPython notebook.

See https://github.com/takerum/adversarial_autoencoder/blob/master/train_adv_autoenc.ipynb .

- The hidden code z of the trained adversarial autoencoder with a 2-D uniform prior distribution (see figure/z_uniform_2.png).

- The input x sampled from the trained adversarial autoencoder with an 8-D Gaussian prior distribution (see figure/x_gaussian_8.png).

## Required libraries:
python 2.7, theano 0.7.0

## References:
[1] Alireza Makhzani, Jonathon Shlens, Navdeep Jaitly and Ian Goodfellow. Adversarial Autoencoders. arXiv preprint (http://arxiv.org/abs/1511.05644).
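
For a quick start outside the notebook, the sketch below shows one way to launch training from the repository root. This is a minimal, illustrative sketch only: it assumes `dataset/mnist.pkl` has already been fetched with `dataset/download_mnist.sh` (run inside `dataset/`), and the keyword arguments simply mirror the defaults defined in `train.py`; the save filename is a hypothetical example.

```python
# Illustrative usage sketch -- train a model with a 2-D uniform prior.
from train import train

model, statuses = train(latent_dim=2,
                        z_prior='uniform',
                        num_epochs=50,
                        save_filename='adv_autoenc_uniform_2d')
```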
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
__author__ = 'TakeruMiyato'

import source
import models
--------------------------------------------------------------------------------
/adv_auto_mnist.py:
--------------------------------------------------------------------------------
from models.adv_autoencoder import AdversarialAutoencoder
import source.layers as L
from theano.tensor.shared_randomstreams import RandomStreams
import numpy,theano
import theano.tensor as T

def get_normalized_vector(v):
    v = v / (1e-20 + T.max(T.abs_(v), axis=1, keepdims=True))
    v_2 = T.sum(v**2,axis=1,keepdims=True)
    return v / T.sqrt(1e-6+v_2)


class AdversarialAutoencoderMNIST(AdversarialAutoencoder):

    def __init__(self,latent_dim=2,z_prior='gaussian'):

        self.z_prior = z_prior

        self.enc_l1 = L.Linear((784,1000))
        self.enc_b1 = L.BatchNormalization(1000)
        self.enc_l2 = L.Linear((1000,1000))
        self.enc_b2 = L.BatchNormalization(1000)
        self.enc_l3 = L.Linear((1000,latent_dim))
        self.enc_b3 = L.BatchNormalization(latent_dim)

        self.dec_l1 = L.Linear((latent_dim,1000))
        self.dec_b1 = L.BatchNormalization(1000)
        self.dec_l2 = L.Linear((1000,1000))
        self.dec_b2 = L.BatchNormalization(1000)
        self.dec_l3 = L.Linear((1000,784))

        self.D_l1 = L.Linear((latent_dim,500))
        self.D_b1 = L.BatchNormalization(500)
        self.D_l2 = L.Linear((500,500))
        self.D_b2 = L.BatchNormalization(500)
        self.D_l3 = L.Linear((500,1))

        self.model_params = self.enc_l1.params + self.enc_l2.params + self.enc_l3.params \
                            + self.dec_l1.params + self.dec_l2.params + self.dec_l3.params \
                            + self.enc_b1.params + self.enc_b2.params + self.enc_b3.params \
                            + self.dec_b1.params + self.dec_b2.params
        self.D_params = self.D_l1.params + self.D_l2.params + self.D_l3.params
        self.rng = RandomStreams(seed=numpy.random.randint(1234))


    def encode(self,input,train=True):
        h = input
        h = self.enc_l1(h)
        h = self.enc_b1(h,train=train)
        h = L.relu(h)
        h = self.enc_l2(h)
        h = self.enc_b2(h,train=train)
        h = L.relu(h)
        h = self.enc_l3(h)
        h = self.enc_b3(h,train=train)
        return h

    def decode(self,input,train=True):
        h = input
        h = self.dec_l1(h)
        h = self.dec_b1(h,train=train)
        h = L.relu(h)
        h = self.dec_l2(h)
        h = self.dec_b2(h,train=train)
        h = L.relu(h)
        h = self.dec_l3(h)
        h = L.sigmoid(h)
        return h

    def D(self,input,train=True):
        h = input
        h = self.D_l1(h)
        h = L.relu(h)
        h = self.D_l2(h)
        h = L.relu(h)
        h = self.D_l3(h)
        h = L.sigmoid(h)
        return h

    def sample_from_prior(self,z):

        ###### gaussian #######
        if(self.z_prior == 'gaussian'):
            return 1.0*self.rng.normal(size=z.shape,dtype=theano.config.floatX)

        ###### uniform ########
        elif(self.z_prior == 'uniform'):
            # direction uniform on the unit sphere, radius 2*U^(1/latent_dim):
            # together this draws z uniformly inside a ball of radius 2
            v = get_normalized_vector(self.rng.normal(size=z.shape,dtype=theano.config.floatX))
            r = T.power(self.rng.uniform(size=z.sum(axis=1,keepdims=True).shape,low=0,high=1.0,dtype=theano.config.floatX),1./z.shape[1])
            r = T.patternbroadcast(r,[False,True])
            return 2.0*r*v

        else:
            raise NotImplementedError()
--------------------------------------------------------------------------------
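
A note on `sample_from_prior` above: for the 'uniform' option the direction comes from a normalized Gaussian (uniform on the unit sphere) and the radius from U^(1/latent_dim), so z is distributed uniformly inside a ball of radius 2. A standalone NumPy sketch of the same construction, for illustration only (not part of the repository):

import numpy

def sample_uniform_ball(n, dim, radius=2.0, rng=numpy.random):
    v = rng.normal(size=(n, dim))
    v /= numpy.sqrt((v ** 2).sum(axis=1, keepdims=True))  # uniform direction on the sphere
    r = rng.uniform(size=(n, 1)) ** (1.0 / dim)            # radius law for a uniform ball
    return radius * r * v
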
/dataset/download_mnist.sh:
--------------------------------------------------------------------------------
#!/bin/sh
wget -c http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
gzip -d mnist.pkl.gz
--------------------------------------------------------------------------------
/figure/x_gaussian_8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/takerum/adversarial_autoencoder/0af027e8cfa1ec90011c58edec44bca4a2b95117/figure/x_gaussian_8.png
--------------------------------------------------------------------------------
/figure/z_uniform_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/takerum/adversarial_autoencoder/0af027e8cfa1ec90011c58edec44bca4a2b95117/figure/z_uniform_2.png
--------------------------------------------------------------------------------
/load_data.py:
--------------------------------------------------------------------------------
import numpy
import cPickle
import os
import theano

def load_mnist_dataset():
    dataset = cPickle.load(open('dataset/mnist.pkl','rb'))
    train_set_x = numpy.concatenate((dataset[0][0],dataset[1][0]),axis=0)
    train_set_y = numpy.concatenate((dataset[0][1],dataset[1][1]),axis=0)
    return ((train_set_x,train_set_y),(dataset[2][0],dataset[2][1]))

def _shared_dataset(data_xy):
    data_x, data_y = data_xy
    shared_x = theano.shared(numpy.asarray(data_x,
                                           dtype=theano.config.floatX), borrow=True)
    shared_y = theano.shared(numpy.asarray(data_y,
                                           dtype='int32'), borrow=True)
    return shared_x, shared_y

def load_mnist_full():
    dataset = load_mnist_dataset()

    train_set_x, train_set_y = dataset[0]
    test_set_x, test_set_y = dataset[1]

    train_set_x, train_set_y = _shared_dataset((train_set_x, train_set_y))
    test_set_x, test_set_y = _shared_dataset((test_set_x, test_set_y))

    return [(train_set_x, train_set_y), (test_set_x, test_set_y)]

def load_mnist_for_validation(n_v=10000):
    dataset = load_mnist_dataset()

    train_set_x, train_set_y = dataset[0]

    randix = numpy.random.permutation(train_set_x.shape[0])

    valid_set_x = train_set_x[randix[:n_v]]
    valid_set_y = train_set_y[randix[:n_v]]
    train_set_x = train_set_x[randix[n_v:]]
    train_set_y = train_set_y[randix[n_v:]]

    train_set_x, train_set_y = _shared_dataset((train_set_x, train_set_y))
    valid_set_x, valid_set_y = _shared_dataset((valid_set_x, valid_set_y))

    return [(train_set_x, train_set_y), (valid_set_x, valid_set_y)]

def load_mnist_for_semi_sup(n_l=1000, n_v=1000):
    dataset = load_mnist_dataset()

    _train_set_x, _train_set_y = dataset[0]

    rand_ind = numpy.random.permutation(_train_set_x.shape[0])
    _train_set_x = _train_set_x[rand_ind]
    _train_set_y = _train_set_y[rand_ind]

    s_c = n_l // 10  # labeled examples kept per class (integer, used as a slice bound)
    train_set_x = numpy.zeros((n_l, 28 ** 2))
    train_set_y = numpy.zeros(n_l)
    for i in xrange(10):
        ind = numpy.where(_train_set_y == i)[0]
        train_set_x[i * s_c:(i + 1) * s_c, :] = _train_set_x[ind[0:s_c], :]
        train_set_y[i * s_c:(i + 1) * s_c] = _train_set_y[ind[0:s_c]]
        _train_set_x = numpy.delete(_train_set_x, ind[0:s_c], 0)
        _train_set_y = numpy.delete(_train_set_y, ind[0:s_c])

    print rand_ind
    rand_ind = numpy.random.permutation(train_set_x.shape[0])
    train_set_x = train_set_x[rand_ind]
    train_set_y = train_set_y[rand_ind]
    valid_set_x = _train_set_x[:n_v]
    valid_set_y = _train_set_y[:n_v]
    # ul_train_set_x = _train_set_x[n_v:]
    train_set_ul_x = numpy.concatenate((train_set_x, _train_set_x[n_v:]), axis=0)
    train_set_ul_x = train_set_ul_x[numpy.random.permutation(train_set_ul_x.shape[0])]
    ul_train_set_y = _train_set_y[n_v:]  # dummy

    train_set_x, train_set_y = _shared_dataset((train_set_x, train_set_y))
    train_set_ul_x, ul_train_set_y = _shared_dataset((train_set_ul_x, ul_train_set_y))
    valid_set_x, valid_set_y = _shared_dataset((valid_set_x, valid_set_y))

    return [(train_set_x, train_set_y, train_set_ul_x), (valid_set_x, valid_set_y)]
--------------------------------------------------------------------------------
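
A minimal usage sketch for the loader above (illustrative; it assumes `dataset/mnist.pkl` is present, e.g. after running `download_mnist.sh` inside `dataset/`). `load_mnist_full` merges the original 50,000-example training split with the 10,000-example validation split and returns Theano shared variables:

import load_data

(train_x, train_y), (test_x, test_y) = load_data.load_mnist_full()
print train_x.get_value().shape  # expected: (60000, 784)
print test_x.get_value().shape   # expected: (10000, 784)
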
/models/__init__.py:
--------------------------------------------------------------------------------
import adv_autoencoder

AdversarialAutoencoder = adv_autoencoder.AdversarialAutoencoder
--------------------------------------------------------------------------------
/models/adv_autoencoder.py:
--------------------------------------------------------------------------------

class AdversarialAutoencoder(object):

    def __init__(self):
        self.model_params = None
        self.D_params = None
        raise NotImplementedError()

    ##### define encoder function #####
    def encode_train(self,input):
        return self.encode(input=input,train=True)
    def encode_test(self,input):
        return self.encode(input=input,train=False)

    def encode(self,input,train=True):
        raise NotImplementedError()

    ##### define decoder function #####
    def decode_train(self,input):
        return self.decode(input=input,train=True)
    def decode_test(self,input):
        return self.decode(input=input,train=False)

    def decode(self,input,train=True):
        raise NotImplementedError()

    ##### define discriminator function #####
    def D_train(self,input):
        return self.D(input=input,train=True)
    def D_test(self,input):
        return self.D(input=input,train=False)

    def D(self,input,train=True):
        raise NotImplementedError()
--------------------------------------------------------------------------------
/source/__init__.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
__author__ = 'TakeruMiyato'

import layers
import costs
import optimizers
--------------------------------------------------------------------------------
/source/costs/__init__.py:
--------------------------------------------------------------------------------

import cross_entropy_loss
import quadratic_loss
import weight_decay
import binary_cross_entropy_loss
import adversarial_autoenc_loss

cross_entropy_loss = cross_entropy_loss.cross_entropy_loss
binary_cross_entropy_loss = binary_cross_entropy_loss.binary_cross_entropy_loss
quadratic_loss = quadratic_loss.quadratic_loss
weight_decay = weight_decay.weight_decay
adversarial_autoenc_loss = adversarial_autoenc_loss.adversarial_autoenc_loss
--------------------------------------------------------------------------------
/source/costs/adversarial_autoenc_loss.py:
--------------------------------------------------------------------------------
import theano
import theano.tensor as T
import numpy
from binary_cross_entropy_loss import _binary_cross_entropy_loss
from quadratic_loss import _quadratic_loss

def adversarial_autoenc_loss(x,enc_f,dec_f,disc_f,p_z_sampler,
                             obj_type,
                             lamb=numpy.asarray(1.0,dtype=theano.config.floatX)):

    z_q = enc_f(x)          # code produced by the encoder
    z_p = p_z_sampler(z_q)  # sample from the prior, matched to the shape of z_q

    adv_loss = adversarial_loss(z_q=z_q, z_p=z_p, disc_f=disc_f)
    recon_loss = reconstruction_loss(x=x, z=z_q, dec_f=dec_f, obj_type=obj_type)

    # first output: autoencoder objective (reconstruct x while fooling the discriminator),
    # third output: discriminator objective
    return recon_loss - lamb*adv_loss, recon_loss, adv_loss


def adversarial_loss(z_p,z_q,disc_f):
    y_q = disc_f(z_q)
    y_p = disc_f(z_p)
    return -T.mean(T.log(y_p) + (T.log(1-y_q)))

def reconstruction_loss(x,z,dec_f,obj_type='QE'):
    x_ = dec_f(z)
    if obj_type == 'QE':
        return _quadratic_loss(x_,x)
    elif obj_type == 'CE':
        return _binary_cross_entropy_loss(x_,x)
--------------------------------------------------------------------------------
/source/costs/binary_cross_entropy_loss.py:
--------------------------------------------------------------------------------
import theano.tensor as T

def binary_cross_entropy_loss(x,t,forward_func):
    print "costs/binary_cross_entropy_loss"
    y = forward_func(x)
    return _binary_cross_entropy_loss(y,t)

def _binary_cross_entropy_loss(y,t):
    return -T.mean(T.sum(t*T.log(y) + (1-t)*T.log(1-y),axis=1))
--------------------------------------------------------------------------------
/source/costs/cross_entropy_loss.py:
--------------------------------------------------------------------------------
import theano.tensor as T

def cross_entropy_loss(x,t,forward_func):
    print "costs/cross_entropy_loss"
    y = forward_func(x)
    return _cross_entropy_loss(y,t)

def _cross_entropy_loss(y,t):
    if(t.ndim==1):
        return -T.mean(T.log(y)[T.arange(t.shape[0]), t])
    elif(t.ndim==2):
        return -T.mean(T.sum(t*T.log(y),axis=1))
--------------------------------------------------------------------------------
/source/costs/quadratic_loss.py:
--------------------------------------------------------------------------------
import theano.tensor as T

def quadratic_loss(x,t,forward_func):
    print "costs/quadratic_loss"
    y = forward_func(x)
    return _quadratic_loss(y,t)

def _quadratic_loss(y,t):
    return T.mean(T.sum((y-t)**2,axis=1))
--------------------------------------------------------------------------------
/source/costs/weight_decay.py:
--------------------------------------------------------------------------------
import numpy
import theano
import theano.tensor as T


def weight_decay(params,coeff):
    print "costs/weight_decay"
    cost = 0
    for param in params:
        cost += T.sum(param**2)
    return theano.shared(numpy.array(coeff).astype(theano.config.floatX))*cost
--------------------------------------------------------------------------------
/source/layers/__init__.py:
--------------------------------------------------------------------------------
import relu,lrelu
import linear
import sigmoid
import softmax
import batch_normalization

Linear = linear.Linear
BatchNormalization = batch_normalization.BatchNormalization

relu = relu.relu
lrelu = lrelu.lrelu
sigmoid = sigmoid.sigmoid
softmax = softmax.softmax
--------------------------------------------------------------------------------
/source/layers/batch_normalization.py:
--------------------------------------------------------------------------------
import theano
import theano.tensor as T
import numpy

from layer import Layer

class BatchNormalization(Layer):

    def __init__(self,size,moving_avg_ratio=0.9,initial_gamma=None,initial_beta=None):

        self.params = []
        self.moving_avg_ratio = theano.shared(numpy.array(moving_avg_ratio).astype(theano.config.floatX))
        self.finetune_N = theano.shared(0)
        if(initial_gamma is not None):
            assert initial_gamma.shape == (size,) or initial_gamma.shape == (1,size,1)
            gamma_values = initial_gamma.reshape((1,size,1))
        else:
            gamma_values = numpy.ones(shape=(1,size,1),dtype=theano.config.floatX)
        self.gamma = theano.shared(gamma_values)
        self.params.append(self.gamma)

        if(initial_beta is not None):
            assert initial_beta.shape == (size,) or initial_beta.shape == (1,size,1)
            beta_values = initial_beta.reshape((1,size,1))
        else:
            beta_values = numpy.zeros(shape=(1,size,1),dtype=theano.config.floatX)
        self.beta = theano.shared(beta_values)
        self.params.append(self.beta)

        est_var_values = numpy.ones((1,size,1),dtype=theano.config.floatX)
        est_mean_values = numpy.zeros((1,size,1),dtype=theano.config.floatX)
        self.est_var = theano.shared(est_var_values)
        self.est_mean = theano.shared(est_mean_values)

    def __call__(self,inputs,train=True,update_batch_stat=True,finetune=False):
        return self.forward(inputs,train=train,update_batch_stat=update_batch_stat,finetune=finetune)

    def forward(self,input_org,train=True,update_batch_stat=True,finetune=False):
        ldim,cdim,rdim = self._internal_shape(input_org)
        input = input_org.reshape((ldim,cdim,rdim))
        if (train):
            mean = T.mean(input, axis=(0, 2), keepdims=True)
            var = T.mean((input-mean)**2, axis=(0, 2), keepdims=True)

            if(update_batch_stat):
                finetune_N = theano.clone(self.finetune_N, share_inputs=False)
                if(finetune):
                    finetune_N.default_update = finetune_N+1
                    ratio = T.cast(1-1.0/(finetune_N+1),theano.config.floatX)
                else:
                    finetune_N.default_update = 0
                    ratio = self.moving_avg_ratio
                m = ldim*rdim
                scale = T.cast(m/(m-1.0),theano.config.floatX)
                est_mean = theano.clone(self.est_mean, share_inputs=False)
                est_var = theano.clone(self.est_var, share_inputs=False)
                est_mean.default_update = T.cast(ratio*self.est_mean + (1-ratio)*mean,theano.config.floatX)
                est_var.default_update = T.cast(ratio*self.est_var + (1-ratio)*scale*var,theano.config.floatX)
                mean += 0 * est_mean
                var += 0 * est_var
            #self.normalized_input = (input - self._pbc(mean))/T.sqrt(1e-6+self._pbc(var))
            #output = self._pbc(self.gamma) * self.normalized_input + self._pbc(self.beta)
            output = self._pbc(self.gamma) * (input - self._pbc(mean)) \
                     / T.sqrt(1e-6+self._pbc(var)) + self._pbc(self.beta)

        else:
            output = self._pbc(self.gamma) * (input - self._pbc(self.est_mean)) \
                     / T.sqrt(1e-6+self._pbc(self.est_var)) + self._pbc(self.beta)

        return output.reshape(input_org.shape)

    def _pbc(self,x):
        return T.patternbroadcast(x,(True,False,True))

    def _internal_shape(self, x):
        ldim = x.shape[0]
        cdim = self.gamma.size
        rdim = x.size // (ldim * cdim)
        return ldim, cdim, rdim
--------------------------------------------------------------------------------
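
In the training branch above, each of the `cdim` features is standardized with batch statistics taken over axes (0, 2) of the internal `(ldim, cdim, rdim)` reshape, then scaled by gamma and shifted by beta. A plain NumPy sketch of that computation for a 2-D `(batch, features)` input (where `rdim` is 1), for illustration only:

import numpy

def batchnorm_train_2d(x, gamma, beta, eps=1e-6):
    # x: (batch, features); gamma, beta: (features,)
    mean = x.mean(axis=0, keepdims=True)
    var = ((x - mean) ** 2).mean(axis=0, keepdims=True)
    return gamma * (x - mean) / numpy.sqrt(eps + var) + beta
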
/source/layers/layer.py:
--------------------------------------------------------------------------------

class Layer(object):

    def __call__(self, input):
        return self.forward(input)

    def forward(self,input):
        raise NotImplementedError()
--------------------------------------------------------------------------------
/source/layers/linear.py:
--------------------------------------------------------------------------------
import theano
import theano.tensor as T
import numpy

from layer import Layer

class Linear(Layer):

    def __init__(self,size,use_bias=True,initial_W=None,initial_b=None):
        self.use_bias = use_bias
        self.params = []

        if(initial_W is not None):
            assert initial_W.shape == size
            W_values = initial_W
        else:
            W_values = numpy.random.normal(0, numpy.sqrt(1. / size[0]), size=size).astype(theano.config.floatX)
        self.W = theano.shared(W_values)
        self.params.append(self.W)

        if(self.use_bias == True):
            if(initial_b is not None):
                assert initial_b.shape == (size[1],)
                b_values = initial_b
            else:
                b_values = numpy.zeros((size[1],)).astype(theano.config.floatX)
            self.b = theano.shared(b_values)
            self.params.append(self.b)

    def forward(self,input):
        input = self._as_mat(input)
        output = T.dot(input, self.W)
        if(self.use_bias == True):
            output += self.b

        return output

    def _as_mat(self,x):
        return x.reshape((x.shape[0],x.size//x.shape[0]))
--------------------------------------------------------------------------------
/source/layers/lrelu.py:
--------------------------------------------------------------------------------
import theano
import theano.tensor as T
import numpy
from layer import Layer


class LReLU(Layer):

    def __init__(self,slope):
        self.slope = theano.shared(numpy.asarray(slope,theano.config.floatX))

    def forward(self,x):
        return T.maximum(self.slope*x, x)

def lrelu(x,slope=0.1):
    return LReLU(slope)(x)
--------------------------------------------------------------------------------
/source/layers/relu.py:
--------------------------------------------------------------------------------
import theano.tensor as T
from layer import Layer

class ReLU(Layer):

    def forward(self,x):
        return T.maximum(0.0, x)

def relu(x):
    return ReLU()(x)
--------------------------------------------------------------------------------
/source/layers/sigmoid.py:
--------------------------------------------------------------------------------
import theano.tensor as T
from layer import Layer

class Sigmoid(Layer):

    def forward(self,x):
        return T.nnet.sigmoid(x)

def sigmoid(x):
    return Sigmoid()(x)
--------------------------------------------------------------------------------
/source/layers/softmax.py:
--------------------------------------------------------------------------------
import theano.tensor as T
from layer import Layer

class Softmax(Layer):

    def __init__(self,stable):
        self.stable = stable

    def forward(self,x):
        if(self.stable):
            x -= x.max(axis=1,keepdims=True)
        e_x = T.exp(x)
        out = e_x / e_x.sum(axis=1, keepdims=True)
        return out

def softmax(x,stable=False):
    return Softmax(stable=stable)(x)
--------------------------------------------------------------------------------
/source/layers/softplus.py:
--------------------------------------------------------------------------------
import theano.tensor as T
from layer import Layer

class Softplus(Layer):

    def forward(self,x):
        return T.nnet.softplus(x)

def softplus(x):
    return Softplus()(x)
--------------------------------------------------------------------------------
/source/optimizers/__init__.py:
--------------------------------------------------------------------------------
import sgd
import momentum_sgd
import adagrad
import adam

SGD = sgd.SGD
MomentumSGD = momentum_sgd.MomentumSGD
AdaGrad = adagrad.AdaGrad
ADAM = adam.ADAM
--------------------------------------------------------------------------------
/source/optimizers/adagrad.py:
--------------------------------------------------------------------------------
from optimizer import Optimizer
from collections import OrderedDict
import theano
import theano.tensor as T
import numpy

class AdaGrad(Optimizer):
    def __init__(self,cost,params,lr=0.1):
        self.lr = theano.shared(numpy.array(lr).astype(theano.config.floatX))
        super(AdaGrad,self).__init__(cost,params)

    def _updates(self):
        updates = OrderedDict()
        g_model_params = []
        model_adg_rates = []
        for param in self.params:
            gparam = T.grad(self.cost,wrt=param)
            g_model_params.append(gparam)
            adg_rate = theano.shared(numpy.ones(param.get_value(borrow=True).shape,dtype=theano.config.floatX))
            model_adg_rates.append(adg_rate)

        for param, gparam, adg_rate in zip(self.params, g_model_params, model_adg_rates):
            updates[adg_rate] = adg_rate + gparam*gparam
            stepped_param = param - (self.lr/T.sqrt(updates[adg_rate]))*gparam
            updates[param] = stepped_param

        return updates
--------------------------------------------------------------------------------
/source/optimizers/adam.py:
--------------------------------------------------------------------------------
from optimizer import Optimizer
from collections import OrderedDict
import theano
import theano.tensor as T
import numpy

class ADAM(Optimizer):
    def __init__(self,cost,params,alpha=0.001):
        self.alpha = theano.shared(numpy.array(alpha).astype(theano.config.floatX))
        super(ADAM,self).__init__(cost,params)

    def _updates(self):
        updates = OrderedDict()
        t = theano.shared(numpy.array(1).astype(theano.config.floatX))
        alpha = self.alpha
        beta_1 = numpy.array(0.9).astype(theano.config.floatX)
        beta_2 = numpy.array(0.999).astype(theano.config.floatX)
        epsilon = numpy.array(1.0*10**-8.0).astype(theano.config.floatX)
        lam = numpy.array(1.0-1.0*10**-8.0).astype(theano.config.floatX)
        g_model_params = []
        models_m = []
        models_v = []
        for param in self.params:
            gparam = T.grad(self.cost, wrt=param)
            g_model_params.append(gparam)
            m = theano.shared(numpy.zeros(param.get_value(borrow=True).shape, dtype=theano.config.floatX))
            v = theano.shared(numpy.zeros(param.get_value(borrow=True).shape, dtype=theano.config.floatX))
            models_m.append(m)
            models_v.append(v)
        for param, gparam, m, v in zip(self.params, g_model_params, models_m, models_v):
            beta_1_t = T.cast(beta_1 * lam ** (t - 1), theano.config.floatX)
            updates[m] = T.cast(beta_1_t * m + (1 - beta_1_t) * gparam,theano.config.floatX)
            updates[v] = T.cast(beta_2 * v + (1 - beta_2) * (gparam * gparam),theano.config.floatX)
            m_hat = T.cast(updates[m] / (1 - beta_1 ** t), theano.config.floatX)
            v_hat = T.cast(updates[v] / (1 - beta_2 ** t), theano.config.floatX)
            updates[param] = param - alpha * m_hat / (T.sqrt(v_hat) + epsilon)
        updates[t] = t + 1
        return updates
--------------------------------------------------------------------------------
/source/optimizers/momentum_sgd.py:
--------------------------------------------------------------------------------
from optimizer import Optimizer
from collections import OrderedDict
import theano
import theano.tensor as T
import numpy

class MomentumSGD(Optimizer):
    def __init__(self,cost,params,lr=0.1,momentum_ratio=0.9):
        self.lr = theano.shared(numpy.array(lr).astype(theano.config.floatX))
        self.ratio = theano.shared(numpy.array(momentum_ratio).astype(theano.config.floatX))
        super(MomentumSGD,self).__init__(cost,params)

    def _updates(self):
        updates = OrderedDict()
        g_model_params = []
        g_model_params_mom = []
        for param in self.params:
            gparam = T.grad(self.cost,wrt=param)
            g_model_params.append(gparam)
            gparam_mom = theano.shared(numpy.zeros(param.get_value(borrow=True).shape,dtype=theano.config.floatX))
            g_model_params_mom.append(gparam_mom)

        for param, gparam_mom, gparam in zip(self.params, g_model_params_mom, g_model_params):
            updates[gparam_mom] = self.ratio * gparam_mom + (1. - self.ratio) * self.lr * gparam
            updates[param] = param - updates[gparam_mom]

        return updates
--------------------------------------------------------------------------------
/source/optimizers/optimizer.py:
--------------------------------------------------------------------------------
import theano
import theano.tensor

class Optimizer(object):

    def __init__(self,cost,params):
        self.cost = cost
        self.params = params
        self.updates = self._updates()

    def _updates(self):
        raise NotImplementedError()
--------------------------------------------------------------------------------
/source/optimizers/sgd.py:
--------------------------------------------------------------------------------
from optimizer import Optimizer
from collections import OrderedDict
import theano
import theano.tensor as T
import numpy

class SGD(Optimizer):
    def __init__(self,cost,params,lr=0.1):
        self.lr = theano.shared(numpy.array(lr).astype(theano.config.floatX))
        super(SGD,self).__init__(cost,params)

    def _updates(self):
        updates = OrderedDict()
        for param in self.params:
            gparam = T.grad(self.cost, wrt=param)
            updates[param] = param - self.lr*gparam
        return updates
--------------------------------------------------------------------------------
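
Each optimizer subclass above builds a Theano updates dictionary from a scalar cost and a list of shared parameters; a short sketch of the consuming pattern (the same pattern `train.py` uses below; `loss`, `model`, `x`, `x_train`, `index` and `batch_size` are assumed to be defined as they are there):

# illustrative sketch only
opt = ADAM(cost=loss, params=model.model_params, alpha=0.002)
f_step = theano.function(inputs=[index], outputs=loss, updates=opt.updates,
                         givens={x: x_train[batch_size*index:batch_size*(index+1)]})
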
/train.py:
--------------------------------------------------------------------------------


from source import optimizers,costs
import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
import cPickle
import load_data
from adv_auto_mnist import AdversarialAutoencoderMNIST

import time
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm

import os
import errno

def make_sure_path_exists(path):
    try:
        os.makedirs(path)
    except OSError as exception:
        if exception.errno != errno.EEXIST:
            raise



def train(latent_dim=2,  # dimension of latent variable z
          z_prior='gaussian',  # 'gaussian' or 'uniform'
          lamb=10.,  # ratio between reconstruction and adversarial cost
          recon_obj_type='CE',  # reconstruction objective: 'CE' (cross entropy) or 'QE' (quadratic error)
          initial_learning_rate=0.002,
          learning_rate_decay=1.0,
          num_epochs=50,
          batch_size=100,
          save_filename='trained_model',
          seed=1):


    numpy.random.seed(seed=seed)

    dataset = load_data.load_mnist_full()

    x_train,_ = dataset[0]
    x_test,_ = dataset[1]

    model = AdversarialAutoencoderMNIST(latent_dim=latent_dim,z_prior=z_prior)

    x = T.matrix()

    loss_for_training,_,adv_loss_for_training = costs.adversarial_autoenc_loss(x=x,
                                                                               enc_f=model.encode_train,
                                                                               dec_f=model.decode_train,
                                                                               disc_f=model.D_train,
                                                                               p_z_sampler=model.sample_from_prior,
                                                                               obj_type=recon_obj_type,
                                                                               lamb=numpy.asarray(lamb,dtype=theano.config.floatX))

    _,recon_loss,adv_loss = costs.adversarial_autoenc_loss(x=x,
                                                           enc_f=model.encode_test,
                                                           dec_f=model.decode_test,
                                                           disc_f=model.D_test,
                                                           p_z_sampler=model.sample_from_prior,
                                                           obj_type=recon_obj_type,
                                                           lamb=numpy.asarray(lamb,dtype=theano.config.floatX))

    optimizer_recon = optimizers.ADAM(cost=loss_for_training,
                                      params=model.model_params,
                                      alpha=numpy.asarray(initial_learning_rate,dtype=theano.config.floatX))
    optimizer_adv = optimizers.ADAM(cost=adv_loss_for_training,
                                    params=model.D_params,
                                    alpha=numpy.asarray(initial_learning_rate,dtype=theano.config.floatX))

    index = T.iscalar()

    f_training_model = theano.function(inputs=[index], outputs=loss_for_training, updates=optimizer_recon.updates,
                                       givens={
                                           x:x_train[batch_size*index:batch_size*(index+1)]})
    f_training_discriminator = theano.function(inputs=[index], outputs=adv_loss_for_training, updates=optimizer_adv.updates,
                                               givens={
                                                   x:x_train[batch_size*index:batch_size*(index+1)]})


    f_recon_train = theano.function(inputs=[index], outputs=recon_loss,
                                    givens={
                                        x:x_train[batch_size*index:batch_size*(index+1)]})
    f_adv_train = theano.function(inputs=[index], outputs=adv_loss,
                                  givens={
                                      x:x_train[batch_size*index:batch_size*(index+1)]})
    f_recon_test = theano.function(inputs=[index], outputs=recon_loss,
                                   givens={
                                       x:x_test[batch_size*index:batch_size*(index+1)]})
    f_adv_test = theano.function(inputs=[index], outputs=adv_loss,
                                 givens={
                                     x:x_test[batch_size*index:batch_size*(index+1)]})

    f_lr_decay_recon = theano.function(inputs=[],outputs=optimizer_recon.alpha,
                                       updates={optimizer_recon.alpha:theano.shared(numpy.array(learning_rate_decay).astype(theano.config.floatX))*optimizer_recon.alpha})
    f_lr_decay_adv = theano.function(inputs=[],outputs=optimizer_adv.alpha,
                                     updates={optimizer_adv.alpha:theano.shared(numpy.array(learning_rate_decay).astype(theano.config.floatX))*optimizer_adv.alpha})

    randix = RandomStreams(seed=numpy.random.randint(1234)).permutation(n=x_train.shape[0])
    f_permute_train_set = theano.function(inputs=[],outputs=x_train,updates={x_train:x_train[randix]})
    statuses = {}
    statuses['recon_train'] = []
    statuses['adv_train'] = []
    statuses['recon_test'] = []
    statuses['adv_test'] = []

    n_train = x_train.get_value().shape[0]
    n_test = x_test.get_value().shape[0]

    sum_recon_train = numpy.sum(numpy.array([f_recon_train(i) for i in xrange(n_train/batch_size)]))*batch_size
    sum_adv_train = numpy.sum(numpy.array([f_adv_train(i) for i in xrange(n_train/batch_size)]))*batch_size
    sum_recon_test = numpy.sum(numpy.array([f_recon_test(i) for i in xrange(n_test/batch_size)]))*batch_size
    sum_adv_test = numpy.sum(numpy.array([f_adv_test(i) for i in xrange(n_test/batch_size)]))*batch_size
    statuses['recon_train'].append(sum_recon_train/n_train)
    statuses['adv_train'].append(sum_adv_train/n_train)
    statuses['recon_test'].append(sum_recon_test/n_test)
    statuses['adv_test'].append(sum_adv_test/n_test)
    print "[Epoch]",str(-1)
    print "recon_train : " , statuses['recon_train'][-1], "adv_train : ", statuses['adv_train'][-1], \
        "recon_test : " , statuses['recon_test'][-1], "adv_test : ", statuses['adv_test'][-1]

    z = model.encode_test(input=x)
    f_enc = theano.function(inputs=[],outputs=z,givens={x:dataset[1][0]})

    def plot_latent_variable(epoch):
        output = f_enc()
        plt.figure(figsize=(8,8))
        color = cm.rainbow(numpy.linspace(0,1,10))
        for l,c in zip(range(10),color):
            ix = numpy.where(dataset[1][1].get_value()==l)[0]
            plt.scatter(output[ix,0],output[ix,1],c=c,label=l,s=8,linewidth=0)
        plt.xlim([-5.0,5.0])
        plt.ylim([-5.0,5.0])
        plt.legend(fontsize=15)
        plt.savefig('z_epoch' + str(epoch) + '.pdf')

    print "training..."
    make_sure_path_exists("./trained_model")

    for epoch in xrange(num_epochs):
        cPickle.dump((model,statuses),open('./trained_model/'+'tmp-' + save_filename,'wb'),cPickle.HIGHEST_PROTOCOL)
        f_permute_train_set()
        ### update parameters ###
        for i in xrange(n_train/batch_size):
            ### Optimize model and discriminator alternately ###
            f_training_discriminator(i)
            f_training_model(i)
        #########################

        if(latent_dim == 2):
            plot_latent_variable(epoch=epoch)

        sum_recon_train = numpy.sum(numpy.array([f_recon_train(i) for i in xrange(n_train/batch_size)]))*batch_size
        sum_adv_train = numpy.sum(numpy.array([f_adv_train(i) for i in xrange(n_train/batch_size)]))*batch_size
        sum_recon_test = numpy.sum(numpy.array([f_recon_test(i) for i in xrange(n_test/batch_size)]))*batch_size
        sum_adv_test = numpy.sum(numpy.array([f_adv_test(i) for i in xrange(n_test/batch_size)]))*batch_size
        statuses['recon_train'].append(sum_recon_train/n_train)
        statuses['adv_train'].append(sum_adv_train/n_train)
        statuses['recon_test'].append(sum_recon_test/n_test)
        statuses['adv_test'].append(sum_adv_test/n_test)
        print "[Epoch]",str(epoch)
        print "recon_train : " , statuses['recon_train'][-1], "adv_train : ", statuses['adv_train'][-1], \
            "recon_test : " , statuses['recon_test'][-1], "adv_test : ", statuses['adv_test'][-1]

        f_lr_decay_recon()
        f_lr_decay_adv()

    make_sure_path_exists("./trained_model")
    cPickle.dump((model,statuses),open('./trained_model/'+save_filename,'wb'),cPickle.HIGHEST_PROTOCOL)
    return model,statuses

if __name__=='__main__':
    train()
--------------------------------------------------------------------------------
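
After training, train.py pickles the (model, statuses) pair into ./trained_model/ (plus a tmp- checkpoint each epoch) and, when latent_dim == 2, writes per-epoch scatter plots of the latent code to z_epoch<N>.pdf. A minimal sketch for reloading a finished run with the default save filename (illustrative only; it assumes the modules defining the model class are importable from the repository root):

import cPickle

model, statuses = cPickle.load(open('./trained_model/trained_model', 'rb'))
print statuses['recon_test'][-1]  # reconstruction loss on the test set after the last epoch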