├── data ├── __init__.py ├── silhouettes │ ├── caltech101_silhouettes_28.mat │ └── caltech101_silhouettes_28_split1.mat ├── import_data_alphadigs.py ├── import_data_mnist.py └── data_preprocessing.py ├── models ├── __init__.py ├── autoencoder.py ├── loss_functions.py ├── vrmax.py ├── iwae.py └── vae.py ├── __init__.py ├── network ├── __init__.py ├── classifier.py ├── deterministic_layer.py ├── network.py └── stochastic_layer.py ├── prior ├── __init__.py ├── GMM.py ├── gaussian.py └── swiss_roll.py ├── visualization ├── samples.py ├── __init__.py ├── reconstruction.py └── utils.py ├── load_save.py ├── init_models.py ├── README.md └── exp.py /data/__init__.py: -------------------------------------------------------------------------------- 1 | import data_preprocessing 2 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | import loss_functions 2 | import vae 3 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | import network 2 | import models 3 | import data 4 | import visualization 5 | import prior 6 | -------------------------------------------------------------------------------- /network/__init__.py: -------------------------------------------------------------------------------- 1 | import stochastic_layer 2 | import deterministic_layer 3 | import network 4 | import classifier 5 | -------------------------------------------------------------------------------- /data/silhouettes/caltech101_silhouettes_28.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingzhenLi/vae_renyi_divergence/HEAD/data/silhouettes/caltech101_silhouettes_28.mat -------------------------------------------------------------------------------- /data/silhouettes/caltech101_silhouettes_28_split1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingzhenLi/vae_renyi_divergence/HEAD/data/silhouettes/caltech101_silhouettes_28_split1.mat -------------------------------------------------------------------------------- /prior/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | class Prior(object): 3 | 4 | def sample(self, num_samples): 5 | raise NotImplementedError() 6 | 7 | def get_name(self): 8 | raise NotImplementedError() 9 | -------------------------------------------------------------------------------- /visualization/samples.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from __init__ import plot_images 4 | 5 | def plot_samples(sess, shape, prior, decoder): 6 | """ 7 | Plot the reconstruction of data. 
8 | """ 9 | z = prior.sample(100) 10 | x = decoder.encode(z, sampling = False) 11 | 12 | samples = sess.run(x) 13 | plot_images(samples, shape, '', 'samples') 14 | -------------------------------------------------------------------------------- /visualization/__init__.py: -------------------------------------------------------------------------------- 1 | import reconstruction 2 | import utils 3 | 4 | def plot_images(images, shape, path, filename): 5 | # finally save to file 6 | import matplotlib 7 | matplotlib.use('Agg') 8 | import matplotlib.pyplot as plt 9 | 10 | images = utils.reshape_and_tile_images(images, shape) 11 | plt.imshow(images, cmap='Greys') 12 | plt.axis('off') 13 | plt.savefig(path + filename + ".svg", format="svg") 14 | plt.close() 15 | -------------------------------------------------------------------------------- /visualization/reconstruction.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from __init__ import plot_images 4 | 5 | def plot_recon(sess, x, shape, encoder, decoder): 6 | """ 7 | Plot the reconstruction of data. 8 | """ 9 | input = tf.placeholder(tf.float32, shape=x.shape) 10 | z = encoder.encode(input, sampling = False) 11 | x_recon = decoder.encode(z, sampling = False) 12 | 13 | input_recon = sess.run(x_recon, feed_dict = {input: x}) 14 | plot_images(input_recon, shape, '', 'recon_sample') 15 | -------------------------------------------------------------------------------- /visualization/utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | 4 | """ 5 | Function of drawing images, copied from Theano Tutorial. 6 | """ 7 | 8 | def reshape_and_tile_images(array, shape=(28, 28), n_cols=None): 9 | if n_cols is None: 10 | n_cols = int(math.sqrt(array.shape[0])) 11 | n_rows = int(math.ceil(float(array.shape[0])/n_cols)) 12 | 13 | def cell(i, j): 14 | ind = i*n_cols+j 15 | if i*n_cols+j < array.shape[0]: 16 | return array[ind].reshape(*shape, order='C') 17 | else: 18 | return np.zeros(shape) 19 | 20 | def row(i): 21 | return np.concatenate([cell(i, j) for j in range(n_cols)], axis=1) 22 | 23 | return np.concatenate([row(i) for i in range(n_rows)], axis=0) 24 | -------------------------------------------------------------------------------- /prior/GMM.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from __init__ import Prior 4 | 5 | class GMM_diag(Prior): 6 | 7 | def __init__(self, size, Mu_list, Sigma_list, weights = None): 8 | self.size = size 9 | self.Mu_list = Mu_list 10 | self.Sigma_list = Sigma_list 11 | self.num_mixture = Mu_list.shape[0] 12 | if weights is None: 13 | weights = np.ones(self.num_mixture) 14 | self.weights = weights / np.sum(weights) 15 | 16 | def sample(self, num_samples): 17 | # first select the mixture 18 | i = np.random.choice(self.num_mixture, num_samples, p = self.weights) 19 | eps = tf.random_normal([num_samples, self.size]) 20 | output = self.Mu_list[i] + eps * self.Sigma_list[i] 21 | return output 22 | 23 | def get_name(self): 24 | return 'prior_GMM_diag' 25 | 26 | -------------------------------------------------------------------------------- /network/classifier.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from deterministic_layer import Deterministic_Layer 4 | 5 | def 
construct_classifier(layer_sizes, activation='softplus'): 6 | """ 7 | Construct the stochastic layer. 8 | """ 9 | D_layers = [] 10 | for l in xrange(len(layer_sizes) - 1): 11 | if l < len(layer_sizes) - 2: 12 | func = 'relu' 13 | else: 14 | func = activation 15 | D_layers.append(Deterministic_Layer(layer_sizes[l], layer_sizes[l+1], func)) 16 | 17 | classifier = Classifier(D_layers) 18 | return classifier 19 | 20 | class Classifier(object): 21 | 22 | def __init__(self, D_layers): 23 | self.D_layers = D_layers 24 | self.params = [] 25 | for layer in self.D_layers: 26 | self.params = self.params + layer.params 27 | 28 | def encode(self, input): 29 | output = input 30 | for layer in self.D_layers: 31 | output = layer.encode(output) 32 | return output 33 | 34 | def get_name(self): 35 | return 'classifier' 36 | 37 | -------------------------------------------------------------------------------- /prior/gaussian.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from __init__ import Prior 4 | 5 | class Gaussian_diag(Prior): 6 | 7 | def __init__(self, size, Mu, Sigma): 8 | self.size = size 9 | self.Mu = Mu 10 | self.Sigma = Sigma 11 | 12 | def sample(self, num_samples): 13 | eps = tf.random_normal([num_samples, self.size]) 14 | output = self.Mu + eps * self.Sigma 15 | return output 16 | 17 | def update(self, samples): 18 | # update the parameters by matching empirical moments 19 | mean, var = tf.nn.moments(samples, axes=[0]) 20 | self.Mu = mean 21 | self.Sigma = tf.sqrt(var) 22 | 23 | def get_name(self): 24 | return 'prior_gaussian_diag' 25 | 26 | class Gaussian_full(Prior): 27 | 28 | def __init__(self, Mu, Sigma): 29 | self.Mu = Mu 30 | self.Sigma = Sigma 31 | 32 | def sample(self, num_samples): 33 | eps = tf.random_normal([num_samples, self.size]) 34 | output = self.Mu + tf.matmul(eps, self.Sigma) 35 | return output 36 | 37 | def get_name(self): 38 | return 'prior_gaussian_full' 39 | 40 | -------------------------------------------------------------------------------- /data/import_data_alphadigs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.io import loadmat 3 | 4 | def read(path, num_per_digit_train = 10, SEED = 0): 5 | 6 | # load data 7 | mat = loadmat(path + 'binaryalphadigs.mat') 8 | img = mat['dat'] 9 | lbl = mat['classlabels'] 10 | 11 | num_class = 36 12 | num_per_digit_train = min(num_per_digit_train, 39) 13 | num_per_digit_test = 39 - num_per_digit_train 14 | num_data_train = num_per_digit_train * num_class 15 | num_data_test = num_per_digit_test * num_class 16 | 17 | rows = 20; cols = 16 18 | img_train = np.zeros([rows*cols, num_data_train]) 19 | lbl_train = np.zeros([num_class, num_data_train]) 20 | img_test = np.zeros([rows*cols, num_data_test]) 21 | lbl_test = np.zeros([num_class, num_data_test]) 22 | 23 | np.random.seed(SEED) 24 | for j in xrange(num_class): 25 | ind = np.random.permutation(range(39)) 26 | for k in xrange(num_per_digit_train): 27 | img_train[:, k * num_class + j] = np.ravel(img[j, ind[k]]) 28 | lbl_train[j, k * num_class + j] = 1 29 | for k in xrange(num_per_digit_test): 30 | img_test[:, k * num_class + j] = np.ravel(img[j, ind[num_per_digit_train + k]]) 31 | lbl_test[j, k * num_class + j] = 1 32 | 33 | return img_train, lbl_train, img_test, lbl_test 34 | 35 | -------------------------------------------------------------------------------- /prior/swiss_roll.py: 
--------------------------------------------------------------------------------
 1 | import math
 2 | import numpy as np
 3 | import tensorflow as tf
 4 | from __init__ import Prior
 5 | 
 6 | class Swiss_Roll(Prior):
 7 | 
 8 |     def __init__(self, size, center, radius):
 9 |         self.size = size
10 |         self.center = center
11 |         self.radius = radius
12 | 
13 |     def sample(self, num_samples):
14 |         # one uniform per 2-D spiral coordinate pair (cf. the numpy helper below);
15 |         # assumes an even latent size
16 |         unit = tf.sqrt(tf.random_uniform([num_samples, self.size / 2]))
17 |         r = unit * self.radius
18 |         theta = np.pi * 4.0 * unit
19 |         # sketch of the previously missing output: x coordinates in the first
20 |         # half of the dimensions, y coordinates in the second half
21 |         output = self.center + tf.concat(1, [r * tf.cos(theta), r * tf.sin(theta)])
22 |         return output
23 | 
24 |     def get_name(self):
25 |         return 'prior_swiss_roll'
26 | 
27 | def sample_z_from_swiss_roll_distribution(batchsize, z_dim, label_indices, n_labels, gpu=False):
28 |     def sample(label, n_labels):
29 |         uni = np.random.uniform(0.0, 1.0) / float(n_labels) + float(label) / float(n_labels)
30 |         r = math.sqrt(uni) * 3.0
31 |         rad = np.pi * 4.0 * math.sqrt(uni)
32 |         x = r * math.cos(rad)
33 |         y = r * math.sin(rad)
34 |         return np.array([x, y]).reshape((2,))
35 | 
36 |     z = np.zeros((batchsize, z_dim), dtype=np.float32)
37 |     for batch in xrange(batchsize):
38 |         for zi in xrange(z_dim / 2):
39 |             z[batch, zi*2:zi*2+2] = sample(label_indices[batch], n_labels)
40 | 
41 |     z = Variable(z)  # note: Variable / to_gpu follow the chainer API; this standalone helper is not used by the tensorflow code above
42 |     if gpu:
43 |         z.to_gpu()
44 |     return z
45 | 
--------------------------------------------------------------------------------
/load_save.py:
--------------------------------------------------------------------------------
 1 | import sys, os
 2 | import numpy as np
 3 | import tensorflow as tf
 4 | 
 5 | def path_name(dataset, alpha, num_samples, backward_pass, extra_string = None):
 6 | 
 7 |     path = 'ckpts/' + dataset + '/'
 8 |     if backward_pass == 'max':
 9 |         folder_name = 'max_k%d' % num_samples
10 |     else:
11 |         folder_name = 'alpha%.2f_k%d' % (alpha, num_samples)
12 |     if extra_string is not None:
13 |         folder_name += '_' + extra_string
14 | 
15 |     path = path + folder_name + '/'
16 |     return path
17 | 
18 | def save_checkpoint(sess, path, checkpoint=1, var_list = None):
19 |     if not os.path.exists(path):
20 |         os.makedirs(path)
21 |     # save model
22 |     fname = path + 'checkpoint%d.ckpt' % checkpoint
23 |     saver = tf.train.Saver(var_list)
24 |     save_path = saver.save(sess, fname)
25 |     print("Model saved in %s" % save_path)
26 | 
27 | def load_checkpoint(sess, path, checkpoint=1):
28 |     # load model
29 |     try:
30 |         fname = path + 'checkpoint%d.ckpt' % checkpoint
31 |         saver = tf.train.Saver()
32 |         saver.restore(sess, fname)
33 |         print("Model restored from %s" % fname)
34 |     except:
35 |         print "Failed to load model from %s" % fname
36 | 
--------------------------------------------------------------------------------
/models/autoencoder.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import tensorflow as tf
 3 | import time
 4 | from scipy.misc import logsumexp
 5 | from network.network import construct_network
 6 | 
 7 | np.random.seed(0)
 8 | tf.set_random_seed(0)
 9 | 
10 | def construct_autoencoder(variables_size, hidden_layers, \
11 |         data_type='real', activation='softplus'):
12 |     """
13 |     Construct an auto-encoder, return both encoder and decoder
14 |     """
15 |     layer_sizes = []
16 |     l = 0
17 | 
18 |     # first construct the encoder
19 |     for d_layers in hidden_layers:
20 |         sizes = [variables_size[l]]
21 |         sizes.extend(d_layers)
22 |         sizes.append(variables_size[l+1])
23 |         layer_sizes.append(sizes)
24 |         l += 1
25 |     encoder = construct_network(layer_sizes, 'gaussian', 'real', activation, 'q')
26 |     print 'q network architecture:', layer_sizes
27 |     print 'prob. 
type of q net:', encoder.get_name() 28 | 29 | # then construct the decoder 30 | layer_sizes = [list(reversed(sizes)) for sizes in layer_sizes] 31 | layer_sizes = list(reversed(layer_sizes)) 32 | decoder = construct_network(layer_sizes, 'gaussian', data_type, activation, 'p') 33 | print 'p network architecture:', layer_sizes 34 | print 'prob. type of p net:', decoder.get_name() 35 | 36 | return encoder, decoder 37 | 38 | -------------------------------------------------------------------------------- /init_models.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import time 4 | from models.autoencoder import construct_autoencoder 5 | from network.classifier import construct_classifier 6 | from prior.gaussian import Gaussian_diag 7 | from prior.GMM import GMM_diag 8 | 9 | def init_model(variables_size, hidden_layers, data_type, activation = 'softplus'): 10 | 11 | # first initialise models 12 | encoder, decoder = construct_autoencoder(variables_size, hidden_layers, \ 13 | data_type, activation) 14 | 15 | return [encoder, decoder] 16 | 17 | def init_prior_gaussian(output_size, mu = 0.0, sigma = 1.0): 18 | prior = Gaussian_diag(output_size, mu, sigma) 19 | return prior 20 | 21 | def init_prior_GMM(output_size, mu_list = None, sigma_list = None, weights = None): 22 | if weights is None: 23 | num_mixture = 4 24 | weights = np.ones(num_mixture) 25 | num_mixture = weights.shape[0] 26 | if mu_list is None: 27 | mu_list = np.random.randn(num_mixture, output_size) * 1.0 28 | if sigma_list is None: 29 | sigma_list = np.ones([num_mixture, output_size]) 30 | prior = GMM_diag(output_size, mu_list, sigma_list, weights) 31 | return prior 32 | 33 | def init_classifier(layer_sizes): 34 | classifier = construct_classifier(layer_sizes, 'sigmoid') 35 | return classifier 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /data/import_data_mnist.py: -------------------------------------------------------------------------------- 1 | import os, struct 2 | from array import array 3 | import numpy as np 4 | 5 | def read(path, num_per_digit = 0, dataset = "training", seed = 0, digits = None): 6 | """ 7 | Python function for importing the MNIST data set. 
8 | """ 9 | 10 | if dataset is "training": 11 | fname_img = os.path.join(path, 'train-images-idx3-ubyte') 12 | fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte') 13 | elif dataset is "testing": 14 | fname_img = os.path.join(path, 't10k-images-idx3-ubyte') 15 | fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte') 16 | else: 17 | raise ValueError, "dataset must be 'testing' or 'training'" 18 | 19 | flbl = open(fname_lbl, 'rb') 20 | magic_nr, size = struct.unpack(">II", flbl.read(8)) 21 | lbl = array("b", flbl.read()) 22 | flbl.close() 23 | 24 | fimg = open(fname_img, 'rb') 25 | magic_nr, size, rows, cols = struct.unpack(">IIII", fimg.read(16)) 26 | img = array("B", fimg.read()) 27 | fimg.close() 28 | 29 | if digits is None: 30 | digits = np.arange(10) 31 | num_digits = len(digits) 32 | if dataset == 'training': 33 | num_per_digit = 6000 34 | if dataset == 'testing': 35 | num_per_digit = 1000 36 | if num_per_digit > 0 and num_per_digit < len(img) / (rows * cols) / num_digits: 37 | num_data = num_per_digit * num_digits 38 | else: 39 | num_data = len(img) / (rows * cols) 40 | images = np.zeros([rows*cols, num_data]) 41 | labels = np.zeros([10, num_data]) 42 | for j in xrange(num_digits): 43 | ind = [ k for k in xrange(size) if lbl[k] == digits[j] ] 44 | if len(ind) == 0: 45 | raise ValueError, "invalid digits, should be in range 0-9" 46 | if num_per_digit == 0 or num_per_digit > len(ind): 47 | num_per_digit = len(ind) 48 | if seed is None: 49 | ind = ind[:num_per_digit] 50 | else: 51 | np.random.seed(seed) 52 | ind = np.random.permutation(ind)[:num_per_digit] 53 | for i in xrange(num_per_digit): 54 | #print i, num_per_digit, len(ind), i* num_digits + j, ind[i]*rows*cols, (ind[i]+1)*rows*cols 55 | images[:, i * num_digits + j] = img[ ind[i]*rows*cols : (ind[i]+1)*rows*cols ] 56 | labels[lbl[ind[i]], i * num_digits + j] = 1 57 | 58 | # here each column in images corresponds to a datapoint 59 | return images, labels 60 | -------------------------------------------------------------------------------- /network/deterministic_layer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | np.random.seed(0) 5 | tf.set_random_seed(0) 6 | 7 | def init_weights(input_size, output_size, constant=1.0, seed=123): 8 | """ Glorot and Bengio, 2010's initialization of network weights""" 9 | scale = constant*np.sqrt(6.0/(input_size + output_size)) 10 | if output_size > 0: 11 | return tf.random_uniform((input_size, output_size), 12 | minval=-scale, maxval=scale, 13 | dtype=tf.float32, seed=seed) 14 | else: 15 | return tf.random_uniform([input_size], 16 | minval=-scale, maxval=scale, 17 | dtype=tf.float32, seed=seed) 18 | 19 | class Deterministic_Layer(object): 20 | def __init__(self, input_size, output_size, activation): 21 | self.input_size = input_size 22 | self.output_size = output_size 23 | # activation function 24 | self.name = activation 25 | if activation == 'softplus': 26 | self._activation = tf.nn.softplus 27 | if activation == 'relu': 28 | self._activation = tf.nn.relu 29 | if activation == 'sigmoid': 30 | self._activation = tf.sigmoid 31 | if activation == 'tanh': 32 | self._activation = tf.tanh 33 | if activation == 'linear': 34 | self._activation = lambda x: x 35 | if activation == 'softmax': 36 | self._activation = tf.nn.softmax 37 | # parameters 38 | W = tf.Variable(init_weights(input_size, output_size)) 39 | b = tf.Variable(tf.zeros([output_size])) 40 | #b = tf.Variable(init_weights(output_size, 0)) 41 
| self.params = [W, b]
42 | 
43 |     def encode(self, input):
44 |         return self._activation(tf.matmul(input, self.params[0]) + self.params[1])
45 | 
46 |     def get_name(self):
47 |         return self.name
48 | 
49 | class MLP(object):
50 |     def __init__(self, D_layers):
51 |         self.D_layers = D_layers
52 |         self.params = []
53 |         for layer in self.D_layers:
54 |             self.params = self.params + layer.params
55 | 
56 |     def encode(self, input):
57 |         output = input
58 |         for layer in self.D_layers:
59 |             output = layer.encode(output)
60 | 
61 |         return output
62 | 
63 |     def get_name(self):
64 |         return 'MLP'
65 | 
66 | 
--------------------------------------------------------------------------------
/network/network.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import tensorflow as tf
 3 | import time
 4 | from scipy.misc import logsumexp
 5 | from stochastic_layer import construct_Stoc_Layer
 6 | from deterministic_layer import Deterministic_Layer, MLP
 7 | 
 8 | np.random.seed(0)
 9 | tf.set_random_seed(0)
10 | 
11 | class Network(object):
12 |     """
13 |     A stochastic network containing several stochastic layers.
14 |     """
15 |     def __init__(self, S_layers):
16 |         self.S_layers = S_layers
17 |         self.params = []
18 |         for layer in self.S_layers:
19 |             self.params = self.params + layer.params
20 | 
21 |     def encode(self, input, sampling):
22 |         output = input
23 |         for layer in self.S_layers:
24 |             output, _ = layer.encode(output, sampling)
25 |         return output
26 | 
27 |     def encode_and_log_prob(self, input, eval_output = None):
28 |         # evaluate on eval_output if provided
29 |         if eval_output is None:
30 |             eval_output = [None for layer in self.S_layers]
31 |         output = input
32 |         output_list = []
33 |         l = 0
34 |         for layer in self.S_layers:
35 |             output, logprob = layer.encode_and_log_prob(output, eval_output[l])
36 |             if eval_output[l] is not None:
37 |                 output = eval_output[l]
38 |             output_list.append(output)
39 |             if l == 0:
40 |                 logprob_total = logprob
41 |             else:
42 |                 logprob_total = logprob_total + logprob
43 |             l += 1
44 |         return output_list, logprob_total  # return the sum over all layers, not just the last one
45 | 
46 |     def get_prob_type(self):
47 |         return self.S_layers[-1].get_prob_type()
48 | 
49 |     def get_name(self):
50 |         return 'stochastic_network'
51 | 
52 | def construct_network(layer_sizes, prob_type = 'gaussian', data_type='real', \
53 |         activation='softplus', prefix = 'p'):
54 |     """
55 |     Construct a stochastic network. 
56 | """ 57 | S_layers = [] 58 | l = 0 59 | for sizes in layer_sizes: 60 | if l == len(layer_sizes) - 1: 61 | if data_type == 'real': prob_type = 'gaussian' 62 | if data_type == 'bool': prob_type = 'bernoulli' 63 | print prefix, l, data_type, prob_type 64 | S_layers.append(construct_Stoc_Layer(sizes, prob_type, activation, prefix)) 65 | l += 1 66 | network = Network(S_layers) 67 | return network 68 | 69 | def construct_mlp(layer_sizes, activation = 'softplus'): 70 | """ 71 | Construct a deterministic network 72 | """ 73 | D_layers = [] 74 | L = len(layer_sizes) - 1 75 | for l in xrange(L): 76 | D_layers.append(Deterministic_Layer(layer_sizes[l], layer_sizes[l+1], activation)) 77 | 78 | mlp = MLP(D_layers) 79 | return mlp 80 | 81 | -------------------------------------------------------------------------------- /models/loss_functions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import time 4 | from scipy.misc import logsumexp 5 | 6 | np.random.seed(0) 7 | tf.set_random_seed(0) 8 | 9 | def reconstruction_loss(input, encoder, decoder, num_samples): 10 | """ 11 | Compute log p(x|z) and log q(z|x) 12 | """ 13 | # compute log_q 14 | x_rep = tf.tile(input, [num_samples, 1]) 15 | z_list, logq = encoder.encode_and_log_prob(x_rep) 16 | # compute log_p 17 | samples = list(reversed(z_list[:-1])) 18 | samples.append(x_rep) 19 | _, logpxz = decoder.encode_and_log_prob(z_list[-1], eval_output = samples) 20 | 21 | return logpxz, logq, z_list 22 | 23 | def reconstruction_mse_loss(input, encoder, decoder, sampling = False): 24 | z = encoder.encode(input, sampling) 25 | input_recon = decoder.encode(z, sampling) 26 | loss = tf.square(input - input_recon) 27 | return tf.reduce_mean(tf.reduce_sum(loss, 1)), z 28 | 29 | def reconstruction_cross_entropy(input, encoder, decoder, sampling = False): 30 | z = encoder.encode(input, sampling) 31 | input_recon = decoder.encode(z, sampling) 32 | loss = -input * tf.log(tf.clip_by_value(input_recon, 1e-9, 1.0)) \ 33 | - (1.0 - input) * tf.log(tf.clip_by_value(1 - input_recon, 1e-9, 1.0)) 34 | return tf.reduce_mean(tf.reduce_sum(loss, 1)), z 35 | 36 | def log_prior(z, prob_type = 'gaussian'): 37 | if prob_type == 'gaussian': 38 | return log_prior_gaussian(z) 39 | if prob_type == 'bernoulli': 40 | return log_prior_bernoulli(z) 41 | if prob_type == 'bernoulli_sym': 42 | return log_prior_bernoulli_sym(z) 43 | if prob_type == 'softmax': 44 | return log_prior_softmax(z, int(z.get_shape()[0])) 45 | 46 | def log_prior_gaussian(z, Mu = 0.0, Sigma = 1.0): 47 | logprob = -(0.5 * np.log(2 * np.pi) + tf.log(Sigma)) \ 48 | - 0.5 * ((z - Mu) / Sigma) ** 2 49 | return tf.reduce_sum(logprob, 1) 50 | 51 | def log_prior_bernoulli(z, Mu = 0.5): 52 | logprob = z * tf.log(tf.clip_by_value(Mu, 1e-9, 1.0)) \ 53 | + (1 - z) * tf.log(tf.clip_by_value(1 - Mu, 1e-9, 1.0)) 54 | return tf.reduce_sum(logprob, 1) 55 | 56 | def log_prior_bernoulli_sym(z, Mu = 0.5): 57 | a = (z + 1.0) / 2.0 58 | logprob = a * tf.log(tf.clip_by_value(Mu, 1e-9, 1.0)) \ 59 | + (1 - a) * tf.log(tf.clip_by_value(1 - Mu, 1e-9, 1.0)) 60 | return tf.reduce_sum(logprob, 1) 61 | 62 | def log_prior_softmax(z, N = 2.0): 63 | # TODO: implement other logits 64 | logprob = z * tf.log(1.0 / float(N)) 65 | return tf.reduce_sum(logprob, 1) 66 | 67 | def classification_cross_entropy(y_pred, y_data): 68 | loss = -y_data * tf.log(tf.clip_by_value(y_pred, 1e-9, 1.0)) \ 69 | - (1 - y_data) * tf.log(tf.clip_by_value(1 - y_pred, 1e-9, 1.0)) 70 | return 
tf.reduce_mean(loss)
71 | 
72 | def classification_cross_entropy_softmax(y_pred, y_data):
73 |     loss = -y_data * tf.log(tf.clip_by_value(y_pred, 1e-9, 1.0))
74 |     return tf.reduce_mean(tf.reduce_sum(loss, 1))
75 | 
76 | def classification_error_one_hot(y_pred, y_data):
77 |     # assume y_data is a one-hot vector
78 |     # and y_pred contains probabilities
79 |     correct_prediction = tf.equal(tf.argmax(y_data,1), tf.argmax(y_pred,1))
80 |     accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
81 |     return 1.0 - accuracy
82 | 
83 | def adversarial_loss(y_p, y_q):
84 |     loss = -tf.log(tf.clip_by_value(y_p, 1e-9, 1.0)) \
85 |            - tf.log(tf.clip_by_value(1 - y_q, 1e-9, 1.0))
86 |     return tf.reduce_mean(tf.reduce_sum(loss, 1))
87 | 
88 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Renyi divergence variational inference applied to variational auto-encoders
 2 | 
 3 | **Update 2: 14 Sept 2016**
 4 | 
 5 | There are two ways to implement IWAE/VAE with other alpha settings (except
 6 | alpha = 1.0, which gives you the vanilla VAE). One is to actually compute the
 7 | energy as a scalar and let tensorflow work out the rest for you. The previous
 8 | naive implementation (see [vae.py](models/vae.py)) did this. The other follows
 9 | [section 4.2 of the paper](http://arxiv.org/pdf/1602.02311v2.pdf#5), in which
10 | you compute the gradients on **a list of unnormalized log importance weights**,
11 | and form the final gradient by computing the weighted average of them.
12 | My internal-use numpy code used this strategy.
13 | 
14 | So as another quick update I also provide the second strategy implementation
15 | in tensorflow. Please see [iwae.py](models/iwae.py) for details -- just a few
16 | lines of changes. This is of almost the same flavor as
17 | [the theano version](https://github.com/yburda/iwae/blob/master/iwae.py#L142),
18 | except that they treated VAE/IWAE as different cases. In contrast, this tensorflow
19 | code **handles both cases in the same way**, as justified by the paper.
20 | 
21 | If you want to compare both solutions, use --loss vae for the first and
22 | --loss iwae for the second. You can specify --alpha for both cases, and
23 | --loss iwae also supports --alpha 1.0 (VAE).
24 | **Some remarks**: First, the runtime of both is roughly the same (I have only
25 | tested them on my laptop). Second, the produced results might differ by
26 | a few nats. This is due to numerical issues with logsumexp.
27 | 
28 | Unfortunately, implementing VR-max in a similar style to iwae.py still
29 | does not give you a runtime advantage. So I still keep the dirty tryout
30 | [vrmax.py](models/vrmax.py). Will come back to this -- again, stay tuned!
31 | 
32 | ======================================
33 | 
34 | **Update 1: 09 Sept 2016**
35 | 
36 | Recently I found that the previous naive implementation in vae.py does not
37 | give you time savings with the max trick, when compared to my internal-use
38 | numpy version. This is probably because tensorflow/theano does not
39 | automatically recognize that it need not compute gradients for the samples I dropped.
40 | 
41 | So as a temporary update I provide a dirty solution (see vrmax.py) that
42 | collects the max-weight samples and repeats the VAE procedure for them.
43 | Yes, I know it's far from optimized, but at least it already gives you
44 | ~2x speed-up on CPUs (and maybe 1.5x~1.7x on GPUs depending on your settings). 
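(In other words: the max trick corresponds to taking alpha -> -infinity in the VR bound,
where the Monte Carlo estimate reduces to max_k log [ p(x, z_k) / q(z_k | x) ]. Only the
sample with the largest importance weight contributes, so only that sample needs a
backward pass -- this is what vrmax.py gathers with argmax before re-running the VAE
computation on it.)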
45 | 46 | Will come back to this issue -- stay tuned! 47 | 48 | ====================================== 49 | 50 | I provide a tensorflow implementation of VAE training with Renyi divergence. 51 | Math details can be found here: 52 | 53 | Yingzhen Li and Richard E. Turner. Renyi divergence variational inference. 54 | (http://arxiv.org/abs/1602.02311) 55 | 56 | I only included some small dataset for testing. To add in more datasets, 57 | download them somewhere else and then add them to the data/ directory. 58 | 59 | For example, you can download: 60 | 61 | MNIST dataset (http://yann.lecun.com/exdb/mnist/) 62 | 63 | and include all the data files in directory data/MNIST/ 64 | 65 | OMNIGLOT (https://github.com/yburda/iwae/tree/master/datasets/OMNIGLOT) 66 | 67 | and include all the data files in directory data/OMNIGLOT/ 68 | 69 | Frey Face (https://github.com/y0ast/Variational-Autoencoder/blob/master/freyfaces.pkl) 70 | 71 | and include all the data files in directory data/freyface/ 72 | 73 | To have a quick test, run 74 | 75 | python exp.py --data [dataset name] --alpha [alpha value] -k [num of samples] 76 | --dimZ [dimension of the latents] 77 | 78 | See exp.py file for more options. In particular, alpha = 1.0 returns 79 | the vanila VAE, alpha = 0.0 gives IWAE. 80 | 81 | If you want to see the max trick, add in one more option --backward_pass max. 82 | -------------------------------------------------------------------------------- /data/data_preprocessing.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import import_data_alphadigs as alphadigs 3 | import import_data_mnist as mnist 4 | import numpy as np 5 | import cPickle 6 | import argparse 7 | from scipy.io import loadmat 8 | 9 | def load_data(dataset, path, ratio = 0.9, seed = 0, return_labels = False): 10 | if dataset == 'freyface': 11 | data_train, data_test = load_data_freyface(path, ratio, seed) 12 | if dataset == 'alphadigits': 13 | data_train, data_test, labels_train, labels_test = \ 14 | load_data_alphadigits(path, ratio, seed) 15 | if dataset == 'mnist': 16 | data_train, data_test, labels_train, labels_test = \ 17 | load_data_mnist(path, ratio, seed) 18 | if dataset == 'silhouettes': 19 | data_train, data_test = load_data_silhouettes(path, ratio, seed) 20 | if dataset == 'omniglot': 21 | data_train, data_test, labels_train, labels_test = \ 22 | load_data_omniglot(path, ratio, seed) 23 | if return_labels and dataset not in ['freyface', 'silhouettes']: 24 | return data_train, data_test, labels_train, labels_test 25 | else: 26 | return data_train, data_test 27 | 28 | def load_data_freyface(path, ratio = 0.9, seed = 0): 29 | # load and split data 30 | print "Loading data" 31 | f = open(path + 'freyface/freyfaces.pkl','rb') 32 | data = cPickle.load(f) 33 | data = np.array(data, dtype='f') # float32 34 | f.close() 35 | 36 | np.random.seed(seed) 37 | np.random.shuffle(data) 38 | num_train = int(ratio * data.shape[0]) 39 | data_train = data[:num_train] 40 | data_test = data[num_train:] 41 | 42 | return data_train, data_test 43 | 44 | def load_data_alphadigits(path, ratio = 0.9, seed = 0): 45 | # load and split data 46 | print "Loading data" 47 | data_train, labels_train, data_test, labels_test = \ 48 | alphadigs.read(path, int(39 * ratio), SEED = seed) 49 | # transform to float32 50 | data_train = np.array(data_train.T, dtype='f') # float32 51 | data_test = np.array(data_test.T, dtype='f') # float32 52 | labels_train = np.array(labels_train.T, dtype='f') # float32 53 | labels_test 
= np.array(labels_test.T, dtype='f') # float32 54 | 55 | return data_train, data_test, labels_train, labels_test 56 | 57 | def load_data_omniglot(path, ratio = 0.9, seed = 0): 58 | # load and split data 59 | print "Loading data" 60 | mat = loadmat(path + 'OMNIGLOT/chardata.mat') 61 | data_train = np.array(mat['data'].T, dtype='f') # float32 62 | data_test = np.array(mat['testdata'].T, dtype='f') # float32 63 | labels_train = np.array(mat['target'].T, dtype='f') # float32 64 | labels_test = np.array(mat['testtarget'].T, dtype='f') # float32 65 | 66 | return data_train, data_test, labels_train, labels_test 67 | 68 | def load_data_mnist(path, ratio = 0.9, seed = 0, digits = None): 69 | # load and split data 70 | print "Loading data" 71 | path = path + 'MNIST/' 72 | data_train, labels_train = mnist.read(path, 0, "training", seed, digits) 73 | data_test, labels_test = mnist.read(path, 0, "testing", seed, digits) 74 | #data_train = np.array(data >= 0.5 * np.max(data, 0), dtype = int) # binary 75 | #data_test = np.array(data >= 0.5 * np.max(data, 0), dtype = int) # binary 76 | data_train /= 255.0 # real-value 77 | data_test /= 255.0 # real-value 78 | # transform to float32 79 | data_train = np.array(data_train.T, dtype='f') # float32 80 | data_test = np.array(data_test.T, dtype='f') # float32 81 | labels_train = np.array(labels_train.T, dtype='f') # float32 82 | labels_test = np.array(labels_test.T, dtype='f') # float32 83 | return data_train, data_test, labels_train, labels_test 84 | 85 | def load_data_silhouettes(path, ratio = 0.9, seed = 0): 86 | import scipy.io 87 | imgs_filename = path + 'silhouettes/' \ 88 | + 'caltech101_silhouettes_28_split1.mat' 89 | with open(imgs_filename, 'rb') as f: 90 | images = scipy.io.loadmat(imgs_filename) 91 | 92 | images_train = images['train_data'].astype('float32') 93 | images_test = images['test_data'].astype('float32') 94 | images_val = images['val_data'].astype('float32') 95 | #n_validation = images_val.shape[0] 96 | #images_train = np.vstack((images_train, images_val)) 97 | 98 | # flip digits? 99 | images_train = 1.0 - images_train 100 | images_test = 1.0 - images_test 101 | 102 | return images_train, images_test#, n_validation 103 | 104 | -------------------------------------------------------------------------------- /exp.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('../') 3 | from init_models import * 4 | import numpy as np 5 | import argparse 6 | from data.data_preprocessing import load_data 7 | from load_save import * 8 | from visualization.reconstruction import plot_recon 9 | from visualization.samples import plot_samples 10 | 11 | def main(dataset, dimZ, hidden_layers, n_iters, learning_rate = 0.0005, \ 12 | batch_size = 100, seed = 0, alpha = 1.0, num_samples = 1, \ 13 | save = False, backward_pass = 'full', activation = 'softplus', \ 14 | loss = 'vae', checkpoint = 0): 15 | 16 | # load data 17 | ratio = 0.9 18 | path = 'data/' 19 | supervised = False 20 | data_train, data_test = load_data(dataset, path, ratio, seed, supervised) 21 | if dataset == 'freyface': 22 | data_type = 'real' 23 | else: 24 | data_type = 'bool' 25 | 26 | # initialise the computation 27 | sess = tf.Session() 28 | variables_size = [data_train.shape[1], dimZ] 29 | 30 | # TODO: other training methods coming soon... 
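    # rough map of the options handled below, as dispatched by the current code:
    #   --loss vae           -> models/vae.py   (VR bound computed as a single scalar energy)
    #   --loss iwae          -> models/iwae.py  (per-sample gradients averaged with importance
    #                                            weights; section 4.2 of the paper)
    #   --backward_pass max  -> forces loss = 'vrmax' -> models/vrmax.py (the VR-max trick)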
31 | if backward_pass == 'max': 32 | loss = 'vrmax' 33 | if loss == 'vae': 34 | kwargs = {'alpha': alpha, 'backward_pass': backward_pass} 35 | print 'training model: variational auto-encoder' 36 | print 'back propagating all the samples' 37 | from models.vae import init_optimizer 38 | if loss == 'vrmax': 39 | kwargs = {'alpha': alpha, 'backward_pass': backward_pass} 40 | print 'training model: VAE with alpha = -infty' 41 | print 'back propagating only 1 sample: using the max trick' 42 | from models.vrmax import init_optimizer 43 | if loss == 'iwae': 44 | kwargs = {'alpha': alpha} 45 | print 'training model: IWAE style training with alpha = %.2f' % alpha 46 | print 'gradient is first computed on every samples,', 47 | print 'then averaged with importance weights (smoothed by alpha)', 48 | print 'see the VR bound paper, section 4.2 for details.' 49 | from models.iwae import init_optimizer 50 | 51 | models = init_model(variables_size, hidden_layers, data_type, activation) 52 | prior = init_prior_gaussian(variables_size[-1]) 53 | if checkpoint > 0: 54 | path = path_name(dataset, alpha, num_samples, backward_pass) 55 | load_checkpoint(sess, path, checkpoint) 56 | initialised_var = set(tf.all_variables()) 57 | else: 58 | initialised_var = set([]) 59 | 60 | fit, score = init_optimizer(models, variables_size[0], batch_size, \ 61 | num_samples, **kwargs) 62 | 63 | # now check init 64 | init_var_list = set(tf.all_variables()) - initialised_var 65 | if len(init_var_list) > 0: 66 | # Initializing the tensor flow variables 67 | init = tf.initialize_variables(var_list = init_var_list) 68 | #init = tf.initialize_all_variables() 69 | sess.run(init) 70 | checkpoint += 1 71 | 72 | num_iter_trained = 0 73 | print "Training..." 74 | for n_iter in n_iters: 75 | fit(sess, data_train, n_iter, learning_rate) 76 | num_iter_trained += n_iter 77 | print "Evaluating test data..." 78 | lowerbound_test, time_test = \ 79 | score(sess, data_test, num_samples = 10) 80 | print "test data LL (lowerbound) = %.2f, time = %.2fs, iter %d" \ 81 | % (lowerbound_test, time_test, num_iter_trained) 82 | 83 | # plot reconstructions 84 | if dataset == 'freyface': 85 | shape = (28, 20) 86 | if 'mnist' in dataset: 87 | shape = (28, 28) 88 | print 'ploting reconstructions...' 89 | recon_input = data_test[:100] 90 | plot_recon(sess, recon_input, shape, models[0], models[1]) 91 | 92 | print 'ploting samples from the generative model...' 
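    # draw z from the Gaussian prior and decode it with models[1] (the decoder)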
93 | plot_samples(sess, shape, prior, models[1]) 94 | 95 | # save model 96 | if save: 97 | path = path_name(dataset, alpha, num_samples, backward_pass) 98 | save_checkpoint(sess, path, checkpoint) 99 | 100 | if __name__ == '__main__': 101 | parser = argparse.ArgumentParser(description='Run RVAE experiments.') 102 | parser.add_argument('--data', '-D', type=str, default='freyface') 103 | parser.add_argument('--num_layers', '-l', type=int, choices=[1, 2], default=1) 104 | parser.add_argument('--num_samples', '-k', type=int, default=1) 105 | parser.add_argument('--alpha', '-a', type=float, default=1.0) 106 | parser.add_argument('--dimZ', '-Z', type=int, default=5) 107 | parser.add_argument('--dimH', '-H', type=int, default=200) 108 | parser.add_argument('--iter', '-i', type=int, default=100) 109 | parser.add_argument('--save_model', '-s', action='store_true', default=False) 110 | parser.add_argument('--seed', '-S', type=int, default=0) 111 | parser.add_argument('--backward_pass', '-b', type=str, default='full') 112 | parser.add_argument('--learning_rate', type=float, default=0.0005) 113 | parser.add_argument('--batch_size', type=int, default=100) 114 | parser.add_argument('--activation', type=str, default='softplus') 115 | parser.add_argument('--loss', type=str, default='vae') 116 | parser.add_argument('--checkpoint', type=int, default=0) 117 | 118 | args = parser.parse_args() 119 | if args.dimH > 0: 120 | hidden_layers = [[args.dimH for i in xrange(args.num_layers)]] 121 | else: 122 | hidden_layers = [[]] 123 | if args.backward_pass not in ['full', 'single', 'max', 'min']: 124 | args.backward_pass = 'full' 125 | 126 | print 'settings:' 127 | print 'activation function:', args.activation 128 | print 'dataset:', args.data 129 | print 'alpha:', args.alpha 130 | print 'dimZ:', args.dimZ 131 | print 'hidden layer sizes:', hidden_layers 132 | print 'num. 
samples:', args.num_samples 133 | print 'backward pass method:', args.backward_pass 134 | print 'learning rate:', args.learning_rate 135 | print 'batch_size:', args.batch_size 136 | 137 | iter_each_round = 10 138 | num_rounds = args.iter / iter_each_round 139 | n_iters = list(np.ones(num_rounds, dtype = int) * iter_each_round) 140 | main(args.data, args.dimZ, hidden_layers, n_iters, args.learning_rate, \ 141 | args.batch_size, args.seed, args.alpha, args.num_samples, \ 142 | args.save_model, args.backward_pass, args.activation, 143 | args.loss, args.checkpoint) 144 | 145 | -------------------------------------------------------------------------------- /models/vrmax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import time 4 | from scipy.misc import logsumexp 5 | from network.network import construct_network 6 | from loss_functions import reconstruction_loss 7 | from loss_functions import log_prior 8 | from vae import variational_lowerbound 9 | 10 | def vrmax(x, encoder, decoder, num_samples, batch_size): 11 | """ 12 | Compute the VR-max trick 13 | """ 14 | logpxz = 0.0 15 | logqzx = 0.0 16 | L = len(encoder.S_layers) 17 | x_rep = tf.tile(x, [num_samples, 1]) 18 | input = x_rep 19 | 20 | # do encoding 21 | samples = [] 22 | for l in xrange(L): 23 | output, logq = encoder.S_layers[l].encode_and_log_prob(input) 24 | logqzx = logqzx + logq 25 | samples.append(output) 26 | input = output 27 | 28 | # do decoding 29 | samples = list(reversed(samples)) 30 | samples.append(x_rep) 31 | for l in xrange(L): 32 | _, logp = decoder.S_layers[l].encode_and_log_prob(samples[l], eval_output = samples[l+1]) 33 | logpxz = logpxz + logp 34 | 35 | logpz = log_prior(output, encoder.S_layers[l].get_prob_type()) 36 | logF = tf.reshape(logpxz + logpz - logqzx, [num_samples, batch_size]) 37 | 38 | # now compute the gradients 39 | # first test automatic gradient computation 40 | indices = tf.argmax(logF, 0) * batch_size + tf.constant(np.arange(batch_size)) 41 | samples_max = [] 42 | for l in xrange(len(samples)): 43 | samples_max.append(tf.gather(samples[l], indices)) 44 | 45 | # NOT VERY EFFICIENT! 
RE-COMPUTE THE BOUND 46 | logpxz_max = 0.0 47 | logqzx_max = 0.0 48 | logpz_max = 0.0 49 | for l in xrange(L): 50 | _, logp = decoder.S_layers[l].encode_and_log_prob(samples_max[l], eval_output = samples_max[l+1]) 51 | logpxz_max = logpxz_max + logp 52 | _, logq = encoder.S_layers[L-1-l].encode_and_log_prob(samples_max[l+1], eval_output = samples_max[l]) 53 | logqzx_max = logqzx_max + logq 54 | logpz_max = log_prior(samples_max[0], encoder.S_layers[L-1].get_prob_type()) 55 | lowerbound = tf.reduce_mean(logpxz_max + logpz_max - logqzx_max) 56 | 57 | return lowerbound 58 | 59 | def make_functions_vae(models, input_size, num_samples, batch_size, \ 60 | alpha = 1.0, backward_pass = 'full'): 61 | encoder, decoder = models 62 | 63 | input = tf.placeholder(tf.float32, [batch_size, input_size]) 64 | lowerbound = vrmax(input, encoder, decoder, num_samples, batch_size) 65 | 66 | learning_rate_ph = tf.placeholder(tf.float32, shape = []) 67 | optimizer = \ 68 | tf.train.AdamOptimizer(learning_rate=learning_rate_ph, \ 69 | beta1=0.9, beta2=0.999, epsilon=10e-8 \ 70 | ).minimize(-lowerbound) 71 | 72 | def updateParams(sess, X, learning_rate = 0.0005): 73 | opt, cost = sess.run((optimizer, lowerbound), 74 | feed_dict={input: X, 75 | learning_rate_ph:learning_rate}) 76 | return cost 77 | 78 | return updateParams, lowerbound 79 | 80 | def init_optimizer(models, input_size, batch_size = 100, num_samples = 1, **kwargs): 81 | 82 | encoder = models[0]; decoder = models[1] 83 | # vae 84 | if 'alpha' not in kwargs: 85 | alpha = 1.0 86 | else: 87 | alpha = kwargs['alpha'] 88 | if 'backward_pass' not in kwargs: 89 | backward_pass = 'full' 90 | else: 91 | backward_pass = kwargs['backward_pass'] 92 | updateParams, lowerbound = \ 93 | make_functions_vae(models, input_size, \ 94 | num_samples, batch_size, \ 95 | alpha, backward_pass) 96 | 97 | def fit(sess, X, n_iter = 100, learning_rate = 0.0005, verbose = True): 98 | # first make batches of source data 99 | [N, dimX] = X.shape 100 | N_batch = N / batch_size 101 | if np.mod(N, batch_size) != 0: 102 | N_batch += 1 103 | print "training the model for %d iterations with lr=%f" % \ 104 | (n_iter, learning_rate) 105 | 106 | begin = time.time() 107 | for iteration in xrange(1, n_iter + 1): 108 | iteration_lowerbound = 0 109 | ind_s = np.random.permutation(range(N)) 110 | 111 | for j in xrange(0, N_batch): 112 | indl = j * batch_size 113 | indr = (j+1) * batch_size 114 | ind = ind_s[indl:min(indr, N)] 115 | if indr > N: 116 | ind = np.concatenate((ind, ind_s[:(indr-N)])) 117 | batch = X[ind] 118 | lowerbound = updateParams(sess, batch, learning_rate) 119 | iteration_lowerbound += lowerbound * batch_size 120 | 121 | if verbose: 122 | end = time.time() 123 | print("Iteration %d, lowerbound = %.2f, time = %.2fs" 124 | % (iteration, iteration_lowerbound / N, end - begin)) 125 | begin = end 126 | 127 | 128 | def eval_test_ll(sess, X, num_samples): 129 | lowerbound = sess.run(variational_lowerbound(X, encoder, decoder, num_samples, X.shape[0], 0.0)) 130 | 131 | return lowerbound 132 | 133 | def score(sess, X, num_samples = 100): 134 | """ 135 | Computer lower bound on data, following the IWAE paper. 136 | """ 137 | 138 | begin = time.time() 139 | print 'num. 
samples for eval:', num_samples 140 | 141 | # compute log_q 142 | lowerbound_total = 0 143 | num_data_test = X.shape[0] 144 | if num_data_test % batch_size == 0: 145 | num_batch = num_data_test / batch_size 146 | else: 147 | num_batch = num_data_test / batch_size + 1 148 | 149 | for i in xrange(num_batch): 150 | indl = i*batch_size 151 | indr = min((i+1)*batch_size, num_data_test) 152 | minibatch = X[indl:indr] 153 | lowerbound = eval_test_ll(sess, minibatch, num_samples) 154 | lowerbound_total += lowerbound * (indr - indl) 155 | 156 | end = time.time() 157 | time_test = end - begin 158 | lowerbound_total = lowerbound_total / float(num_data_test) 159 | 160 | return lowerbound_total, time_test 161 | 162 | return fit, score 163 | -------------------------------------------------------------------------------- /models/iwae.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import time 4 | from scipy.misc import logsumexp 5 | from network.network import construct_network 6 | from loss_functions import reconstruction_loss 7 | from loss_functions import log_prior 8 | from vae import variational_lowerbound 9 | 10 | def iwae(x, encoder, decoder, num_samples, batch_size, alpha = 0.0): 11 | """ 12 | Compute the loss function of VR lowerbound 13 | """ 14 | #logpxz, logqzx, z_list = reconstruction_loss(x, encoder, decoder, num_samples) 15 | logpxz = 0.0 16 | logqzx = 0.0 17 | L = len(encoder.S_layers) 18 | x_rep = tf.tile(x, [num_samples, 1]) 19 | input = x_rep 20 | 21 | # do encoding 22 | samples = [] 23 | for l in xrange(L): 24 | output, logq = encoder.S_layers[l].encode_and_log_prob(input) 25 | logqzx = logqzx + logq 26 | samples.append(output) 27 | input = output 28 | 29 | # do decoding 30 | samples = list(reversed(samples)) 31 | samples.append(x_rep) 32 | for l in xrange(L): 33 | _, logp = decoder.S_layers[l].encode_and_log_prob(samples[l], eval_output = samples[l+1]) 34 | logpxz = logpxz + logp 35 | 36 | logpz = log_prior(output, encoder.S_layers[l].get_prob_type()) 37 | logF = logpz + logpxz - logqzx 38 | 39 | # first compute lowerbound 40 | K = float(num_samples) 41 | logF_matrix = tf.reshape(logF, [num_samples, batch_size]) * (1 - alpha) 42 | logF_max = tf.reduce_max(logF_matrix, 0) 43 | logF_matrix -= logF_max 44 | logF_normalizer = tf.clip_by_value(tf.reduce_sum(tf.exp(logF_matrix), 0), 1e-9, np.inf) 45 | logF_normalizer = tf.log(logF_normalizer) 46 | # note here we need to substract log K as we use reduce_sum above 47 | if np.abs(alpha - 1.0) > 10e-3: 48 | lowerbound = tf.reduce_mean(logF_normalizer + logF_max - tf.log(K)) / (1 - alpha) 49 | else: 50 | lowerbound = tf.reduce_mean(logF) 51 | 52 | # now compute the importance weighted version of gradients 53 | log_ws = tf.reshape(logF_matrix - logF_normalizer, shape=[-1]) 54 | ws = tf.stop_gradient(tf.exp(log_ws), name = 'importance_weights_no_grad') 55 | params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) 56 | gradients = tf.gradients(-logF * ws, params) 57 | grad = zip(gradients, params) 58 | 59 | return lowerbound, grad 60 | 61 | def make_functions_vae(models, input_size, num_samples, batch_size, alpha = 0.0): 62 | encoder, decoder = models 63 | 64 | input = tf.placeholder(tf.float32, [batch_size, input_size]) 65 | lowerbound, grad = iwae(input, encoder, decoder, num_samples, batch_size, \ 66 | alpha) 67 | 68 | learning_rate_ph = tf.placeholder(tf.float32, shape = []) 69 | optimizer = \ 70 | 
tf.train.AdamOptimizer(learning_rate=learning_rate_ph, \ 71 | beta1=0.9, beta2=0.999, epsilon=10e-8 \ 72 | ).apply_gradients(grad) 73 | 74 | def updateParams(sess, X, learning_rate = 0.0005): 75 | opt, cost = sess.run((optimizer, lowerbound), 76 | feed_dict={input: X, 77 | learning_rate_ph:learning_rate}) 78 | return cost 79 | 80 | return updateParams, lowerbound 81 | 82 | def init_optimizer(models, input_size, batch_size = 100, num_samples = 1, **kwargs): 83 | 84 | encoder = models[0]; decoder = models[1] 85 | # vae 86 | if 'alpha' not in kwargs: 87 | alpha = 0.0 88 | else: 89 | alpha = kwargs['alpha'] 90 | updateParams, lowerbound = \ 91 | make_functions_vae(models, input_size, \ 92 | num_samples, batch_size, \ 93 | alpha) 94 | 95 | def fit(sess, X, n_iter = 100, learning_rate = 0.0005, verbose = True): 96 | # first make batches of source data 97 | [N, dimX] = X.shape 98 | N_batch = N / batch_size 99 | if np.mod(N, batch_size) != 0: 100 | N_batch += 1 101 | print "training the model for %d iterations with lr=%f" % \ 102 | (n_iter, learning_rate) 103 | 104 | begin = time.time() 105 | for iteration in xrange(1, n_iter + 1): 106 | iteration_lowerbound = 0 107 | ind_s = np.random.permutation(range(N)) 108 | 109 | for j in xrange(0, N_batch): 110 | indl = j * batch_size 111 | indr = (j+1) * batch_size 112 | ind = ind_s[indl:min(indr, N)] 113 | if indr > N: 114 | ind = np.concatenate((ind, ind_s[:(indr-N)])) 115 | batch = X[ind] 116 | lowerbound = updateParams(sess, batch, learning_rate) 117 | iteration_lowerbound += lowerbound * batch_size 118 | 119 | if verbose: 120 | end = time.time() 121 | print("Iteration %d, lowerbound = %.2f, time = %.2fs" 122 | % (iteration, iteration_lowerbound / N, end - begin)) 123 | begin = end 124 | 125 | 126 | def eval_test_ll(sess, X, num_samples): 127 | lowerbound = sess.run(variational_lowerbound(X, encoder, decoder, num_samples, X.shape[0], 0.0)) 128 | 129 | return lowerbound 130 | 131 | def score(sess, X, num_samples = 100): 132 | """ 133 | Computer lower bound on data, following the IWAE paper. 134 | """ 135 | 136 | begin = time.time() 137 | print 'num. 
samples for eval:', num_samples 138 | 139 | # compute log_q 140 | lowerbound_total = 0 141 | num_data_test = X.shape[0] 142 | if num_data_test % batch_size == 0: 143 | num_batch = num_data_test / batch_size 144 | else: 145 | num_batch = num_data_test / batch_size + 1 146 | 147 | for i in xrange(num_batch): 148 | indl = i*batch_size 149 | indr = min((i+1)*batch_size, num_data_test) 150 | minibatch = X[indl:indr] 151 | lowerbound = eval_test_ll(sess, minibatch, num_samples) 152 | lowerbound_total += lowerbound * (indr - indl) 153 | 154 | end = time.time() 155 | time_test = end - begin 156 | lowerbound_total = lowerbound_total / float(num_data_test) 157 | 158 | return lowerbound_total, time_test 159 | 160 | return fit, score 161 | -------------------------------------------------------------------------------- /models/vae.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import time 4 | from scipy.misc import logsumexp 5 | from network.network import construct_network 6 | from loss_functions import reconstruction_loss 7 | from loss_functions import log_prior 8 | 9 | def variational_lowerbound(x, encoder, decoder, num_samples, batch_size, \ 10 | alpha = 1.0, backward_pass = 'full'): 11 | """ 12 | Compute the loss function of VR lowerbound 13 | """ 14 | #logpxz, logqzx, z_list = reconstruction_loss(x, encoder, decoder, num_samples) 15 | logpxz = 0.0 16 | logqzx = 0.0 17 | L = len(encoder.S_layers) 18 | x_rep = tf.tile(x, [num_samples, 1]) 19 | input = x_rep 20 | 21 | # do encoding 22 | samples = [] 23 | for l in xrange(L): 24 | output, logq = encoder.S_layers[l].encode_and_log_prob(input) 25 | logqzx = logqzx + logq 26 | samples.append(output) 27 | input = output 28 | 29 | # do decoding 30 | samples = list(reversed(samples)) 31 | samples.append(x_rep) 32 | for l in xrange(L): 33 | _, logp = decoder.S_layers[l].encode_and_log_prob(samples[l], eval_output = samples[l+1]) 34 | logpxz = logpxz + logp 35 | 36 | logpz = log_prior(output, encoder.S_layers[l].get_prob_type()) 37 | logF = logpz + logpxz - logqzx 38 | 39 | if backward_pass == 'max': 40 | logF = tf.reshape(logF, [num_samples, batch_size]) 41 | logF = tf.reduce_max(logF, 0) 42 | lowerbound = tf.reduce_mean(logF) 43 | elif backward_pass == 'min': 44 | logF = tf.reshape(logF, [num_samples, batch_size]) 45 | logF = tf.reduce_min(logF, 0) 46 | lowerbound = tf.reduce_mean(logF) 47 | elif np.abs(alpha - 1.0) < 10e-3: 48 | lowerbound = tf.reduce_mean(logF) 49 | else: 50 | logF = tf.reshape(logF, [num_samples, batch_size]) 51 | logF = logF * (1 - alpha) 52 | logF_max = tf.reduce_max(logF, 0) 53 | logF = tf.log(tf.clip_by_value(tf.reduce_mean(tf.exp(logF - logF_max), 0), 1e-9, np.inf)) 54 | logF = (logF + logF_max) / (1 - alpha) 55 | lowerbound = tf.reduce_mean(logF) 56 | return lowerbound#, logpz, logpxz, logqzx 57 | 58 | def make_functions_vae(models, input_size, num_samples, batch_size, \ 59 | alpha = 1.0, backward_pass = 'full'): 60 | encoder, decoder = models 61 | 62 | input = tf.placeholder(tf.float32, [batch_size, input_size]) 63 | lowerbound = variational_lowerbound(input, encoder, decoder, num_samples, batch_size, \ 64 | alpha, backward_pass) 65 | 66 | learning_rate_ph = tf.placeholder(tf.float32, shape = []) 67 | optimizer = \ 68 | tf.train.AdamOptimizer(learning_rate=learning_rate_ph, \ 69 | beta1=0.9, beta2=0.999, epsilon=10e-8 \ 70 | ).minimize(-lowerbound) 71 | 72 | def updateParams(sess, X, learning_rate = 0.0005): 73 | opt, cost = 
sess.run((optimizer, lowerbound), 74 | feed_dict={input: X, 75 | learning_rate_ph:learning_rate}) 76 | return cost 77 | 78 | return updateParams, lowerbound 79 | 80 | def init_optimizer(models, input_size, batch_size = 100, num_samples = 1, **kwargs): 81 | 82 | encoder = models[0]; decoder = models[1] 83 | # vae 84 | if 'alpha' not in kwargs: 85 | alpha = 1.0 86 | else: 87 | alpha = kwargs['alpha'] 88 | if 'backward_pass' not in kwargs: 89 | backward_pass = 'full' 90 | else: 91 | backward_pass = kwargs['backward_pass'] 92 | updateParams, lowerbound = \ 93 | make_functions_vae(models, input_size, \ 94 | num_samples, batch_size, \ 95 | alpha, backward_pass) 96 | 97 | def fit(sess, X, n_iter = 100, learning_rate = 0.0005, verbose = True): 98 | # first make batches of source data 99 | [N, dimX] = X.shape 100 | N_batch = N / batch_size 101 | if np.mod(N, batch_size) != 0: 102 | N_batch += 1 103 | print "training the model for %d iterations with lr=%f" % \ 104 | (n_iter, learning_rate) 105 | 106 | begin = time.time() 107 | for iteration in xrange(1, n_iter + 1): 108 | iteration_lowerbound = 0 109 | ind_s = np.random.permutation(range(N)) 110 | 111 | for j in xrange(0, N_batch): 112 | indl = j * batch_size 113 | indr = (j+1) * batch_size 114 | ind = ind_s[indl:min(indr, N)] 115 | if indr > N: 116 | ind = np.concatenate((ind, ind_s[:(indr-N)])) 117 | batch = X[ind] 118 | lowerbound = updateParams(sess, batch, learning_rate) 119 | iteration_lowerbound += lowerbound * batch_size 120 | 121 | if verbose: 122 | end = time.time() 123 | print("Iteration %d, lowerbound = %.2f, time = %.2fs" 124 | % (iteration, iteration_lowerbound / N, end - begin)) 125 | begin = end 126 | 127 | 128 | def eval_test_ll(sess, X, num_samples): 129 | lowerbound = sess.run(variational_lowerbound(X, encoder, decoder, num_samples, X.shape[0], 0.0)) 130 | 131 | return lowerbound 132 | 133 | def score(sess, X, num_samples = 100): 134 | """ 135 | Computer lower bound on data, following the IWAE paper. 136 | """ 137 | 138 | begin = time.time() 139 | print 'num. samples for eval:', num_samples 140 | 141 | # compute log_q 142 | lowerbound_total = 0 143 | num_data_test = X.shape[0] 144 | if num_data_test % batch_size == 0: 145 | num_batch = num_data_test / batch_size 146 | else: 147 | num_batch = num_data_test / batch_size + 1 148 | 149 | for i in xrange(num_batch): 150 | indl = i*batch_size 151 | indr = min((i+1)*batch_size, num_data_test) 152 | minibatch = X[indl:indr] 153 | lowerbound = eval_test_ll(sess, minibatch, num_samples) 154 | lowerbound_total += lowerbound * (indr - indl) 155 | 156 | end = time.time() 157 | time_test = end - begin 158 | lowerbound_total = lowerbound_total / float(num_data_test) 159 | 160 | return lowerbound_total, time_test 161 | 162 | return fit, score 163 | -------------------------------------------------------------------------------- /network/stochastic_layer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from deterministic_layer import Deterministic_Layer 4 | 5 | def construct_Stoc_Layer(layer_sizes, prob_type = 'gaussian', activation='softplus', prefix = 'q'): 6 | """ 7 | Construct the stochastic layer. 
8 | """ 9 | D_layers = [] 10 | if len(layer_sizes) > 2: 11 | for l in xrange(len(layer_sizes) - 2): 12 | D_layers.append(Deterministic_Layer(layer_sizes[l], layer_sizes[l+1], activation)) 13 | 14 | if prob_type == 'gaussian': 15 | if prefix == 'p': 16 | activation = 'sigmoid' 17 | if prefix == 'q': 18 | activation = 'linear' 19 | Mu_layer = Deterministic_Layer(layer_sizes[-2], layer_sizes[-1], activation) 20 | Log_Sigma_layer = Deterministic_Layer(layer_sizes[-2], layer_sizes[-1], 'linear') 21 | S_layer = Gaussian_Stoc_Layer(D_layers, Mu_layer, Log_Sigma_layer) 22 | 23 | if prob_type == 'bernoulli': 24 | Mu_layer = Deterministic_Layer(layer_sizes[-2], layer_sizes[-1], 'sigmoid') 25 | S_layer = Bernoulli_Stoc_Layer(D_layers, Mu_layer) 26 | 27 | if prob_type == 'bernoulli_sym': 28 | Mu_layer = Deterministic_Layer(layer_sizes[-2], layer_sizes[-1], 'sigmoid') 29 | S_layer = Bernoulli_sym_Stoc_Layer(D_layers, Mu_layer) 30 | 31 | if prob_type == 'softmax': 32 | Mu_layer = Deterministic_Layer(layer_sizes[-2], layer_sizes[-1], 'softmax') 33 | S_layer = Softmax_Stoc_Layer(D_layers, Mu_layer) 34 | 35 | return S_layer 36 | 37 | class Stoc_Layer(object): 38 | 39 | def encode(self, input, sampling): 40 | raise NotImplementedError() 41 | 42 | def log_prob(self, output, params): 43 | raise NotImplementedError() 44 | 45 | def encode_and_log_prob(self, input, eval_output = None): 46 | # evaluate on eval_output if provided 47 | if eval_output is None: 48 | output, params = self.encode(input, sampling = True) 49 | else: 50 | _, params = self.encode(input, sampling = False) 51 | output = eval_output 52 | logprob = self.log_prob(output, params) 53 | return output, logprob 54 | 55 | def get_name(self): 56 | return 'stochastic_layer' 57 | 58 | class Gaussian_Stoc_Layer(Stoc_Layer): 59 | 60 | def __init__(self, D_layers, Mu_layer, Log_Sigma_layer): 61 | self.D_layers = D_layers 62 | self.params = [] 63 | for layer in self.D_layers: 64 | self.params = self.params + layer.params 65 | self.Mu_layer = Mu_layer 66 | self.Log_Sigma_layer = Log_Sigma_layer 67 | self.params = self.params + self.Mu_layer.params 68 | self.params = self.params + self.Log_Sigma_layer.params 69 | # output size 70 | self.output_size = self.Mu_layer.output_size 71 | 72 | def encode(self, input, sampling): 73 | output = input 74 | for layer in self.D_layers: 75 | output = layer.encode(output) 76 | # now compute mu and sigma 77 | Mu = self.Mu_layer.encode(output) 78 | Log_Sigma = 0.5 * self.Log_Sigma_layer.encode(output) 79 | if sampling: 80 | eps = tf.random_normal(Mu.get_shape()) 81 | output = Mu + tf.exp(Log_Sigma) * eps 82 | else: 83 | output = Mu 84 | return output, [Mu, Log_Sigma] 85 | 86 | def log_prob(self, output, params): 87 | (Mu, Log_Sigma) = params 88 | logprob = -(0.5 * np.log(2 * np.pi) + Log_Sigma) \ 89 | - 0.5 * ((output - Mu) / tf.exp(Log_Sigma)) ** 2 90 | return tf.reduce_sum(logprob, 1) 91 | 92 | def get_prob_type(self): 93 | return 'gaussian' 94 | 95 | class Bernoulli_Stoc_Layer(Stoc_Layer): 96 | 97 | def __init__(self, D_layers, Mu_layer): 98 | self.D_layers = D_layers 99 | self.params = [] 100 | for layer in self.D_layers: 101 | self.params = self.params + layer.params 102 | self.Mu_layer = Mu_layer 103 | self.params = self.params + self.Mu_layer.params 104 | # output size 105 | self.output_size = self.Mu_layer.output_size 106 | 107 | def encode(self, input, sampling): 108 | output = input 109 | for layer in self.D_layers: 110 | output = layer.encode(output) 111 | # now compute mu and sigma 112 | Mu = 
self.Mu_layer.encode(output) 113 | if sampling: 114 | shape = Mu.get_shape() 115 | eps = tf.random_uniform(shape) 116 | output = tf.select(eps - Mu <= 0, tf.ones(shape), tf.zeros(shape)) 117 | else: 118 | output = Mu 119 | return output, Mu 120 | 121 | def log_prob(self, output, params): 122 | Mu = params 123 | logprob = output * tf.log(tf.clip_by_value(Mu, 1e-9, 1.0)) \ 124 | + (1 - output) * tf.log(tf.clip_by_value(1.0 - Mu, 1e-9, 1.0)) 125 | return tf.reduce_sum(logprob, 1) 126 | 127 | def get_prob_type(self): 128 | return 'bernoulli' 129 | 130 | class Bernoulli_sym_Stoc_Layer(Stoc_Layer): 131 | 132 | def __init__(self, D_layers, Mu_layer): 133 | self.D_layers = D_layers 134 | self.params = [] 135 | for layer in self.D_layers: 136 | self.params = self.params + layer.params 137 | self.Mu_layer = Mu_layer 138 | self.params = self.params + self.Mu_layer.params 139 | # output size 140 | self.output_size = self.Mu_layer.output_size 141 | 142 | def encode(self, input, sampling): 143 | output = input 144 | for layer in self.D_layers: 145 | output = layer.encode(output) 146 | # now compute mu 147 | Mu = self.Mu_layer.encode(output) 148 | if sampling: 149 | shape = Mu.get_shape() 150 | eps = tf.random_uniform(shape) 151 | output = tf.select(eps - Mu <= 0, tf.ones(shape), tf.zeros(shape)) 152 | else: 153 | output = Mu 154 | output = output * 2.0 - 1.0 155 | return output, Mu 156 | 157 | def log_prob(self, output, params): 158 | Mu = params 159 | z = (output + 1.0) / 2.0 160 | logprob = z * tf.log(tf.clip_by_value(Mu, 1e-9, 1.0)) \ 161 | + (1 - z) * tf.log(tf.clip_by_value(1.0 - Mu, 1e-9, 1.0)) 162 | return tf.reduce_sum(logprob, 1) 163 | 164 | def get_prob_type(self): 165 | return 'bernoulli_sym' 166 | 167 | class Softmax_Stoc_Layer(Stoc_Layer): 168 | 169 | def __init__(self, D_layers, Logit_layer): 170 | self.D_layers = D_layers 171 | self.params = [] 172 | for layer in self.D_layers: 173 | self.params = self.params + layer.params 174 | self.Logit_layer = Logit_layer 175 | self.params = self.params + self.Logit_layer.params 176 | # output size 177 | self.output_size = self.Logit_layer.output_size 178 | 179 | def encode(self, input, sampling = False): 180 | output = input 181 | for layer in self.D_layers: 182 | output = layer.encode(output) 183 | # now compute mu and sigma 184 | Logit = self.Logit_layer.encode(output) 185 | Logit = tf.log(tf.nn.softmax(Logit)) 186 | output = tf.exp(Logit) # probability vector 187 | #if sampling: 188 | # shape = output.get_shape() 189 | # eps = tf.random_uniform(shape) 190 | # diff = output - eps 191 | # max_out = tf.reduce_max(diff, 1, keep_dims = True) 192 | # output = tf.sign(diff - max_out) + 1.0 193 | return output, Logit 194 | 195 | def log_prob(self, output, params): 196 | Logit = params 197 | logprob = output * Logit 198 | return tf.reduce_sum(logprob, 1) 199 | 200 | def get_prob_type(self): 201 | return 'softmax' 202 | 203 | --------------------------------------------------------------------------------
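The alpha != 1 branch of variational_lowerbound in models/vae.py reshapes the per-sample log-weights logF to [num_samples, batch_size], multiplies them by (1 - alpha), applies a max-stabilised log-mean-exp over the sample axis, divides by (1 - alpha) again, and averages over the batch. The NumPy sketch below (illustrative only; vr_bound_numpy is not a function in this repository) mirrors that computation and can serve as a sanity check:

    import numpy as np

    def vr_bound_numpy(logF, alpha):
        # Illustrative sketch, not part of the repository.
        # logF: per-sample log-weights log p(x, z) - log q(z | x), shape [num_samples, batch_size]
        if abs(alpha - 1.0) < 1e-3:
            return logF.mean()              # alpha -> 1: plain average, the standard VAE bound
        logF = logF * (1.0 - alpha)
        logF_max = logF.max(axis=0)         # subtract the per-column max for numerical stability
        log_mean = np.log(np.clip(np.exp(logF - logF_max).mean(axis=0), 1e-9, np.inf))
        return ((log_mean + logF_max) / (1.0 - alpha)).mean()

    logF = np.random.RandomState(0).randn(5, 3) - 100.0   # 5 samples, batch of 3
    print(vr_bound_numpy(logF, alpha=0.0))                 # alpha = 0: the IWAE-style bound used by score()

init_optimizer in models/vae.py returns the pair (fit, score). A typical call might look like the following, where encoder, decoder, sess, X_train, X_test and the 784-dimensional input size are assumptions for illustration (the encoder/decoder would come from the repository's network construction code, and sess is a TensorFlow session with initialised variables):

    fit, score = init_optimizer((encoder, decoder), input_size=784,
                                batch_size=100, num_samples=5, alpha=0.5)
    fit(sess, X_train, n_iter=100, learning_rate=0.0005)
    test_ll, test_time = score(sess, X_test, num_samples=100)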