├── .gitignore ├── README.md ├── VAE.py ├── data.py ├── data ├── freyfaces.pkl ├── mnist.pkl.gz ├── news_ap.txt └── stopwords.txt ├── docs ├── 2dstructure.png ├── face.png ├── manifold.png ├── reconstruct.png └── vaes.png ├── main_face.py ├── main_mnist.py ├── main_text.py ├── model └── toy ├── updates.py └── utils_pg.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.npy 3 | *.pkl 4 | *.png 5 | data/* 6 | model/* 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ##Variational Auto-encoder 2 | 3 | run: python main_mnist.py 4 | 5 | python main_face.py 6 | 7 |

8 | 9 |

10 | 11 | 12 | ### Visualization 13 | - Reconstructions: 14 |

15 | 16 |

17 | - 2d latent space 18 |

19 | 20 |

21 | - Generations from positions 22 |

23 | 24 |

25 |

26 | 27 |

28 | - Visualizatin code from: https://jmetzen.github.io/2015-11-27/vae.html 29 | 30 | 31 | ### Reference 32 | - Kingma, Diederik P., and Max Welling. "[Auto-encoding variational bayes](http://arxiv.org/abs/1312.6114)." arXiv preprint arXiv:1312.6114 (2013). 33 | - Jan Hendrik Metzen's blog post: "[Variational Autoencoder in TensorFlow](https://jmetzen.github.io/2015-11-27/vae.html)" 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /VAE.py: -------------------------------------------------------------------------------- 1 | #pylint: skip-file 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | from utils_pg import * 6 | from updates import * 7 | 8 | class VAE(object): 9 | def __init__(self, in_size, out_size, hidden_size, latent_size, continuous, optimizer = "adadelta"): 10 | self.prefix = "VAE_" 11 | self.X = T.matrix("X") 12 | self.in_size = in_size 13 | self.out_size = out_size 14 | self.hidden_size = hidden_size 15 | self.latent_size = latent_size 16 | self.optimizer = optimizer 17 | self.continuous = continuous 18 | 19 | self.define_layers() 20 | self.define_train_test_funcs() 21 | 22 | def noiser(self, n): 23 | z = init_normal_weight((n, self.latent_size)) 24 | return floatX(z) 25 | 26 | def define_layers(self): 27 | self.params = [] 28 | 29 | layer_id = "1" 30 | self.W_xh = init_weights((self.in_size, self.hidden_size), self.prefix + "W_xh" + layer_id) 31 | self.b_xh = init_bias(self.hidden_size, self.prefix + "b_xh" + layer_id) 32 | 33 | layer_id = "2" 34 | self.W_hu = init_weights((self.hidden_size, self.latent_size), self.prefix + "W_hu" + layer_id) 35 | self.b_hu = init_bias(self.latent_size, self.prefix + "b_hu" + layer_id) 36 | self.W_hsigma = init_weights((self.hidden_size, self.latent_size), self.prefix + "W_hsigma" + layer_id) 37 | self.b_hsigma = init_bias(self.latent_size, self.prefix + "b_hsigma" + layer_id) 38 | 39 | layer_id = "3" 40 | self.W_zh = init_weights((self.latent_size, self.hidden_size), self.prefix + "W_zh" + layer_id) 41 | self.b_zh = init_bias(self.hidden_size, self.prefix + "b_zh" + layer_id) 42 | 43 | self.params += [self.W_xh, self.b_xh, self.W_hu, self.b_hu, self.W_hsigma, self.b_hsigma, \ 44 | self.W_zh, self.b_zh] 45 | 46 | layer_id = "4" 47 | if self.continuous: 48 | self.W_hyu = init_weights((self.hidden_size, self.out_size), self.prefix + "W_hyu" + layer_id) 49 | self.b_hyu = init_bias(self.out_size, self.prefix + "b_hyu" + layer_id) 50 | self.W_hysigma = init_weights((self.hidden_size, self.out_size), self.prefix + "W_hysigma" + layer_id) 51 | self.b_hysigma = init_bias(self.out_size, self.prefix + "b_hysigma" + layer_id) 52 | self.params += [self.W_hyu, self.b_hyu, self.W_hysigma, self.b_hysigma] 53 | else: 54 | self.W_hy = init_weights((self.hidden_size, self.out_size), self.prefix + "W_hy" + layer_id) 55 | self.b_hy = init_bias(self.out_size, self.prefix + "b_hy" + layer_id) 56 | self.params += [self.W_hy, self.b_hy] 57 | 58 | # encoder 59 | h_enc = T.nnet.relu(T.dot(self.X, self.W_xh) + self.b_xh) 60 | 61 | self.mu = T.dot(h_enc, self.W_hu) + self.b_hu 62 | log_var = T.dot(h_enc, self.W_hsigma) + self.b_hsigma 63 | self.var = T.exp(log_var) 64 | self.sigma = T.sqrt(self.var) 65 | 66 | srng = T.shared_randomstreams.RandomStreams(234) 67 | eps = srng.normal(self.mu.shape) 68 | self.z = self.mu + self.sigma * eps 69 | 70 | # decoder 71 | h_dec = T.nnet.relu(T.dot(self.z, self.W_zh) + self.b_zh) 72 | if self.continuous: 73 | self.reconstruct = T.dot(h_dec, self.W_hyu) + self.b_hyu 74 | self.log_var_dec = T.dot(h_dec, self.W_hysigma) + self.b_hysigma 75 | self.var_dec = T.exp(self.log_var_dec) 76 | else: 77 | self.reconstruct = T.nnet.sigmoid(T.dot(h_dec, self.W_hy) + self.b_hy) 78 | 79 | def multivariate_bernoulli(self, y_pred, y_true): 80 | return T.sum(y_true * T.log(y_pred) + (1 - y_true) * T.log(1 - y_pred), axis=1) 81 | 82 | def log_mvn(self, y_pred, y_true): 83 | p = y_true.shape[1] 84 | return T.sum(-0.5 * p * np.log(2 * np.pi) - 0.5 * self.log_var_dec - 0.5 * ((y_true - y_pred)**2 / self.var_dec), axis=1) 85 | 86 | def kld(self, mu, var): 87 | return 0.5 * T.sum(1 + T.log(var) - mu**2 - var, axis=1) 88 | 89 | def define_train_test_funcs(self): 90 | if self.continuous: 91 | cost = -T.mean((self.kld(self.mu, self.var) + self.log_mvn(self.reconstruct, self.X))) 92 | else: 93 | cost = -T.mean((self.kld(self.mu, self.var) + self.multivariate_bernoulli(self.reconstruct, self.X))) 94 | 95 | gparams = [] 96 | for param in self.params: 97 | #gparam = T.grad(cost, param) 98 | gparam = T.clip(T.grad(cost, param), -10, 10) 99 | gparams.append(gparam) 100 | 101 | lr = T.scalar("lr") 102 | optimizer = eval(self.optimizer) 103 | updates = optimizer(self.params, gparams, lr) 104 | 105 | self.train = theano.function(inputs = [self.X, lr], outputs = [cost, self.z], updates = updates) 106 | self.validate = theano.function(inputs = [self.X], outputs = [cost, self.reconstruct]) 107 | self.project = theano.function(inputs = [self.X], outputs = self.mu) 108 | self.generate = theano.function(inputs = [self.z], outputs = self.reconstruct) 109 | 110 | -------------------------------------------------------------------------------- /data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | #pylint: skip-file 3 | import sys 4 | import os 5 | import numpy as np 6 | import theano 7 | import theano.tensor as T 8 | import cPickle, gzip 9 | import string 10 | 11 | curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) 12 | 13 | #data: http://deeplearning.net/data/mnist/mnist.pkl.gz 14 | def mnist(): 15 | f = gzip.open(curr_path + "/data/mnist.pkl.gz", "rb") 16 | train_set, valid_set, test_set = cPickle.load(f) 17 | f.close() 18 | return train_set, valid_set, test_set 19 | 20 | def freyface(): 21 | raw_faces = cPickle.load(open(curr_path + "/data/freyfaces.pkl", "rb")) 22 | mat_faces = np.zeros((len(raw_faces), len(raw_faces[0]))) 23 | for i in range(len(raw_faces)): # 1965 in total 24 | mat_faces[i, :] = np.asarray(raw_faces[i]) 25 | 26 | train_set = mat_faces[:1600, :] 27 | valid_set = mat_faces[1600:1800, :] 28 | test_set = mat_faces[1800:, :] 29 | return (train_set, ), (valid_set, ), (test_set, ) 30 | 31 | def batched_mnist(data_set, batch_size = 1): 32 | lst = [n for n in range(len(data_set[0]))] 33 | np.random.shuffle(lst) 34 | X = data_set[0][lst,] 35 | Y = data_set[1][lst] 36 | 37 | data_xy = {} 38 | batch_x = [] 39 | batch_y = [] 40 | batch_id = 0 41 | for i in xrange(len(X)): 42 | batch_x.append(X[i]) 43 | y = np.zeros((10), dtype = theano.config.floatX) 44 | y[Y[i]] = 1 45 | batch_y.append(y) 46 | if (len(batch_x) == batch_size) or (i == len(X) - 1): 47 | data_xy[batch_id] = [np.matrix(batch_x, dtype = theano.config.floatX), \ 48 | np.matrix(batch_y, dtype = theano.config.floatX)] 49 | batch_id += 1 50 | batch_x = [] 51 | batch_y = [] 52 | return data_xy 53 | 54 | def batched_freyface(data_set, batch_size = 1): 55 | lst = [n for n in range(len(data_set[0]))] 56 | np.random.shuffle(lst) 57 | data_xy = {} 58 | batch_x = [] 59 | X = data_set[0][lst,] 60 | batch_id = 0 61 | for i in xrange(len(X)): 62 | batch_x.append(X[i]) 63 | if (len(batch_x) == batch_size) or (i == len(X) - 1): 64 | data_xy[batch_id] = [np.matrix(batch_x, dtype = theano.config.floatX)] 65 | batch_id += 1 66 | batch_x = [] 67 | return data_xy 68 | 69 | #data: http://deeplearning.net/data/mnist/mnist.pkl.gz 70 | def shared_mnist(): 71 | def shared_dataset(data_xy): 72 | data_x, data_y = data_xy 73 | np_y = np.zeros((len(data_y), 10), dtype=theano.config.floatX) 74 | for i in xrange(len(data_y)): 75 | np_y[i, data_y[i]] = 1 76 | 77 | shared_x = theano.shared(np.asmatrix(data_x, dtype=theano.config.floatX)) 78 | shared_y = theano.shared(np.asmatrix(np_y, dtype=theano.config.floatX)) 79 | return shared_x, shared_y 80 | f = gzip.open(curr_path + "/data/mnist.pkl.gz", "rb") 81 | train_set, valid_set, test_set = cPickle.load(f) 82 | f.close() 83 | 84 | test_set_x, test_set_y = shared_dataset(test_set) 85 | valid_set_x, valid_set_y = shared_dataset(valid_set) 86 | train_set_x, train_set_y = shared_dataset(train_set) 87 | 88 | return [train_set_x, train_set_y], [valid_set_x, valid_set_y], [test_set_x, test_set_y] 89 | 90 | def load_stopwords(): 91 | stop_words = {} 92 | f = open("./data/stopwords.txt", "r") 93 | for line in f: 94 | line = line.strip('\n').strip() 95 | stop_words[line] = 1 96 | return stop_words 97 | 98 | def apnews(): 99 | dic = {} 100 | i2w = {} 101 | w2i = {} 102 | docs = {} 103 | stop_words = load_stopwords() 104 | 105 | f = open("./data/news_ap.txt", "r") 106 | doc_id = 0 107 | for line in f: 108 | line = line.strip('\n').lower() 109 | line = line.translate(None, string.punctuation) 110 | words = line.split() 111 | d = [] 112 | for w in words: 113 | if w in stop_words: 114 | continue 115 | d.append(w) 116 | if w in dic: 117 | dic[w] += 1 118 | else: 119 | dic[w] = 1 120 | w2i[w] = len(i2w) 121 | i2w[len(i2w)] = w 122 | 123 | docs[doc_id] = d 124 | doc_id += 1 125 | f.close() 126 | 127 | print len(docs), len(w2i), len(i2w), len(dic) 128 | doc_idx = [i for i in xrange(len(docs))] 129 | spliter = (int) (len(docs) / 10.0 * 9) 130 | train_idx = doc_idx[0:spliter] 131 | valid_idx = doc_idx[spliter:len(docs)] 132 | test_idx = valid_idx 133 | 134 | return train_idx, valid_idx, test_idx, [docs, dic, w2i, i2w] 135 | 136 | def batched_idx(lst, batch_size = 1): 137 | np.random.shuffle(lst) 138 | data_xy = {} 139 | batch_x = [] 140 | batch_id = 0 141 | for i in xrange(len(lst)): 142 | batch_x.append(lst[i]) 143 | if (len(batch_x) == batch_size) or (i == len(lst) - 1): 144 | data_xy[batch_id] = batch_x 145 | batch_id += 1 146 | batch_x = [] 147 | return data_xy 148 | 149 | def batched_news(x_idx, data): 150 | [docs, dic, w2i, i2w] = data 151 | X = np.zeros((len(x_idx), len(dic)), dtype = theano.config.floatX) 152 | for i in xrange(len(x_idx)): 153 | xi = x_idx[i] 154 | d = docs[xi] 155 | for w in d: 156 | X[i, w2i[w]] += 1 157 | 158 | for i in xrange(len(x_idx)): 159 | norm2 = np.linalg.norm(X[i,:]) 160 | if norm2 != 0: 161 | X[i,:] /= norm2 162 | 163 | return X 164 | 165 | 166 | 167 | -------------------------------------------------------------------------------- /data/mnist.pkl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lipiji/variational-autoencoder-theano/36f9f93af8df6bcc4ae7bd680a33d8356b1e7dbc/data/mnist.pkl.gz -------------------------------------------------------------------------------- /data/stopwords.txt: -------------------------------------------------------------------------------- 1 | ` 2 | `` 3 | ^ 4 | ~ 5 | < 6 | = 7 | > 8 | | 9 | _ 10 | - 11 | , 12 | ; 13 | : 14 | ! 15 | ? 16 | / 17 | . 18 | ... 19 | ' 20 | '' 21 | " 22 | ( 23 | ) 24 | [ 25 | ] 26 | { 27 | } 28 | @ 29 | $ 30 | * 31 | \ 32 | & 33 | # 34 | % 35 | + 36 | a 37 | about 38 | above 39 | after 40 | again 41 | against 42 | all 43 | am 44 | an 45 | and 46 | any 47 | are 48 | aren't 49 | as 50 | at 51 | be 52 | because 53 | been 54 | before 55 | being 56 | below 57 | between 58 | both 59 | but 60 | by 61 | can't 62 | cannot 63 | could 64 | couldn't 65 | did 66 | didn't 67 | do 68 | does 69 | doesn't 70 | doing 71 | don't 72 | down 73 | during 74 | each 75 | few 76 | for 77 | from 78 | further 79 | had 80 | hadn't 81 | has 82 | hasn't 83 | have 84 | haven't 85 | having 86 | he 87 | he'd 88 | he'll 89 | he's 90 | her 91 | here 92 | here's 93 | hers 94 | herself 95 | him 96 | himself 97 | his 98 | how 99 | how's 100 | i 101 | i'd 102 | i'll 103 | i'm 104 | i've 105 | if 106 | in 107 | into 108 | is 109 | isn't 110 | it 111 | it's 112 | its 113 | itself 114 | let's 115 | me 116 | more 117 | most 118 | mustn't 119 | my 120 | myself 121 | no 122 | nor 123 | not 124 | of 125 | off 126 | on 127 | once 128 | only 129 | or 130 | other 131 | ought 132 | our 133 | ours 134 | ourselves 135 | out 136 | over 137 | own 138 | same 139 | shan't 140 | she 141 | she'd 142 | she'll 143 | she's 144 | should 145 | shouldn't 146 | so 147 | some 148 | such 149 | than 150 | that 151 | that's 152 | the 153 | their 154 | theirs 155 | them 156 | themselves 157 | then 158 | there 159 | there's 160 | these 161 | they 162 | they'd 163 | they'll 164 | they're 165 | they've 166 | this 167 | those 168 | through 169 | to 170 | too 171 | under 172 | until 173 | up 174 | very 175 | was 176 | wasn't 177 | we 178 | we'd 179 | we'll 180 | we're 181 | we've 182 | were 183 | weren't 184 | what 185 | what's 186 | when 187 | when's 188 | where 189 | where's 190 | which 191 | while 192 | who 193 | who's 194 | whom 195 | why 196 | why's 197 | with 198 | won't 199 | would 200 | wouldn't 201 | you 202 | you'd 203 | you'll 204 | you're 205 | you've 206 | your 207 | yours 208 | yourself 209 | yourselves -------------------------------------------------------------------------------- /docs/2dstructure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lipiji/variational-autoencoder-theano/36f9f93af8df6bcc4ae7bd680a33d8356b1e7dbc/docs/2dstructure.png -------------------------------------------------------------------------------- /docs/face.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lipiji/variational-autoencoder-theano/36f9f93af8df6bcc4ae7bd680a33d8356b1e7dbc/docs/face.png -------------------------------------------------------------------------------- /docs/manifold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lipiji/variational-autoencoder-theano/36f9f93af8df6bcc4ae7bd680a33d8356b1e7dbc/docs/manifold.png -------------------------------------------------------------------------------- /docs/reconstruct.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lipiji/variational-autoencoder-theano/36f9f93af8df6bcc4ae7bd680a33d8356b1e7dbc/docs/reconstruct.png -------------------------------------------------------------------------------- /docs/vaes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lipiji/variational-autoencoder-theano/36f9f93af8df6bcc4ae7bd680a33d8356b1e7dbc/docs/vaes.png -------------------------------------------------------------------------------- /main_face.py: -------------------------------------------------------------------------------- 1 | #pylint: skip-file 2 | import time 3 | import sys 4 | import numpy as np 5 | import theano 6 | import theano.tensor as T 7 | from VAE import * 8 | import data 9 | import matplotlib.pyplot as plt 10 | 11 | use_gpu(0) 12 | 13 | lr = 0.001 14 | drop_rate = 0. 15 | batch_size = 128 16 | hidden_size = 500 17 | latent_size = 2 18 | # try: sgd, momentum, rmsprop, adagrad, adadelta, adam, nesterov_momentum 19 | optimizer = "adam" 20 | continuous = False 21 | 22 | train_set, valid_set, test_set = data.freyface() 23 | 24 | train_xy = data.batched_freyface(train_set, batch_size) 25 | dim_x = train_xy[0][0].shape[1] 26 | dim_y = dim_x 27 | print "#features = ", dim_x, "#labels = ", dim_y 28 | 29 | print "compiling..." 30 | model = VAE(dim_x, dim_x, hidden_size, latent_size, continuous, optimizer) 31 | 32 | print "training..." 33 | start = time.time() 34 | for i in xrange(100): 35 | error = 0.0 36 | in_start = time.time() 37 | for batch_id, xy in train_xy.items(): 38 | X = xy[0] 39 | cost, z = model.train(X, lr) 40 | error += cost 41 | in_time = time.time() - in_start 42 | 43 | error /= len(train_xy); 44 | print "Iter = " + str(i) + ", Loss = " + str(error) + ", Time = " + str(in_time) 45 | 46 | print "training finished. Time = " + str(time.time() - start) 47 | 48 | print "save model..." 49 | save_model("./model/vae_face.model", model) 50 | 51 | '''-------------Visualization------------------''' 52 | # code from: https://jmetzen.github.io/2015-11-27/vae.html 53 | 54 | load_model("./model/vae_face.model", model) 55 | 56 | print "validation.." 57 | valid_xy = data.batched_freyface(valid_set, batch_size) 58 | error = 0 59 | for batch_id, xy in valid_xy.items(): 60 | X = xy[0] 61 | cost, y = model.validate(X) 62 | error += cost 63 | print "Loss = " + str(error / len(valid_xy)) 64 | 65 | plt.figure(figsize=(8, 12)) 66 | for i in range(5): 67 | plt.subplot(5, 2, 2*i + 1) 68 | plt.imshow(X[i].reshape(28, 20), vmin=0, vmax=1, cmap='gray_r') 69 | plt.title("Test input") 70 | plt.colorbar() 71 | plt.subplot(5, 2, 2*i + 2) 72 | plt.imshow(y[i].reshape(28, 20), vmin=0, vmax=1, cmap='gray_r') 73 | plt.title("Reconstruction") 74 | plt.colorbar() 75 | plt.tight_layout() 76 | plt.savefig("reconstruct.png", bbox_inches="tight") 77 | plt.show() 78 | 79 | ## manifold 80 | if latent_size == 2: 81 | test_xy = data.batched_freyface(test_set, 160) 82 | X = test_xy[0][0] 83 | 84 | mu = np.array(model.project(X)) 85 | 86 | plt.figure(figsize=(8, 6)) 87 | plt.scatter(mu[:, 0], mu[:, 1], c="r") 88 | plt.savefig("2dstructure.png", bbox_inches="tight") 89 | plt.show() 90 | 91 | ################# 92 | 93 | nx = ny = 20 94 | v = 3 95 | x_values = np.linspace(-v, v, nx) 96 | y_values = np.linspace(-v, v, ny) 97 | canvas = np.empty((28*ny, 20*nx)) 98 | for i, yi in enumerate(x_values): 99 | for j, xi in enumerate(y_values): 100 | z = np.array([[xi, yi]], dtype=theano.config.floatX) 101 | y = model.generate(z) 102 | canvas[(nx-i-1)*28:(nx-i)*28, j*20:(j+1)*20] = y.reshape(28, 20) 103 | 104 | fit = plt.figure(figsize=(8, 10)) 105 | Xi, Yi = np.meshgrid(x_values, y_values) 106 | plt.imshow(canvas, origin="upper", cmap='gray_r') 107 | plt.tight_layout() 108 | plt.savefig("manifold.png", bbox_inches="tight") 109 | plt.show() 110 | 111 | -------------------------------------------------------------------------------- /main_mnist.py: -------------------------------------------------------------------------------- 1 | #pylint: skip-file 2 | import os 3 | cudaid = 0 4 | os.environ["THEANO_FLAGS"] = "device=cuda" + str(cudaid) 5 | 6 | import time 7 | import sys 8 | import numpy as np 9 | import theano 10 | import theano.tensor as T 11 | from VAE import * 12 | import data 13 | import matplotlib.pyplot as plt 14 | 15 | 16 | lr = 0.001 17 | drop_rate = 0. 18 | batch_size = 128 19 | hidden_size = 500 20 | latent_size = 2 21 | # try: sgd, momentum, rmsprop, adagrad, adadelta, adam, nesterov_momentum 22 | optimizer = "adam" 23 | continuous = False 24 | 25 | train_set, valid_set, test_set = data.mnist() 26 | 27 | train_xy = data.batched_mnist(train_set, batch_size) 28 | dim_x = train_xy[0][0].shape[1] 29 | dim_y = train_xy[0][1].shape[1] 30 | print "#features = ", dim_x, "#labels = ", dim_y 31 | 32 | print "compiling..." 33 | model = VAE(dim_x, dim_x, hidden_size, latent_size, continuous, optimizer) 34 | 35 | print "training..." 36 | start = time.time() 37 | for i in xrange(50): 38 | error = 0.0 39 | in_start = time.time() 40 | for batch_id, xy in train_xy.items(): 41 | X = xy[0] 42 | cost, z = model.train(X, lr) 43 | error += cost 44 | in_time = time.time() - in_start 45 | 46 | error /= len(train_xy); 47 | print "Iter = " + str(i) + ", Loss = " + str(error) + ", Time = " + str(in_time) 48 | 49 | print "training finished. Time = " + str(time.time() - start) 50 | 51 | print "save model..." 52 | save_model("./model/vae_mnist.model", model) 53 | 54 | 55 | '''-------------Visualization------------------''' 56 | # code from: https://jmetzen.github.io/2015-11-27/vae.html 57 | 58 | load_model("./model/vae_mnist.model", model) 59 | 60 | print "validation.." 61 | valid_xy = data.batched_mnist(valid_set, batch_size) 62 | error = 0 63 | for batch_id, xy in valid_xy.items(): 64 | X = xy[0] 65 | cost, y = model.validate(X) 66 | error += cost 67 | print "Loss = " + str(error / len(valid_xy)) 68 | 69 | plt.figure(figsize=(8, 12)) 70 | for i in range(5): 71 | plt.subplot(5, 2, 2*i + 1) 72 | plt.imshow(X[i].reshape(28, 28), vmin=0, vmax=1) 73 | plt.title("Test input") 74 | plt.colorbar() 75 | plt.subplot(5, 2, 2*i + 2) 76 | plt.imshow(y[i].reshape(28, 28), vmin=0, vmax=1) 77 | plt.title("Reconstruction") 78 | plt.colorbar() 79 | plt.tight_layout() 80 | plt.savefig("reconstruct.png", bbox_inches="tight") 81 | plt.show() 82 | 83 | ## manifold 84 | if latent_size == 2: 85 | test_xy = data.batched_mnist(test_set, 5000) 86 | X = test_xy[0][0] 87 | 88 | mu = np.array(model.project(X)) 89 | 90 | plt.figure(figsize=(8, 6)) 91 | plt.scatter(mu[:, 0], mu[:, 1], c=np.argmax(np.array(test_xy[0][1]), 1)) 92 | plt.colorbar() 93 | plt.savefig("2dstructure.png", bbox_inches="tight") 94 | plt.show() 95 | 96 | '''--------------------------''' 97 | nx = ny = 20 98 | x_values = np.linspace(-3, 3, nx) 99 | y_values = np.linspace(-3, 3, ny) 100 | canvas = np.empty((28*ny, 28*nx)) 101 | for i, yi in enumerate(x_values): 102 | for j, xi in enumerate(y_values): 103 | z = np.array([[xi, yi]], dtype=theano.config.floatX) 104 | y = model.generate(z) 105 | canvas[(nx-i-1)*28:(nx-i)*28, j*28:(j+1)*28] = y.reshape(28, 28) 106 | 107 | fit = plt.figure(figsize=(8, 10)) 108 | Xi, Yi = np.meshgrid(x_values, y_values) 109 | plt.imshow(canvas, origin="upper") 110 | plt.tight_layout() 111 | plt.savefig("manifold.png", bbox_inches="tight") 112 | plt.show() 113 | 114 | -------------------------------------------------------------------------------- /main_text.py: -------------------------------------------------------------------------------- 1 | #pylint: skip-file 2 | import os 3 | cudaid = 2 4 | os.environ["THEANO_FLAGS"] = "device=cuda" + str(cudaid) 5 | 6 | import time 7 | import sys 8 | import numpy as np 9 | import theano 10 | import theano.tensor as T 11 | from VAE import * 12 | import data 13 | import matplotlib.pyplot as plt 14 | 15 | #use_gpu(2) 16 | 17 | lr = 0.001 18 | drop_rate = 0. 19 | batch_size = 20 20 | hidden_size = 500 21 | latent_size = 50 22 | # try: sgd, momentum, rmsprop, adagrad, adadelta, adam, nesterov_momentum 23 | optimizer = "adam" 24 | continuous = False 25 | 26 | train_idx, valid_idx, test_idx, other_data = data.apnews() 27 | [docs, dic, w2i, i2w] = other_data 28 | 29 | dim_x = len(dic) 30 | dim_y = dim_x 31 | print "#features = ", dim_x, "#labels = ", dim_y 32 | 33 | print "compiling..." 34 | model = VAE(dim_x, dim_x, hidden_size, latent_size, continuous, optimizer) 35 | 36 | print "training..." 37 | start = time.time() 38 | for i in xrange(100): 39 | train_xy = data.batched_idx(train_idx, batch_size) 40 | error = 0.0 41 | in_start = time.time() 42 | for batch_id, x_idx in train_xy.items(): 43 | X = data.batched_news(x_idx, other_data) 44 | cost, z = model.train(X, lr) 45 | error += cost 46 | #print i, batch_id, "/", len(train_xy), cost 47 | in_time = time.time() - in_start 48 | 49 | error /= len(train_xy); 50 | print "Iter = " + str(i) + ", Loss = " + str(error) + ", Time = " + str(in_time) 51 | 52 | print "training finished. Time = " + str(time.time() - start) 53 | 54 | print "save model..." 55 | save_model("./model/vae_text.model", model) 56 | 57 | print "lode model..." 58 | load_model("./model/vae_text.model", model) 59 | 60 | print "validation.." 61 | valid_xy = data.batched_idx(valid_idx, batch_size) 62 | error = 0 63 | for batch_id, x_idx in valid_xy.items(): 64 | X = data.batched_news(x_idx, other_data) 65 | cost, y = model.validate(X) 66 | error += cost 67 | print "Loss = " + str(error / len(valid_xy)) 68 | 69 | top_w = 20 70 | ## manifold 71 | if latent_size == 2: 72 | test_xy = data.batched_idx(test_idx, 1000) 73 | x_idx = test_xy[0] 74 | X = data.batched_news(x_idx, other_data) 75 | 76 | mu = np.array(model.project(X)) 77 | 78 | plt.figure(figsize=(8, 6)) 79 | plt.scatter(mu[:, 0], mu[:, 1], c="r") 80 | #plt.savefig("2dstructure.png", bbox_inches="tight") 81 | plt.show() 82 | 83 | nx = ny = 20 84 | v = 100 85 | x_values = np.linspace(-v, v, nx) 86 | y_values = np.linspace(-v, v, ny) 87 | canvas = np.empty((28*ny, 20*nx)) 88 | for i, xi in enumerate(x_values): 89 | for j, yi in enumerate(y_values): 90 | z = np.array([[xi, yi]], dtype=theano.config.floatX) 91 | y = model.generate(z)[0,:] 92 | ind = np.argsort(-y) 93 | print xi, yi, 94 | for k in xrange(top_w): 95 | print i2w[ind[k]], 96 | print "\n" 97 | else: 98 | sampels = 32 99 | for i in xrange(sampels): 100 | z = model.noiser(latent_size) 101 | y = model.generate(z)[0,:] 102 | ind = np.argsort(-y) 103 | for k in xrange(top_w): 104 | print i2w[ind[k]], 105 | print "\n" 106 | 107 | -------------------------------------------------------------------------------- /model/toy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lipiji/variational-autoencoder-theano/36f9f93af8df6bcc4ae7bd680a33d8356b1e7dbc/model/toy -------------------------------------------------------------------------------- /updates.py: -------------------------------------------------------------------------------- 1 | #pylint: skip-file 2 | #https://github.com/Lasagne/Lasagne/blob/master/lasagne/updates.py 3 | import numpy as np 4 | import theano 5 | import theano.tensor as T 6 | 7 | ''' 8 | def clip_norm(g, c, n): 9 | if c > 0: 10 | g = K.switch(n >= c, g * c / n, g) 11 | return g 12 | 13 | def clip(x, min_value, max_value): 14 | if max_value < min_value: 15 | max_value = min_value 16 | return T.clip(x, min_value, max_value) 17 | ''' 18 | 19 | def sgd(params, gparams, learning_rate = 0.1): 20 | updates = [] 21 | for p, g in zip(params, gparams): 22 | updates.append((p, p - learning_rate * g)) 23 | return updates 24 | 25 | def momentum(params, gparams, learning_rate = 0.1, momentum = 0.9): 26 | updates = [] 27 | for p, g in zip(params, gparams): 28 | v = p.get_value(borrow = True) 29 | velocity = theano.shared(np.zeros(v.shape, dtype = v.dtype), broadcastable = p.broadcastable) 30 | x = momentum * velocity - learning_rate * g 31 | updates.append((velocity, x)) 32 | updates.append((p, p + x)) 33 | return updates 34 | 35 | def nesterov_momentum(params, gparams, learning_rate = 0.1, momentum = 0.9): 36 | updates = [] 37 | for p, g in zip(params, gparams): 38 | v = p.get_value(borrow = True) 39 | velocity = theano.shared(np.zeros(v.shape, dtype = v.dtype), broadcastable = p.broadcastable) 40 | x = momentum * velocity - learning_rate * g 41 | updates.append((velocity, x)) 42 | inc = momentum * x - learning_rate * g 43 | updates.append((p, p + inc)) 44 | return updates 45 | 46 | def rmsprop(params, gparams, learning_rate = 0.001, rho = 0.9, epsilon = 1e-6): 47 | updates = [] 48 | for p, g in zip(params, gparams): 49 | v = p.get_value(borrow = True) 50 | acc = theano.shared(np.zeros(v.shape, dtype = v.dtype), broadcastable = p.broadcastable) 51 | acc_new = rho * acc + (1 - rho) * g ** 2 52 | updates.append((acc, acc_new)) 53 | updates.append((p, p - learning_rate * g / T.sqrt(acc_new + epsilon))) 54 | return updates 55 | 56 | def adagrad(params, gparams, learning_rate = 0.01, epsilon = 1e-6): 57 | updates = [] 58 | for p, g in zip(params, gparams): 59 | v = p.get_value(borrow = True) 60 | acc = theano.shared(np.zeros(v.shape, dtype = v.dtype), broadcastable = p.broadcastable) 61 | acc_new = acc + g ** 2 62 | updates.append((acc, acc_new)) 63 | updates.append((p, p - learning_rate * g / T.sqrt(acc_new + epsilon))) 64 | return updates 65 | 66 | def adadelta(params, gparams, learning_rate = 1.0, rho = 0.95, epsilon = 1e-6): 67 | updates = [] 68 | for p, g in zip(params, gparams): 69 | v = p.get_value(borrow = True) 70 | acc = theano.shared(np.zeros(v.shape, dtype = v.dtype), broadcastable = p.broadcastable) 71 | delta_acc = theano.shared(np.zeros(v.shape, dtype = v.dtype), broadcastable = p.broadcastable) 72 | 73 | acc_new = rho * acc + (1 - rho) * g ** 2 74 | updates.append((acc, acc_new)) 75 | 76 | update = (g * T.sqrt(delta_acc + epsilon) / T.sqrt(acc_new + epsilon)) 77 | updates.append((p, p - learning_rate * update)) 78 | 79 | delta_acc_new = rho * delta_acc + (1 - rho) * update ** 2 80 | updates.append((delta_acc, delta_acc_new)) 81 | return updates 82 | 83 | def adam(params, gparams, learning_rate = 0.001, beta1 = 0.9, beta2 = 0.999, epsilon = 1e-8): 84 | updates = [] 85 | t_pre = theano.shared(np.asarray(.0, dtype=theano.config.floatX)) 86 | t = t_pre + 1 87 | a_t = learning_rate * T.sqrt(1 - beta2 ** t) / (1 - beta1 ** t) 88 | for p, g in zip(params, gparams): 89 | v = p.get_value(borrow = True) 90 | m_pre = theano.shared(np.zeros(v.shape, dtype = v.dtype), broadcastable = p.broadcastable) 91 | v_pre = theano.shared(np.zeros(v.shape, dtype = v.dtype), broadcastable = p.broadcastable) 92 | 93 | m_t = beta1 * m_pre + (1 - beta1) * g 94 | v_t = beta2 * v_pre + (1 - beta2) * g ** 2 95 | step = a_t * m_t / (T.sqrt(v_t) + epsilon) 96 | 97 | updates.append((m_pre, m_t)) 98 | updates.append((v_pre, v_t)) 99 | updates.append((p, p - step)) 100 | 101 | updates.append((t_pre, t)) 102 | return updates 103 | -------------------------------------------------------------------------------- /utils_pg.py: -------------------------------------------------------------------------------- 1 | #pylint: skip-file 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | import cPickle as pickle 6 | 7 | def floatX(X): 8 | return np.asarray(X, dtype=theano.config.floatX) 9 | 10 | def init_normal_weight(shape, scale=0.01): 11 | return np.random.normal(loc=0.0, scale=scale, size=shape) 12 | 13 | def init_weights(shape, name, sample = "xavier"): 14 | if sample == "uniform": 15 | values = np.random.uniform(-0.08, 0.08, shape) 16 | elif sample == "xavier": 17 | values = np.random.uniform(-np.sqrt(6. / (shape[0] + shape[1])), np.sqrt(6. / (shape[0] + shape[1])), shape) 18 | elif sample == "ortho": 19 | W = np.random.randn(shape[0], shape[0]) 20 | u, s, v = np.linalg.svd(W) 21 | values = u 22 | else: 23 | raise ValueError("Unsupported initialization scheme: %s" % sample) 24 | 25 | return theano.shared(floatX(values), name) 26 | 27 | def init_gradws(shape, name): 28 | return theano.shared(floatX(np.zeros(shape)), name) 29 | 30 | def init_bias(size, name): 31 | return theano.shared(floatX(np.zeros((size,))), name) 32 | 33 | def init_mat(mat, name): 34 | return theano.shared(floatX(mat), name) 35 | 36 | def save_model(f, model): 37 | ps = {} 38 | for p in model.params: 39 | ps[p.name] = p.get_value() 40 | pickle.dump(ps, open(f, "wb")) 41 | 42 | def load_model(f, model): 43 | ps = pickle.load(open(f, "rb")) 44 | for p in model.params: 45 | p.set_value(ps[p.name]) 46 | return model 47 | --------------------------------------------------------------------------------