├── .gitignore
├── images
│   ├── morphing_0.jpeg
│   ├── morphing_43_0.jpeg
│   └── morphing_43_9.jpeg
├── vaegan
│   ├── data
│   │   ├── __init__.py
│   │   └── image.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── utils.py
│   │   └── progress.py
│   ├── __init__.py
│   ├── optimizers
│   │   ├── __init__.py
│   │   ├── adagrad.py
│   │   ├── rmsprop.py
│   │   ├── adam.py
│   │   └── opt_proc.py
│   ├── functions.py
│   ├── transform.py
│   └── vaegan.py
├── examples
│   ├── configures
│   │   ├── lfw_morph.json
│   │   └── lfw.json
│   ├── morphing.py
│   ├── get_lfw.py
│   ├── train.py
│   └── operate.py
├── LICENSE
└── README.md

/.gitignore:
--------------------------------------------------------------------------------
*.pyc
*~
--------------------------------------------------------------------------------
/images/morphing_0.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anitan0925/vaegan/HEAD/images/morphing_0.jpeg
--------------------------------------------------------------------------------
/images/morphing_43_0.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anitan0925/vaegan/HEAD/images/morphing_43_0.jpeg
--------------------------------------------------------------------------------
/images/morphing_43_9.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anitan0925/vaegan/HEAD/images/morphing_43_9.jpeg
--------------------------------------------------------------------------------
/vaegan/data/__init__.py:
--------------------------------------------------------------------------------
# coding : utf-8

from image import load_data, preprocess, postprocess
--------------------------------------------------------------------------------
/vaegan/utils/__init__.py:
--------------------------------------------------------------------------------
# coding : utf-8

from utils import minibatches
from progress import Progress
--------------------------------------------------------------------------------
/vaegan/__init__.py:
--------------------------------------------------------------------------------
# coding : utf-8

from transform import image_grid, Morph, Reconstruct, Operate
from vaegan import VAEGAN
--------------------------------------------------------------------------------
/vaegan/optimizers/__init__.py:
--------------------------------------------------------------------------------
# coding : utf-8

from adam import Adam
from adagrad import AdaGrad
from rmsprop import RMSProp
--------------------------------------------------------------------------------
/examples/configures/lfw_morph.json:
--------------------------------------------------------------------------------
{
    "test_data_dir" : "./test_lfw",
    "param_path" : "./tmp_params/param.model",
    "box" : [50,50,200,200],
    "output_dir" : "./tmp"
}
--------------------------------------------------------------------------------
/examples/configures/lfw.json:
--------------------------------------------------------------------------------
{
    "pretrain" : true,
    "train_data_dir" : "./train_lfw",
    "test_data_dir" : "./test_lfw",
    "box" : [50,50,200,200],
    "output_params_dir" : "./tmp_params",
    "output_dir" : "./tmp"
}
--------------------------------------------------------------------------------
/vaegan/utils/utils.py:
--------------------------------------------------------------------------------
# coding : utf-8

import numpy as np

def minibatches( minibatch_size, X, Y=None, shuffle_f=False ):
    """Yield minibatches of X (and of Y, when given), optionally shuffled."""
    indices = np.arange( len(X) )
    if shuffle_f:
        np.random.shuffle( indices )

    for start in range( 0, len(X), minibatch_size ):
        minibatch_indices = indices[ start : start+minibatch_size ]
        if Y is not None:  # `if Y:` is ambiguous for numpy arrays.
            yield X[minibatch_indices], Y[minibatch_indices]
        else:
            yield X[minibatch_indices]
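
# A small usage sketch: iterate over a toy array in shuffled minibatches of
# four (the last minibatch may be smaller).
if __name__ == '__main__':
    X = np.arange(10)
    for Xb in minibatches( 4, X, shuffle_f=True ):
        print Xb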
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2016 Atsushi Nitanda

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
--------------------------------------------------------------------------------
/vaegan/data/image.py:
--------------------------------------------------------------------------------
# coding : utf-8

import os
import numpy as np
import theano
from PIL import Image

def load_data( data_dir, shape=None, box=None, restrict=-1 ):
    """Load images under data_dir, optionally cropping to box and resizing to shape."""
    img_files = []
    for root, dirs, files in os.walk( data_dir ):
        if len(files) == 0:
            continue
        img_files.extend( map( lambda f : os.path.join(root, f), files ) )

    arrs = []
    if restrict < 0:
        data_size = len(img_files)
    else:
        data_size = restrict

    for f_name in img_files[:data_size]:
        img = Image.open(f_name)
        if box is not None:
            img = img.crop(box)
        if shape is not None:
            img = img.resize( shape )
        arr = np.asarray( img ).astype( theano.config.floatX )
        arr = preprocess( arr )
        arrs.append( arr )

    arrs = np.asarray( arrs ).astype( theano.config.floatX )

    return arrs

def preprocess( arr ):
    # Map uint8 pixel values [0, 255] to [-1, 1] and move channels first (CHW).
    arr = np.array( arr )  # copy
    arr /= 127.5
    arr -= 1.
    arr = arr.transpose(2, 0, 1)
    return arr

def postprocess( arr ):
    # Inverse of preprocess: channels last (HWC), values back to [0, 255].
    arr = np.array( arr )  # copy
    arr = arr.transpose(1, 2, 0)
    arr += 1.
    arr *= 127.5
    return arr
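
# A quick self-check sketch: preprocess followed by postprocess should recover
# the original pixel values (up to floating-point error).
if __name__ == '__main__':
    rgb = np.random.randint( 0, 256, (32, 32, 3) ).astype( theano.config.floatX )
    assert np.allclose( postprocess( preprocess( rgb ) ), rgb, atol=1e-3 )
    print u'preprocess/postprocess round-trip OK'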
--------------------------------------------------------------------------------
/vaegan/utils/progress.py:
--------------------------------------------------------------------------------
# coding : utf-8

import sys

class Progress( object ):
    """Render a simple text progress bar on stdout."""

    def __init__( self, max_count, size ):
        if size <= max_count:
            self.__size = size
        else:
            self.__size = max_count

        self.__max_count = max_count
        self.__sep = int(max_count/size) + 1
        self.__count = 0

    def prog( self ):
        if int( self.__count % self.__sep ) != 0:
            self.__count += 1
            return
        p = int( self.__count / self.__sep ) + 1
        s = u'|' + u'=' * p + u' ' * (self.__size-p) + u'| %d/%d' \
            % (self.__count, self.__max_count)
        sys.stdout.write("\r%s" % s)
        sys.stdout.flush()

        self.__count += 1

    def end( self ):
        self.__count = 0
        p = self.__size
        s = u'|' + u'=' * p + u' ' * (self.__size-p) + u'| %d/%d' \
            % (self.__max_count, self.__max_count)
        sys.stdout.write("\r%s" % s)
        sys.stdout.flush()
        print >>sys.stdout

if __name__ == '__main__':
    a = range(1000000)
    prog = Progress(len(a), 50)

    for e in range(10):
        for i, v in enumerate(a):
            prog.prog()
        prog.end()
--------------------------------------------------------------------------------
/examples/morphing.py:
--------------------------------------------------------------------------------
# coding : utf-8

import sys
import os
module_path = os.path.abspath( u'..' )
sys.path.append( module_path )

import json
import cPickle
import numpy as np
import theano
from vaegan.data import load_data, postprocess
from vaegan import Morph
from vaegan import VAEGAN

def load_json( infile_name ):
    fin = open( infile_name, u'r' )
    conf = json.load( fin )
    fin.close()
    return conf

conf = load_json( sys.argv[1] )

param_path = conf[u'param_path']
data_dir = conf[u'test_data_dir']
output_dir = conf[u'output_dir']
box = conf[u'box']  # cropped region
shape = (64, 64)

n_samples = 10
n_steps = 10

seed = 1234
np.random.seed( seed )

# Build VAEGAN
vaegan = VAEGAN( seed )

try:
    fin = open( param_path, u'rb' )
    params = cPickle.load( fin )
    params = [ p.astype( theano.config.floatX ) for p in params ]
    fin.close()
except:
    print >>sys.stderr, u'Cannot open file:', param_path
    sys.exit(-1)

vaegan.load_params( params )

data = load_data( data_dir, shape, box )

indices = np.arange( len(data) )
np.random.shuffle( indices )
sources = data[ indices[:n_samples] ]
np.random.shuffle( indices )
sinks = data[ indices[:n_samples] ]

form_type = 1  # Lay sources, morphing steps, and sinks out side by side in one image.
morph = Morph( vaegan, form_type, sources, sinks, output_dir, n_steps=n_steps )
morph()
--------------------------------------------------------------------------------
/examples/get_lfw.py:
--------------------------------------------------------------------------------
# coding : utf-8

import sys
import os
import glob
import random
import shutil

source = u'http://vis-www.cs.umass.edu/lfw/lfw-deepfunneled.tgz'
filename = u'lfw-deepfunneled.tgz'
org_file = u'lfw-deepfunneled'
train_dir = u'train_lfw'
test_dir = u'test_lfw'
train_ratio = 0.9
if not os.path.exists( filename ):
    if sys.version_info[0] == 2:
        from urllib import urlretrieve
    else:
        from urllib.request import urlretrieve
    urlretrieve( source, filename )

if not os.path.exists( org_file ):
    import tarfile
    tar = tarfile.open( filename )
    tar.extractall()
    tar.close()

file_list = [ os.path.relpath(x, org_file) for x in glob.glob( u'%s/*' % org_file ) ]
data_size = len(file_list)
train_size = int( train_ratio * data_size )
test_size = data_size - train_size
print u'n_train, n_test = %d, %d' % (train_size, test_size)
random.shuffle( file_list )

train_list = file_list[:train_size]
test_list = file_list[train_size:]

if not os.path.isdir( train_dir ):
    os.makedirs( train_dir )

if not os.path.isdir( test_dir ):
    os.makedirs( test_dir )

# Copy each person's directory into the train/test split.
for p in train_list:
    shutil.copytree( os.path.join( org_file, p ), os.path.join( train_dir, p ) )

for p in test_list:
    shutil.copytree( os.path.join( org_file, p ), os.path.join( test_dir, p ) )
--------------------------------------------------------------------------------
/examples/train.py:
--------------------------------------------------------------------------------
# coding : utf-8

import sys
import os
module_path = os.path.abspath( u'..' )
sys.path.append( module_path )

import json
import numpy as np
import theano
from vaegan.data import load_data
from vaegan import Morph, Reconstruct
from vaegan import VAEGAN
from vaegan.optimizers import Adam

def load_json( infile_name ):
    fin = open( infile_name, u'r' )
    conf = json.load( fin )
    fin.close()
    return conf

conf = load_json( sys.argv[1] )

output_params_dir = conf[u'output_params_dir']
output_dir = conf[u'output_dir']
train_data_dir = conf[u'train_data_dir']
test_data_dir = conf[u'test_data_dir']
box = conf[u'box']  # cropped region
shape = (64, 64)

pretrain = conf[u'pretrain']  # read from the JSON configure instead of hard-coding
seed = 1234
np.random.seed( seed )

# Build VAEGAN
vaegan = VAEGAN( seed )

train_data = load_data( train_data_dir, shape, box )
test_data = load_data( test_data_dir, shape, box )

# Set transformation
indices = np.arange( len(train_data) )
grid_shape = (10, 10)
np.random.shuffle( indices )
indices = indices[:grid_shape[0]*grid_shape[1]]
form_type = 0  # Form of output images.
morph = Morph( vaegan, form_type, train_data[indices],
               output_dir=output_dir, shape=grid_shape )

# Train
if pretrain:
    print u'#Early phase'
    solver = Adam( vaegan, eta=1e-3, beta1=0.9, minibatch_size=64 )
    solver.run( train_data, T=20, params_dir=output_params_dir, callback=morph,
                Xt=test_data )

print u'#Final phase'
vaegan.set_phase(1)
solver = Adam( vaegan, eta=1e-3, beta1=0.9, minibatch_size=64 )
solver.run( train_data, T=200, params_dir=output_params_dir, callback=morph,
            Xt=test_data )
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# VAEGAN
This is code for generating images with a [VAEGAN](http://arxiv.org/abs/1512.09300) (variational autoencoder + generative adversarial net).
The original implementation is [1].
Our implementation uses [Theano](https://github.com/Theano/Theano) (>=0.8.0rc1).

## Demonstration
Download the [Labeled Faces in the Wild](http://vis-www.cs.umass.edu/lfw/) dataset, split it into train and test sets, and train VAEGAN:

```
python get_lfw.py
python train.py configures/lfw.json
```

To monitor training progress, the script morphs face images reconstructed from the training dataset (Fig. 1) into randomly generated face images (Fig. 2).

![Figure 1. Reconstructed images at the last epoch](images/morphing_43_0.jpeg)

![Figure 2. Randomly generated images at the last epoch](images/morphing_43_9.jpeg)

After training, the following command makes VAEGAN generate images that morph (Fig. 3) between two randomly chosen images from the test dataset (shown at the left and right ends).

```
python morphing.py configures/lfw_morph.json
```

![Figure 3. Morphing faces.](images/morphing_0.jpeg)


## Optimization tips

- The optimization procedure consists of two phases. In the early phase, we pretrain the VAE (on a pixel-wise reconstruction error) and the discriminator separately.
In the final phase, we train the whole VAEGAN model as in [1].

- We use Adam to train the models in our examples (eta=1e-3, beta1=0.9, beta2=0.999, epsilon=1e-8).

- At each iteration, depending on the discriminator's probabilities of classifying the real/fake minibatch as real/fake, we change which cost is optimized, as done in [2]; see the sketch after this list.
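
The following is a condensed sketch of that rule as it is implemented in `vaegan/optimizers/opt_proc.py` (`real_cost` and `fake_cost` are the discriminator's costs on the real and generated minibatch): when the discriminator is winning too clearly, its update is skipped; when it is losing too badly, the generator's update is skipped.

```
equilibrium = 0.68
margin = 0.4

gen_update, gan_update = True, True
if real_cost < equilibrium - margin or fake_cost < equilibrium - margin:
    gan_update = False  # discriminator is too strong; freeze it
if real_cost > equilibrium + margin or fake_cost > equilibrium + margin:
    gen_update = False  # discriminator is too weak; freeze the generator
if not (gen_update or gan_update):
    gen_update = gan_update = True  # never freeze both sides
```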

## References
[1] https://github.com/andersbll/autoencoding_beyond_pixels
[2] https://github.com/skaae/vaeblog
[3] https://gist.github.com/Newmu/a56d5446416f5ad2bbac
[4] https://gist.github.com/kastnerkyle/f3f67424adda343fef40
--------------------------------------------------------------------------------
/examples/operate.py:
--------------------------------------------------------------------------------
# coding : utf-8

import sys
import os
module_path = os.path.abspath( u'..' )
sys.path.append( module_path )

import json
import cPickle
import numpy as np
import theano
from vaegan.data import load_data, postprocess
from vaegan import Operate, image_grid
from vaegan import VAEGAN

def load_json( infile_name ):
    fin = open( infile_name, u'r' )
    conf = json.load( fin )
    fin.close()
    return conf

conf = load_json( sys.argv[1] )

param_path = conf[u'param_path']
data_dir = conf[u'test_data_dir']
output_dir = conf[u'output_dir']
box = conf[u'box']  # cropped region
shape = (64, 64)

n_samples = 5

seed = 1234
np.random.seed( seed )

# Build VAEGAN
vaegan = VAEGAN( seed )

try:
    fin = open( param_path, u'rb' )
    params = cPickle.load( fin )
    params = [ p.astype( theano.config.floatX ) for p in params ]
    fin.close()
except:
    print >>sys.stderr, u'Cannot open file:', param_path
    sys.exit(-1)

vaegan.load_params( params )

data = load_data( data_dir, shape, box )

indices = np.arange( len(data) )
np.random.shuffle( indices )
sources = data[ indices[:n_samples] ]
np.random.shuffle( indices )
data1 = data[ indices[:n_samples] ]
np.random.shuffle( indices )
data2 = data[ indices[:n_samples] ]

# Compute decode( encode(sources) + encode(data1) - encode(data2) ).
operate = Operate( vaegan, sources, output_dir )
operate.plus(data1)
operate.minus(data2)
operate.equal()

source_arrs = np.asarray( map( postprocess, sources ) )
source_imgs = image_grid( source_arrs, (n_samples, 1) )
source_imgs.save( u'%s/sources.jpeg' % output_dir )

data1_arrs = np.asarray( map( postprocess, data1 ) )
data1_imgs = image_grid( data1_arrs, (n_samples, 1) )
data1_imgs.save( u'%s/data1.jpeg' % output_dir )

data2_arrs = np.asarray( map( postprocess, data2 ) )
data2_imgs = image_grid( data2_arrs, (n_samples, 1) )
data2_imgs.save( u'%s/data2.jpeg' % output_dir )
--------------------------------------------------------------------------------
/vaegan/optimizers/adagrad.py:
--------------------------------------------------------------------------------
# coding : utf-8
"""
< AdaGrad >
"""

from collections import OrderedDict
import numpy as np
import theano
import theano.tensor as tensor
from opt_proc import opt_proc

class AdaGrad( object ):
    """
    AdaGrad.
    """

    def __init__( self, model, eta=1e-2, epsilon=1e-3, minibatch_size=10 ):
        """
        Initialize AdaGrad.

        Arguments
        ---------
        model : model instance; should provide params, grad(), [and updates].
        eta : float.
            Learning rate.
        epsilon : float.
            Constant for numerical stability.
        minibatch_size : integer.
            Minibatch size used to calculate the stochastic gradient.
        """
        self.model = model
        self.__eta = eta
        self.__eps = epsilon
        self.minibatch_size = minibatch_size

        self.__compile()

    def __compile( self ):
        self.update_funcs = []
        for params, inputs, cost in self.model.get_opt_infos():
            # Shared variables for the squared-gradient accumulators.
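            # AdaGrad accumulates the squared gradients per parameter,
            #     acc_t = acc_{t-1} + g_t ** 2,
            # and divides the step eta * g_t by sqrt(acc_t + eps), so frequently
            # updated coordinates receive progressively smaller steps.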
            accs = [ theano.shared(
                np.zeros( p.get_value().shape, dtype=theano.config.floatX ) )
                     for p in params ]

            sgrad = tensor.grad( cost, params )

            new_accs = [ acc + sg ** 2 for (acc, sg) in zip( accs, sgrad ) ]

            updates = OrderedDict()
            updates.update( zip( accs, new_accs ) )
            updates.update(
                [ (p, p - ( self.__eta * sg / tensor.sqrt( acc_new + self.__eps ) ) )
                  for (p, sg, acc_new)
                  in zip( params, sgrad, new_accs ) ] )

            self.update_funcs.append( theano.function( inputs = inputs,
                                                       updates = updates ) )

    def run( self, X, T, params_dir=u'./tmp_params', callback=None, Xt=None ):
        """
        Run the algorithm for T epochs on training data X.

        Arguments
        ---------
        X : numpy array.
            Data.
        T : integer.
            Number of epochs.
        params_dir : str.
            Path to the directory in which params will be saved.
        callback : function.
        Xt : numpy array.
            Test data.
        """
        opt_proc( self, X, T, params_dir, callback, Xt )
--------------------------------------------------------------------------------
/vaegan/functions.py:
--------------------------------------------------------------------------------
# coding : utf-8

"""
Activations, convolutions, and poolings.
"""
import numpy as np
import theano
import theano.tensor as tensor
from theano.tensor.signal.pool import pool_2d
from theano.tensor.nnet import conv2d

def rectify( x ):
    return (x + abs(x)) / 2.0

def leaky_rectify( x, leak=0.2 ):
    f1 = 0.5 * (1 + leak)
    f2 = 0.5 * (1 - leak)
    return f1 * x + f2 * abs(x)

def tanh( x ):
    return tensor.tanh(x)

def hard_tanh( x ):
    return tensor.clip(x, -1., 1.)

def sigmoid( x ):
    return tensor.nnet.sigmoid(x)

def linear( x ):
    return x

def full_conn( x, w, b=None ):
    z = tensor.dot( x, w )
    if b is not None:  # adding a None bias would raise a TypeError
        z += b
    return z

def conv( x, w, b=None ):
    # A 'full' convolution cropped back to the input size ("same" convolution
    # for odd filter widths).
    s = int(np.floor(w.get_value().shape[-1]/2.))
    z = conv2d(x, w, border_mode='full')[:, :, s:-s, s:-s]
    if b is not None:
        z += b.dimshuffle('x', 0, 'x', 'x')

    return z

def max_pool( x, size, ignore_border=False ):
    return pool_2d( x, size, ignore_border=ignore_border )

def depool( x, factor=2 ):
    """
    This code is from
    https://gist.github.com/kastnerkyle/f3f67424adda343fef40
    """
    output_shape = [
        x.shape[1],
        x.shape[2]*factor,
        x.shape[3]*factor
    ]
    stride = x.shape[2]
    offset = x.shape[3]
    in_dim = stride * offset
    out_dim = in_dim * factor * factor

    upsamp_matrix = tensor.zeros((in_dim, out_dim))
    rows = tensor.arange(in_dim)
    cols = rows*factor + (rows/stride * factor * offset)
    upsamp_matrix = tensor.set_subtensor(upsamp_matrix[rows, cols], 1.)
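
    # The 0/1 matrix built above routes each input activation to the top-left
    # corner of its factor x factor block in the upsampled grid; all other
    # entries of the block stay zero. E.g., for a 2x2 map and factor=2, inputs
    # 0..3 land at flattened output positions 0, 2, 8, 10 of the 4x4 result.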
    flat = tensor.reshape(x, (x.shape[0], output_shape[0], x.shape[2] * x.shape[3]))

    up_flat = tensor.dot(flat, upsamp_matrix)
    upsamp = tensor.reshape(up_flat, (x.shape[0], output_shape[0],
                                      output_shape[1], output_shape[2]))

    return upsamp

def batchnorm( x, g=None, b=None, eps=1e-8 ):
    # Minibatch normalization (no running averages); g and b are the optional
    # scale and shift parameters.
    if x.ndim == 4:
        m = tensor.mean(x, axis=[0, 2, 3]).dimshuffle('x', 0, 'x', 'x')
        v = tensor.mean(tensor.sqr(x - m),
                        axis=[0, 2, 3]).dimshuffle('x', 0, 'x', 'x')

        x = (x - m) / tensor.sqrt( v + eps )
        if g is not None and b is not None:
            x = x*g.dimshuffle('x', 0, 'x', 'x') + b.dimshuffle('x', 0, 'x', 'x')

    elif x.ndim == 2:
        m = tensor.mean(x, axis=0)
        v = tensor.mean(tensor.sqr(x - m), axis=0)

        x = (x - m) / tensor.sqrt( v + eps )
        if g is not None and b is not None:
            x = x*g + b

    return x
--------------------------------------------------------------------------------
/vaegan/optimizers/rmsprop.py:
--------------------------------------------------------------------------------
# coding : utf-8
"""
< RMSProp >
"""

from collections import OrderedDict
import numpy as np
import theano
import theano.tensor as tensor
from opt_proc import opt_proc

class RMSProp( object ):
    """
    RMSProp.
    """

    def __init__( self, model, eta=1e-2, rho=0.9, epsilon=1e-6, minibatch_size=10 ):
        """
        Initialize RMSProp.

        Arguments
        ---------
        model : model instance; should provide params, grad(), [and updates].
        eta : float.
            Learning rate.
        rho : float.
            Decay rate of the squared-gradient moving average.
        epsilon : float.
            Constant for numerical stability.
        minibatch_size : integer.
            Minibatch size used to calculate the stochastic gradient.
        """
        self.model = model
        self.__eta = eta
        self.__rho = rho
        self.__eps = epsilon
        self.minibatch_size = minibatch_size

        self.__compile()

    def __compile( self ):
        self.update_funcs = []
        for params, inputs, cost in self.model.get_opt_infos():
            # Shared variables for the squared-gradient moving averages.
            accs = [ theano.shared(
                np.zeros( p.get_value().shape, dtype=theano.config.floatX ) )
                     for p in params ]

            sgrad = tensor.grad( cost, params )

            # RMSProp: acc_t = rho * acc_{t-1} + (1 - rho) * g_t ** 2.
            new_accs = [ self.__rho * acc + (1 - self.__rho) * sg ** 2
                         for (acc, sg) in zip( accs, sgrad ) ]

            updates = OrderedDict()
            updates.update( zip( accs, new_accs ) )
            updates.update(
                [ (p, p - ( self.__eta * sg / tensor.sqrt( acc_new + self.__eps ) ) )
                  for (p, sg, acc_new)
                  in zip( params, sgrad, new_accs ) ] )

            self.update_funcs.append( theano.function( inputs = inputs,
                                                       updates = updates ) )

    def run( self, X, T, params_dir=u'./tmp_params', callback=None, Xt=None ):
        """
        Run the algorithm for T epochs on training data X.

        Arguments
        ---------
        X : numpy array.
            Data.
        T : integer.
            Number of epochs.
        params_dir : str.
            Path to the directory in which params will be saved.
        callback : function.
        Xt : numpy array.
            Test data.
        """
        opt_proc( self, X, T, params_dir, callback, Xt )
--------------------------------------------------------------------------------
/vaegan/optimizers/adam.py:
--------------------------------------------------------------------------------
# coding : utf-8

"""
< Adam >
"""

from collections import OrderedDict
import numpy as np
import theano
import theano.tensor as tensor
from opt_proc import opt_proc

class Adam( object ):
    """
    Adam.
    """

    def __init__( self, model, eta=1e-3, beta1=0.9, beta2=0.999,
                  epsilon=1e-8, minibatch_size=10 ):
        """
        Initialize Adam.

        Arguments
        ---------
        model : model instance; should provide params, grad(), [and updates].
        eta : float.
            Learning rate.
        beta1, beta2 : float.
            Decay rates of the first- and second-moment estimates.
        epsilon : float.
            Constant for numerical stability.
        minibatch_size : integer.
            Minibatch size used to calculate the stochastic gradient.
        """
        self.model = model
        self.__eta = eta
        self.__beta1 = beta1
        self.__beta2 = beta2
        self.__eps = epsilon
        self.minibatch_size = minibatch_size

        self.__compile()

    def __compile( self ):
        t = theano.shared( np.asarray( 0., dtype=theano.config.floatX ) )
        new_t = t + 1

        # Step size with the bias corrections for both moment estimates folded in.
        a_t = self.__eta * tensor.sqrt( 1 - self.__beta2 ** new_t ) \
              / ( 1 - self.__beta1 ** new_t )

        # Shared variables for the first (ms) and second (ves) moments.
        self.update_funcs = []
        for params, inputs, cost in self.model.get_opt_infos():
            ms = [ theano.shared(
                np.zeros( p.get_value().shape, dtype=theano.config.floatX ) )
                   for p in params ]
            ves = [ theano.shared(
                np.zeros( p.get_value().shape, dtype=theano.config.floatX ) )
                    for p in params ]

            sgrad = tensor.grad( cost, params )

            new_ms = [ self.__beta1 * m + ( 1 - self.__beta1 ) * sg
                       for (m, sg) in zip( ms, sgrad ) ]
            new_ves = [ self.__beta2 * ve + ( 1 - self.__beta2 ) * (sg ** 2)
                        for (ve, sg) in zip( ves, sgrad ) ]

            steps = [ a_t * new_m / ( tensor.sqrt(new_ve) + self.__eps )
                      for (new_m, new_ve) in zip( new_ms, new_ves ) ]

            updates = OrderedDict()
            updates.update( zip( ms, new_ms ) )
            updates.update( zip( ves, new_ves ) )
            updates.update( [ (p, p - step ) for (p, step)
                              in zip( params, steps ) ] )

            self.update_funcs.append( theano.function( inputs = inputs,
                                                       updates = updates ) )

        self.updates = theano.function( [], updates=[ (t, new_t) ] )

    def run( self, X, T, params_dir=u'./tmp_params', callback=None, Xt=None ):
        """
        Run the algorithm for T epochs on training data X.

        Arguments
        ---------
        X : numpy array.
            Data.
        T : integer.
            Number of epochs.
        params_dir : str.
            Path to the directory in which params will be saved.
        callback : function.
        Xt : numpy array.
            Test data.
        """
        opt_proc( self, X, T, params_dir, callback, Xt )
--------------------------------------------------------------------------------
/vaegan/optimizers/opt_proc.py:
--------------------------------------------------------------------------------
# coding : utf-8

"""
< Optimizing Procedure >
"""

import time
import numpy as np
import theano
import theano.tensor as tensor
from ..utils import minibatches
from ..utils import Progress
import cPickle
import os
import sys

NUM = 0
FLUSH_BUF = True
SAVING_INTERVAL = 5
MONITORING_INTERVAL = 5

def set_num( n ):
    global NUM
    NUM = n

def save_params( params, params_dir, param_name=None ):
    global NUM
    if param_name is None:
        fout = open( u'%s/param_%d.model' % (params_dir, NUM), u'wb' )
    else:
        fout = open( u'%s/%s.model' % (params_dir, param_name), u'wb' )
    cPickle.dump( [ p.get_value() for p in params ], fout )
    fout.close()
    NUM += 1

def opt_proc( optimizer, X, T, params_dir, callback=None, Xt=None ):
    """
    Run the optimizer for T epochs on training data X.

    Arguments
    ---------
    optimizer : instance of an optimizer class.
    X : numpy array.
        Data.
    T : integer.
        Number of epochs.
    params_dir : str.
        Path to the directory in which params will be saved.
    callback : function.
    Xt : numpy array.
        Test data.
    """

    if not os.path.isdir( params_dir ):
        os.makedirs( params_dir )

    acc_time = 0.
    acc_itr = 0
    prog = Progress( len( list(
        minibatches( optimizer.minibatch_size, X ) ) ), 50 )
    for epoch in range(T):
        itr = 0
        stime = time.time()
        for Xb in minibatches( optimizer.minibatch_size, X, shuffle_f=True ):
            if FLUSH_BUF:
                prog.prog()
            # e drives the VAE's reparameterized sampling; z is a prior sample
            # used to generate fake images for the discriminator.
            eb = np.asarray( np.random.randn( Xb.shape[0], optimizer.model.n_hidden ),
                             dtype = theano.config.floatX )
            zb = np.asarray( np.random.randn( Xb.shape[0], optimizer.model.n_hidden ),
                             dtype = theano.config.floatX )

            if optimizer.model.phase == 0:
                optimizer.update_funcs[0]( Xb, eb )
                optimizer.update_funcs[1]( Xb, zb )
            else:
                # Balance the generator and the discriminator: when either cost
                # drifts too far from the equilibrium value, freeze the side
                # that is winning (cf. [2] in the README).
                gen_update = True
                gan_update = True
                real_cost = optimizer.model.real_cost_func( Xb )
                fake_cost = optimizer.model.fake_cost_func( Xb, eb, zb )
                equilibrium = 0.68
                margin = 0.4

                if real_cost < equilibrium - margin or \
                   fake_cost < equilibrium - margin:
                    gan_update = False
                if real_cost > equilibrium + margin or \
                   fake_cost > equilibrium + margin:
                    gen_update = False
                if not (gen_update or gan_update):
                    gen_update = True
                    gan_update = True

                if gen_update:
                    optimizer.update_funcs[0]( Xb, eb, zb )
                if gan_update:
                    optimizer.update_funcs[1]( Xb, eb, zb )

            if hasattr( optimizer, 'updates' ):
                optimizer.updates()

            itr += 1

        prog.end()
        etime = time.time()
        acc_time += etime - stime
        acc_itr += itr
        print u'Epoch: %d, Iterations: %d, Time: %f' % \
            ( epoch+1, acc_itr, acc_time )

        if (epoch+1) % MONITORING_INTERVAL == 0:
            print u'train:', optimizer.model.cost( X )
            if Xt is not None:
                print u'test:', optimizer.model.cost( Xt )

        if (epoch+1) % SAVING_INTERVAL == 0 or epoch == T-1:
            if epoch == T-1:
                param_name = u'param'
            else:
                param_name = None
            save_params( optimizer.model.gen_params + optimizer.model.gan_params,
                         params_dir, param_name )
            callback()
--------------------------------------------------------------------------------
/vaegan/transform.py:
--------------------------------------------------------------------------------
# coding : utf-8

import os
import numpy as np
import theano
from data import preprocess, postprocess
from PIL import Image

def image_grid( arrs, grid_shape ):
    # Tile the given image arrays into a grid_shape[0] x grid_shape[1] grid.
    row_arrs = []
    for i in range( grid_shape[0] ):
        img_l = range( i*grid_shape[1], i*grid_shape[1]+grid_shape[1] )
        row_arrs.append( np.hstack( [ arrs[t] for t in img_l ] ) )

    arr = np.vstack( row_arrs )
    img = Image.fromarray( np.uint8(arr) )

    return img

class Morph( object ):
    def __init__( self, model, form_type, sources, sinks=None, output_dir=u'./tmp',
                  shape=(10,10), n_steps=10 ):
        self.model = model
        self.form_type = form_type
        self.sources = sources
        self.shape = shape
        self.n_steps = n_steps
        self.output_dir = output_dir
        self.g_id = 0

        if sinks is None:
            # Without sinks, morph towards codes drawn from the prior.
            self.sink_z = np.asarray( np.random.randn( len(self.sources),
                                                       model.n_hidden ),
                                      dtype=theano.config.floatX )
            self.sinks = None
        else:
            self.sinks = sinks

        if not os.path.isdir( self.output_dir ):
            os.makedirs( self.output_dir )

    def __call__( self ):
        sample_z = self.model.encode( self.sources )
        if self.sinks is not None:
            sink_z = self.model.encode( self.sinks )
        else:
            sink_z = self.sink_z

        # Linear interpolation between the source and sink codes.
        self.paths = [ (1.-r)*sample_z + r*sink_z
                       for r in np.linspace( 0., 1., self.n_steps ) ]

        if self.form_type == 0:
            for i, p in enumerate(self.paths):
                decoded = self.model.decode(p)
                decoded = np.asarray( map( postprocess, decoded ) )
                grid_img = image_grid( decoded, self.shape )
                grid_img.save( u'%s/morphing_%d_%d.jpeg'
                               % (self.output_dir, self.g_id, i) )
        elif self.form_type == 1:
            cor_arrs = [ np.vstack( map( postprocess, self.sources ) ) ]
            for i, p in enumerate(self.paths):
                decoded = self.model.decode(p)
                decoded = np.asarray( map( postprocess, decoded ) )
                cor_arrs.append( np.vstack( decoded ) )

            if self.sinks is not None:
                cor_arrs.append( np.vstack( map( postprocess, self.sinks ) ) )

            arr = np.hstack( cor_arrs )
            path_img = Image.fromarray( np.uint8(arr) )
            path_img.save( u'%s/morphing_%d.jpeg' % (self.output_dir, self.g_id) )
        else:
            raise Exception( u'form_type:%s is not supported.' % self.form_type )

        self.g_id += 1

class Reconstruct( object ):
    def __init__( self, model, samples, output_dir=u'./tmp', shape=(10,10) ):
        self.model = model
        self.samples = samples
        self.shape = shape
        self.output_dir = output_dir
        self.g_id = 0

        if not os.path.isdir( self.output_dir ):
            os.makedirs( self.output_dir )

    def __call__( self ):
        reconstructed = self.model.reconstruct( self.samples )
        reconstructed = np.asarray( map( postprocess, reconstructed ) )
        grid_img = image_grid( reconstructed, self.shape )
        grid_img.save( u'%s/reconstruct_%d.jpeg'
                       % (self.output_dir, self.g_id) )
        self.g_id += 1

class Operate( object ):
    # Arithmetic in latent space: encode the sources, add/subtract other
    # encodings, then decode the result.
    def __init__( self, model, sources, output_dir=u'./tmp' ):
        self.model = model
        self.zs = self.model.encode( sources )
        self.output_dir = output_dir
        self.g_id = 0

        if not os.path.isdir( self.output_dir ):
            os.makedirs( self.output_dir )

    def plus( self, arrs ):
        self.zs += self.model.encode( arrs )

    def minus( self, arrs ):
        self.zs -= self.model.encode( arrs )

    def equal( self ):
        decoded = self.model.decode( self.zs )
        results = np.asarray( map( postprocess, decoded ) )

        grid_img = image_grid( results, (len(results), 1) )
        grid_img.save( u'%s/operate_%d.jpeg'
                       % (self.output_dir, self.g_id) )

        self.g_id += 1
--------------------------------------------------------------------------------
/vaegan/vaegan.py:
--------------------------------------------------------------------------------
# coding : utf-8
"""
< Variational auto-encoder + generative adversarial net >
"""

import numpy as np
import theano
import theano.tensor as tensor
from theano.tensor.shared_randomstreams import RandomStreams
from functions import *
import utils

def uniform_param( shape, name, scale=0.1 ):
    return theano.shared(
        np.random.uniform( size=shape, low=-scale, high=scale ).astype(
            dtype=theano.config.floatX ), name=name )

def zeros_param( shape, name ):
    return theano.shared( np.zeros( shape, dtype=theano.config.floatX ), name=name )

class VAEGAN( object ):
    def __init__( self, seed=1234 ):
        self.phase = 0
        self.alpha = 0.5

        # Random
        np.random.seed( seed )
        self.numpy_rng = np.random.RandomState( seed )
        self.theano_rng = RandomStreams( self.numpy_rng.randint( 2**30 ) )

        self.x = tensor.tensor4()
        self.e = tensor.matrix()
        self.z_in = tensor.matrix()
        self.n_hidden = 512
        self.image_size = 64

        # Build network
        ## VAE
        mu, log_sigma, self.z = self._build_encoding_layer( self.x, self.e )
        self.y = self._build_decoding_layer( self.z )

        self.gen_params = self.encoding_params + self.decoding_params

        ## GAN
        p_org, v_org = self._build_gan_layer( self.x )
        p_org2, v_gen = self._build_gan_layer( self.y )
        p_gen = 1 - p_org2

        self.y_out = self._build_decoding_layer( self.z_in )
        p_org3, v_gen2 = self._build_gan_layer( self.y_out )
        p_gen2 = 1 - p_org3

        # Build costs
        minibatch_size = tensor.cast( self.x.shape[0], theano.config.floatX )

        ## Prior term (maximize; the negative KL divergence from N(0, I))
        self.prior_cost = 0.5 * tensor.sum( 1 + 2*log_sigma - mu**2
                                            - tensor.exp(2*log_sigma),
                                            axis=1 ).mean()

        ## Early phase (train the VAE on a pixel-wise error, and the
        ## discriminator, separately)
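        ## Here the generator never sees the discriminator's features: the VAE
        ## minimizes a pixel-wise reconstruction error plus the prior (KL)
        ## term, while the discriminator learns to tell real images from
        ## decodings of prior samples.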
        ## Reconstruction (minimize w.r.t. the generative params)
        self.reconstruct_cost_vis = 0.5 * tensor.sum( tensor.sqr(self.x - self.y),
                                                      axis=(1,2,3) ).mean()

        ## GAN (maximize w.r.t. the discriminator params)
        self.gan_logprob = tensor.mean( tensor.log( p_org ) + tensor.log( p_gen2 ) )

        ## Overall cost w.r.t. the generator in the early phase
        ## inputs: [ self.x, self.e ]
        self.early_cost_gen = self.reconstruct_cost_vis - self.prior_cost

        ## Overall cost w.r.t. the discriminator in the early phase
        ## inputs: [ self.x, self.z_in ]
        self.early_cost_dis = - self.gan_logprob

        ## Monitoring cost in the early phase
        self.early_monitoring_cost = [ self.reconstruct_cost_vis - self.prior_cost,
                                       self.gan_logprob ]

        self.early_opt_infos = [
            [ self.gen_params, [self.x, self.e], self.early_cost_gen ],
            [ self.gan_params, [self.x, self.z_in], self.early_cost_dis ] ]

        ## Final phase
        ## Reconstruction (minimize w.r.t. the generative params)
        self.reconstruct_cost_hid = 0.5 * tensor.sum( tensor.sqr(v_org - v_gen),
                                                      axis=(1,2,3) ).mean()

        ## GAN (maximize w.r.t. the discriminator params)
        self.real_cost = - tensor.mean( tensor.log( p_org ) )
        self.fake_cost = - 0.5 * ( tensor.mean( tensor.log( p_gen ) ) \
                                   + tensor.mean( tensor.log( p_gen2 ) ) )
        self.gan_logprob_plus = - self.real_cost - self.fake_cost

        ## GAN (minimize w.r.t. the generative params)
        self.gan_logprob_gen = 0.5 * tensor.mean( tensor.log( p_gen )
                                                  + tensor.log( p_gen2 ) )
        # To balance the progresses, we use the following proxy objective.
        # self.gan_logprob_gen = - tensor.mean( tensor.log( 1 - p_gen )
        #                                       - tensor.log( 1 - p_gen2 ) )

        ## Overall cost w.r.t. the generator in the final phase
        ## inputs: [ self.x, self.e, self.z_in ]
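        ## The reconstruction error is now measured between the discriminator's
        ## intermediate feature maps v_org and v_gen instead of between pixels
        ## (the "learned similarity metric" of the VAEGAN paper), and is traded
        ## off against the GAN term by the weight alpha.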
        self.final_cost_gen = self.alpha * ( self.reconstruct_cost_hid \
                                             - self.prior_cost ) \
                              + self.gan_logprob_gen

        ## Overall cost w.r.t. the discriminator in the final phase
        ## inputs: [ self.x, self.e, self.z_in ]
        self.final_cost_dis = - self.gan_logprob_plus

        ## Monitoring cost in the final phase
        self.final_monitoring_cost = self.alpha * ( self.reconstruct_cost_hid \
                                                    - self.prior_cost ) \
                                     + self.gan_logprob_plus

        self.final_opt_infos = [
            [ self.gen_params, [self.x, self.e, self.z_in], self.final_cost_gen ],
            [ self.gan_params, [self.x, self.e, self.z_in], self.final_cost_dis ] ]

        # Compile functions
        self.reconstruct_func = theano.function( [self.x, self.e], self.y )
        self.encode_func = theano.function( [self.x, self.e], self.z )
        self.decode_func = theano.function( [self.z_in], self.y_out )
        self.early_cost_func = theano.function( [self.x, self.e, self.z_in],
                                                self.early_monitoring_cost )
        self.final_cost_func = theano.function( [self.x, self.e, self.z_in],
                                                self.final_monitoring_cost )
        self.real_cost_func = theano.function( [self.x], self.real_cost )
        self.fake_cost_func = theano.function( [self.x, self.e, self.z_in],
                                               self.fake_cost )

    def load_params( self, params ):
        for (p_, p) in zip( params, self.gen_params + self.gan_params ):
            p.set_value(p_)

    def set_phase( self, phase ):
        self.phase = phase

    def get_opt_infos( self ):
        if self.phase == 0:
            return self.early_opt_infos
        else:
            return self.final_opt_infos

    def _build_encoding_layer( self, x, e ):
        down_size = self.image_size // 8

        if not hasattr( self, 'encoding_params' ):
            we1 = uniform_param( (64, 3, 5, 5), u'we1' )
            bwe1 = uniform_param( (64), u'bwe1' )
            bbe1 = zeros_param( (64), u'bbe1' )
            we2 = uniform_param( (128, 64, 5, 5), u'we2' )
            bwe2 = uniform_param( (128), u'bwe2' )
            bbe2 = zeros_param( (128), u'bbe2' )
            we3 = uniform_param( (256, 128, 5, 5), u'we3' )
            bwe3 = uniform_param( (256), u'bwe3' )
            bbe3 = zeros_param( (256), u'bbe3' )
            we4 = uniform_param( (256*(down_size**2), 2048), u'we4' )
            be4 = zeros_param( (2048), u'be4' )
            bwe4 = uniform_param( (2048), u'bwe4' )
            bbe4 = zeros_param( (2048), u'bbe4' )
            wmu = uniform_param( (2048, self.n_hidden), u'wmu' )
            bmu = zeros_param( (self.n_hidden), u'bmu' )
            wsigma = uniform_param( (2048, self.n_hidden), u'wsigma' )
            bsigma = zeros_param( (self.n_hidden), u'bsigma' )

            self.encoding_params = [ we1, bwe1, bbe1, we2, bwe2, bbe2,
                                     we3, bwe3, bbe3,
                                     we4, be4, bwe4, bbe4,
                                     wmu, bmu, wsigma, bsigma ]

        [ we1, bwe1, bbe1, we2, bwe2, bbe2, we3, bwe3, bbe3,
          we4, be4, bwe4, bbe4,
          wmu, bmu, wsigma, bsigma ] = self.encoding_params

        h1 = rectify( batchnorm( max_pool( conv( x, we1 ), (2,2) ), bwe1, bbe1 ) )
        h2 = rectify( batchnorm( max_pool( conv( h1, we2 ), (2,2) ), bwe2, bbe2 ) )
        h3 = rectify( batchnorm( max_pool( conv( h2, we3 ), (2,2) ), bwe3, bbe3 ) )
        h3_ = h3.reshape( (-1, 256*(down_size**2)) )
        h4 = rectify( batchnorm( full_conn( h3_, we4, be4 ), bwe4, bbe4 ) )

        mu = theano.dot( h4, wmu ) + bmu
        log_sigma = 0.5 * ( tensor.dot( h4, wsigma ) + bsigma )
        # e must have the same number of rows as x.
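        # Reparameterization trick: z = mu + sigma * e with e ~ N(0, I) yields
        # a sample from N(mu, sigma^2) while keeping z differentiable w.r.t.
        # the encoder parameters.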
        z = mu + tensor.exp( log_sigma ) * e

        return mu, log_sigma, z

    def _build_decoding_layer( self, z ):
        down_size = self.image_size // 8

        if not hasattr( self, 'decoding_params' ):
            wd4 = uniform_param( (self.n_hidden, 256*(down_size**2) ), u'wd4' )
            bd4 = zeros_param( (256*(down_size**2)), u'bd4' )
            bwd4 = uniform_param( (256*(down_size**2)), u'bwd4' )
            bbd4 = zeros_param( (256*(down_size**2)), u'bbd4' )
            wd3 = uniform_param( (256,256,5,5), u'wd3' )
            bwd3 = uniform_param( (256), u'bwd3' )
            bbd3 = zeros_param( (256), u'bbd3' )
            wd2 = uniform_param( (128,256,5,5), u'wd2' )
            bwd2 = uniform_param( (128), u'bwd2' )
            bbd2 = zeros_param( (128), u'bbd2' )
            wd1 = uniform_param( (32,128,5,5), u'wd1' )
            bwd1 = uniform_param( (32), u'bwd1' )
            bbd1 = zeros_param( (32), u'bbd1' )
            wd0 = uniform_param( (3,32,5,5), u'wd0' )

            self.decoding_params = [ wd4, bd4, bwd4, bbd4,
                                     wd3, bwd3, bbd3,
                                     wd2, bwd2, bbd2, wd1, bwd1, bbd1, wd0 ]

        [ wd4, bd4, bwd4, bbd4, wd3, bwd3, bbd3,
          wd2, bwd2, bbd2, wd1, bwd1, bbd1, wd0 ] \
          = self.decoding_params

        h1 = rectify( batchnorm( full_conn( z, wd4, bd4 ), bwd4, bbd4 ) )
        h1_ = h1.reshape( (-1, 256, down_size, down_size) )
        h2 = rectify( batchnorm( conv( depool( h1_, factor=2 ), wd3 ), bwd3, bbd3 ) )
        h3 = rectify( batchnorm( conv( depool( h2, factor=2 ), wd2 ), bwd2, bbd2 ) )
        h4 = rectify( batchnorm( conv( depool( h3, factor=2 ), wd1 ), bwd1, bbd1 ) )
        y = tanh( conv( h4, wd0 ) )

        return y

    def _build_gan_layer( self, x ):
        down_size = self.image_size // 8

        if not hasattr( self, 'gan_params' ):
            wg1 = uniform_param( (32, 3, 5, 5), u'wg1' )
            wg2 = uniform_param( (128, 32, 5, 5), u'wg2' )
            wg3 = uniform_param( (256, 128, 5, 5), u'wg3' )
            wg4 = uniform_param( (256, 256, 5, 5), u'wg4' )
            wg5 = uniform_param( (256*(down_size**2), 512), u'wg5' )
            bg5 = zeros_param( (512), u'bg5' )
            wg6 = uniform_param( (512, 1), u'wg6' )
            bg6 = zeros_param( (1), u'bg6' )
            self.gan_params = [ wg1, wg2, wg3, wg4, wg5, bg5, wg6, bg6 ]

        [ wg1, wg2, wg3, wg4, wg5, bg5, wg6, bg6 ] = self.gan_params

        h1 = rectify( conv( x, wg1 ) )
        h2 = rectify( max_pool( conv( h1, wg2 ), (2,2) ) )
        h3 = rectify( max_pool( conv( h2, wg3 ), (2,2) ) )
        h4 = rectify( max_pool( conv( h3, wg4 ), (2,2) ) )
        h4_ = h4.reshape( (-1, 256*(down_size**2)) )
        h5 = rectify( full_conn( h4_, wg5, bg5 ) )

        # h3 serves as the feature map for the learned similarity metric.
        image_feature = h3

        return sigmoid( full_conn( h5, wg6, bg6 ) ), image_feature

    def reconstruct( self, x ):
        # A fixed e makes reconstruction and encoding deterministic (note that
        # an e of zeros would decode the posterior mean instead of ones).
        eb = np.asarray( np.ones( (x.shape[0], self.n_hidden) ),
                         dtype = theano.config.floatX )
        return self.reconstruct_func(x, eb)

    def encode( self, x ):
        eb = np.asarray( np.ones( (x.shape[0], self.n_hidden) ),
                         dtype = theano.config.floatX )

        return self.encode_func(x, eb)

    def decode( self, z ):
        return self.decode_func(z)

    def cost( self, X, minibatch_size = 20 ):
        if self.phase == 0:
            val = [0, 0]
        else:
            val = 0

        data_size = X.shape[0]
        for Xb in utils.minibatches( minibatch_size, X, shuffle_f=False ):
            eb = np.asarray( np.random.randn( Xb.shape[0], self.n_hidden ),
                             dtype = theano.config.floatX )
            zb = np.asarray( np.random.randn( Xb.shape[0], self.n_hidden ),
                             dtype = theano.config.floatX )
            if self.phase == 0:
                c = self.early_cost_func( Xb, eb, zb )
                val[0] += c[0] * float(Xb.shape[0]) \
                          / float(data_size)
                # Weight by the minibatch size (Xb.shape[0], not Xb.shape[1]).
                val[1] += c[1] * float(Xb.shape[0]) \
                          / float(data_size)
            else:
                val += self.final_cost_func( Xb, eb, zb ) * float(Xb.shape[0]) \
                       / float(data_size)

        return val
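
# A minimal smoke-test sketch: constructing the model compiles every Theano
# function, and reconstructing a random minibatch exercises the tensor shapes
# end to end (slow; illustrative only).
if __name__ == '__main__':
    model = VAEGAN( seed=1234 )
    x = np.asarray( np.random.randn( 2, 3, 64, 64 ),
                    dtype=theano.config.floatX )
    print model.reconstruct( x ).shape  # expected: (2, 3, 64, 64)
--------------------------------------------------------------------------------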