├── model
│   ├── __init__.py
│   ├── configuration.py
│   ├── cca_layer.py
│   ├── dnn.py
│   └── layers.py
├── LICENSE
├── README.md
├── data_provider.py
└── deep_cca.py

--------------------------------------------------------------------------------
/model/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2017 M. Sam Ribeiro

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# deep-cca

### Overview
**Deep Canonical Correlation Analysis** (DCCA) implementation using Theano

DCCA is a nonlinear extension of Canonical Correlation Analysis (CCA). Given two views of the same data, DCCA learns transformations of each view that are maximally correlated (Andrew et al., 2013). This implementation adopts the stochastic optimization approach of Wang et al. (2015), training with SGD on large minibatches. The data used here is the MNIST dataset: each image is divided into its left and right halves, and we take these to be two views of the same data (see Andrew et al., 2013, for details).

Many thanks to [Herman Kamper](https://github.com/kamperh) for various resources, comments, and discussions.

### References:
- Andrew, Galen, et al. "Deep Canonical Correlation Analysis." ICML (3). 2013.
- Wang, Weiran, et al. "Unsupervised learning of acoustic features via deep canonical correlation analysis." 2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 2015.


### Running

```sh
$ python ./deep_cca.py
```
If you wish to run on a GPU, you might want to try something like
```sh
$ THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32,force_device=True python ./deep_cca.py
```
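
### The two views

A minimal NumPy sketch (for illustration only; it mirrors `halve_dataset` in `data_provider.py`) of how one flattened 784-dimensional MNIST digit becomes the two 392-dimensional views:

```python
import numpy as np

image = np.random.rand(784)         # stand-in for one flattened MNIST digit
square = image.reshape(28, 28)

view1 = square[:, :14].reshape(-1)  # left half:  28 x 14 = 392 dimensions
view2 = square[:, 14:].reshape(-1)  # right half: 28 x 14 = 392 dimensions

assert view1.shape == (392,) and view2.shape == (392,)
```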
--------------------------------------------------------------------------------
/model/configuration.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""

Deep CCA configuration

Author: M. Sam Ribeiro
Date: 2017

"""


class Config(object):

    def __init__(self):

        self.learning_rate = 0.01
        self.epochs = 100
        self.batch_size = 10000

        # L1 and L2 regularization -- not implemented yet
        #self.L1_reg = 0.00
        #self.L2_reg = 0.0001

        self.cca_dim = 50     # number of CCA dimensions
        self.cca_reg1 = 1e-4  # CCA regularization for view 1
        self.cca_reg2 = 1e-4  # CCA regularization for view 2

        # architectures for view1 and view2:
        # each list item is a hidden layer; each int is the number of nodes
        # e.g. [1000, 1000, 1000] is 3 hidden layers with 1000 nodes each
        self.architecture1 = [1024, 512, 256, 128]
        self.architecture2 = [1024, 512, 256, 128]


    def set_data_config(self, data):
        train_set_x1, train_set_x2 = data[0]
        valid_set_x1, valid_set_x2 = data[1]
        test_set_x1, test_set_x2 = data[2]

        self.n_train_batches = train_set_x1.get_value(borrow=True).shape[0] // self.batch_size
        self.n_valid_batches = valid_set_x1.get_value(borrow=True).shape[0] // self.batch_size
        self.n_test_batches = test_set_x1.get_value(borrow=True).shape[0] // self.batch_size

        self.x1_dim = train_set_x1.get_value(borrow=True).shape[1]
        self.x2_dim = train_set_x2.get_value(borrow=True).shape[1]

--------------------------------------------------------------------------------
/model/cca_layer.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""

Theano-based Deep CCA implementation

Mostly based on the following papers:
(1) Andrew, Galen, et al.
    "Deep Canonical Correlation Analysis."
    ICML (3). 2013.

(2) Wang, Weiran, et al.
    "Unsupervised learning of acoustic features via deep canonical correlation analysis."
    2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 2015.

Many thanks to Herman Kamper (https://github.com/kamperh) for various resources, comments, and discussions.

Author: M. Sam Ribeiro
Date: 2017

"""


import theano
import theano.tensor as T


class CCA(object):
    ''' Linear Canonical Correlation Analysis (CCA) '''

    def __init__(self, config):

        # input dimension to CCA layer
        self.in_dim1 = config.architecture1[-1]
        self.in_dim2 = config.architecture2[-1]

        # CCA dimension, i.e., number of canonical components
        self.cca_dim = config.cca_dim

        # CCA regularization
        self.reg1 = config.cca_reg1
        self.reg2 = config.cca_reg2

        # for numerical stability (from H. Kamper via W. Wang)
        self.eps = 1e-12


    def cca(self, data1, data2):

        n_data = data1.shape[0]

        # center the data
        data1 -= T.mean(data1, axis=0)
        data2 -= T.mean(data2, axis=0)
        data1 = data1.T
        data2 = data2.T

        # find covariance matrices
        sigma11 = (1/(n_data-1.)) * T.dot(data1, data1.T)
        sigma22 = (1/(n_data-1.)) * T.dot(data2, data2.T)
        sigma12 = (1/(n_data-1.)) * T.dot(data1, data2.T)

        # add regularization
        sigma11 += self.reg1 * T.eye(self.in_dim1)
        sigma22 += self.reg2 * T.eye(self.in_dim2)

        # diagonalize covariance matrices to find their inverse square roots
        diag1, q1 = T.nlinalg.eigh(sigma11)
        diag2, q2 = T.nlinalg.eigh(sigma22)

        # numerical stability (from H. Kamper, via W. Wang):
        # drop eigenvalues that are effectively zero
        # http://stackoverflow.com/questions/20590909/returning-the-index-of-a-value-in-theano-vector
        idx = T.gt(diag1, self.eps).nonzero()[0]
        diag1 = diag1[idx]
        q1 = q1[:, idx]
        idx = T.gt(diag2, self.eps).nonzero()[0]
        diag2 = diag2[idx]
        q2 = q2[:, idx]

        # inverse square roots of the covariance matrices, then the
        # coupling matrix T = sigma11^(-1/2) * sigma12 * sigma22^(-1/2)
        sigma11_inv = T.dot(q1, T.dot(T.diag(diag1**(-0.5)), q1.T))
        sigma22_inv = T.dot(q2, T.dot(T.diag(diag2**(-0.5)), q2.T))
        T_corr = T.dot(sigma11_inv, T.dot(sigma12, sigma22_inv))

        # find the singular values of T through the eigenvalues of T*T.T
        # (symbolic sort is not in-place, so rebind the result)
        Tdiag, Tevec = T.nlinalg.eigh(T.dot(T_corr, T_corr.T))
        Tdiag = Tdiag[T.gt(Tdiag, self.eps).nonzero()[0]]
        Tdiag = T.sort(Tdiag)
        Tdiag = Tdiag[::-1]**(0.5)

        # take the top k canonical components (top k singular values)
        # here we negate corr to treat this as a minimization problem
        corr = -T.sum(Tdiag[:self.cca_dim])
        mean = T.mean(Tdiag[:self.cca_dim])

        return corr, mean
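

if __name__ == '__main__':
    # Added sanity check (not in the original file): verify the recipe above
    # against a plain NumPy computation of the total canonical correlation on
    # random data. The NumPy path follows the same steps: whiten each view
    # with the inverse square root of its (regularized) covariance, then sum
    # the top-k singular values of the coupling matrix.
    import numpy as np

    rng = np.random.RandomState(0)
    n, d1, d2, k, reg = 1000, 8, 6, 4, 1e-4

    X1 = rng.randn(n, d1)
    X2 = rng.randn(n, d2)
    X1 -= X1.mean(axis=0)
    X2 -= X2.mean(axis=0)

    S11 = X1.T.dot(X1) / (n - 1.) + reg * np.eye(d1)
    S22 = X2.T.dot(X2) / (n - 1.) + reg * np.eye(d2)
    S12 = X1.T.dot(X2) / (n - 1.)

    # inverse square roots via eigendecomposition
    e1, Q1 = np.linalg.eigh(S11)
    e2, Q2 = np.linalg.eigh(S22)
    S11_isqrt = Q1.dot(np.diag(e1 ** -0.5)).dot(Q1.T)
    S22_isqrt = Q2.dot(np.diag(e2 ** -0.5)).dot(Q2.T)

    T_mat = S11_isqrt.dot(S12).dot(S22_isqrt)
    corr = np.linalg.svd(T_mat, compute_uv=False)[:k].sum()
    print('total canonical correlation (top %d): %.4f' % (k, corr))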

--------------------------------------------------------------------------------
/model/dnn.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""

Parallel network and feedforward network.

Author: M. Sam Ribeiro
Date: 2017

"""

import numpy
import theano
import theano.tensor as T

from .cca_layer import CCA
from .layers import HiddenLayer, ConvPoolLayer


class DNN(object):

    def __init__(self, rng, in_x, in_size, architecture, activation=T.tanh):
        ''' Single feedforward Deep Neural Network '''

        self.layers = []
        self.params = []
        self.n_layers = len(architecture)

        assert self.n_layers > 0

        self.x = in_x

        for i in range(self.n_layers):
            if i == 0:
                input_size = in_size
                layer_input = self.x
            else:
                input_size = architecture[i-1]
                layer_input = self.layers[-1].output

            hidden_layer = HiddenLayer(rng=rng,
                                       input=layer_input,
                                       n_in=input_size,
                                       n_out=architecture[i],
                                       activation=activation)
            self.layers.append(hidden_layer)
            self.params.extend(hidden_layer.params)

        self.output = self.layers[-1].output



class ParallelDNN(object):

    def __init__(self, config, data):
        ''' Parallel DNN with CCA objective function '''

        index = T.lscalar()  # index to a [mini]batch
        x1 = T.matrix("x1", dtype=theano.config.floatX)  # view1 of the data
        x2 = T.matrix("x2", dtype=theano.config.floatX)  # view2 of the data

        rng = numpy.random.RandomState(1234)

        # parallel networks
        dnn1 = DNN(rng, x1, config.x1_dim, config.architecture1)
        dnn2 = DNN(rng, x2, config.x2_dim, config.architecture2)

        # CCA objective function
        cca = CCA(config)
        cost, mean = cca.cca(dnn1.output, dnn2.output)

        # plain SGD updates on the parameters of both networks
        params = dnn1.params + dnn2.params
        gparams = [T.grad(cost, param) for param in params]

        updates = [
            (param, param - config.learning_rate * gparam)
            for param, gparam in zip(params, gparams)
        ]

        train_set_x1, train_set_x2 = data[0]
        valid_set_x1, valid_set_x2 = data[1]
        test_set_x1, test_set_x2 = data[2]


        self.train = theano.function(
            inputs=[index],
            outputs=[cost, mean],
            updates=updates,
            givens={
                x1: train_set_x1[index * config.batch_size: (index + 1) * config.batch_size],
                x2: train_set_x2[index * config.batch_size: (index + 1) * config.batch_size]
            }
        )

        self.valid = theano.function(
            inputs=[index],
            outputs=[cost, mean],
            givens={
                x1: valid_set_x1[index * config.batch_size:(index + 1) * config.batch_size],
                x2: valid_set_x2[index * config.batch_size:(index + 1) * config.batch_size]
            }
        )

        self.test = theano.function(
            inputs=[index],
            outputs=[cost, mean],
            givens={
                x1: test_set_x1[index * config.batch_size:(index + 1) * config.batch_size],
                x2: test_set_x2[index * config.batch_size:(index + 1) * config.batch_size]
            }
        )
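
        # Added for convenience (not part of the original implementation):
        # feed-forward functions that read out the learned nonlinear
        # projections of each view for arbitrary batches after training.
        self.project1 = theano.function(inputs=[x1], outputs=dnn1.output)
        self.project2 = theano.function(inputs=[x2], outputs=dnn2.output)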

--------------------------------------------------------------------------------
/data_provider.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""

MNIST data provider for Deep CCA.
MNIST digits are divided into left and right halves for the DCCA model.
These correspond to two views of the same data.

Based on code from http://deeplearning.net/tutorial

Author: M. Sam Ribeiro
Date: 2017

"""

import os
import gzip
import pickle
import logging
import theano
import numpy as np


def load_data(dataset, shared=False):
    ''' Loads the dataset

    :type dataset: string
    :param dataset: the path to the dataset (here MNIST)
    '''

    logging.info('... loading data')

    # Download the MNIST dataset if it is not present
    data_dir, data_file = os.path.split(dataset)
    if data_dir == "" and not os.path.isfile(dataset):
        # Check if dataset is in the data directory.
        new_path = os.path.join(
            os.path.split(__file__)[0],
            "..",
            "data",
            dataset
        )
        if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
            dataset = new_path

    if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
        from six.moves import urllib
        origin = (
            'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        )
        logging.info('Downloading data from %s' % origin)
        urllib.request.urlretrieve(origin, dataset)

    # Load the dataset
    fid = gzip.open(dataset, 'rb')
    try:
        # Python 3: the MNIST pickle was written by Python 2, so decode
        # its byte strings as latin-1
        train_set, valid_set, test_set = pickle.load(fid, encoding='latin1')
    except TypeError:
        # Python 2: pickle.load() takes no encoding argument
        train_set, valid_set, test_set = pickle.load(fid)
    fid.close()

    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix)
    # where each row corresponds to an example. target is a
    # numpy.ndarray of 1 dimension (vector) that has the same length as
    # the number of rows in the input. It should give the target
    # to the example with the same index in the input.

    def halve_dataset(data_xy):
        """ break each MNIST image into its left and right halves """
        data, label = data_xy
        m, n = data.shape
        left = np.zeros((m, n // 2))
        right = np.zeros((m, n // 2))

        for i in range(m):
            image = data[i].reshape(28, 28)
            left[i] = image[:, :14].reshape(1, -1)
            right[i] = image[:, 14:].reshape(1, -1)
        return (left, right, label)


    def shared_dataset(data_xxy, borrow=True):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on a GPU).
        Since copying data into the GPU is slow, copying a minibatch every
        time one is needed (the default behaviour if the data is not in a
        shared variable) would lead to a large decrease in performance.
        """
        data_x1, data_x2, data_y = data_xxy
        shared_x1 = theano.shared(np.asarray(data_x1,
                                             dtype=theano.config.floatX),
                                  borrow=borrow)
        shared_x2 = theano.shared(np.asarray(data_x2,
                                             dtype=theano.config.floatX),
                                  borrow=borrow)
        return shared_x1, shared_x2

    train_set = halve_dataset(train_set)
    valid_set = halve_dataset(valid_set)
    test_set = halve_dataset(test_set)

    if shared:
        train_set_x1, train_set_x2 = shared_dataset(train_set)
        valid_set_x1, valid_set_x2 = shared_dataset(valid_set)
        test_set_x1, test_set_x2 = shared_dataset(test_set)
    else:
        train_set_x1, train_set_x2, train_set_y = train_set
        valid_set_x1, valid_set_x2, valid_set_y = valid_set
        test_set_x1, test_set_x2, test_set_y = test_set

    data = [
        (train_set_x1, train_set_x2),
        (valid_set_x1, valid_set_x2),
        (test_set_x1, test_set_x2)
    ]

    return data
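

if __name__ == '__main__':
    # Added smoke test (not in the original file): load MNIST, split it into
    # the two views, and report their shapes. Each view should be
    # 28 x 14 = 392 dimensional.
    logging.basicConfig(level=logging.INFO)
    provided = load_data('./mnist.pkl.gz', shared=False)
    train_x1, train_x2 = provided[0]
    print('view1 (left halves): ', train_x1.shape)   # expected: (50000, 392)
    print('view2 (right halves):', train_x2.shape)   # expected: (50000, 392)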

--------------------------------------------------------------------------------
/model/layers.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""

Hidden layers for deep neural networks.
Based on code from http://deeplearning.net/tutorial

Author: M. Sam Ribeiro
Date: 2017

"""

import numpy
import theano
import theano.tensor as T

# required by ConvPoolLayer below
from theano.tensor.nnet import conv2d
from theano.tensor.signal import pool



class HiddenLayer(object):

    def __init__(self, rng, input, n_in, n_out, W=None, b=None,
                 activation=T.tanh):
        """
        Typical hidden layer of an MLP with fully-connected units.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dmatrix
        :param input: a symbolic tensor of shape (n_examples, n_in)

        :type n_in: int
        :param n_in: dimensionality of input

        :type n_out: int
        :param n_out: number of hidden units

        :type activation: theano.Op or function
        :param activation: nonlinearity to be applied in the hidden layer
        """
        self.input = input

        if W is None:
            # Glorot-style uniform initialization
            W_values = numpy.asarray(
                rng.uniform(
                    low=-numpy.sqrt(6. / (n_in + n_out)),
                    high=numpy.sqrt(6. / (n_in + n_out)),
                    size=(n_in, n_out)
                ),
                dtype=theano.config.floatX
            )
            if activation == theano.tensor.nnet.sigmoid:
                W_values *= 4

            W = theano.shared(value=W_values, name='W', borrow=True)

        if b is None:
            b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_values, name='b', borrow=True)

        self.W = W
        self.b = b

        lin_output = T.dot(input, self.W) + self.b
        self.output = activation(lin_output)
        self.params = [self.W, self.b]


class ConvPoolLayer(object):

    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
        """
        Convolution and Pooling layer.
        (Not used by the Deep CCA model in this repository; kept for reference.)

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type poolsize: tuple or list of length 2
        :param poolsize: the downsampling (pooling) factor (#rows, #cols)
        """

        assert image_shape[1] == filter_shape[1]
        self.input = input

        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        fan_in = numpy.prod(filter_shape[1:])
        # each unit in the lower layer receives a gradient from:
        # "num output feature maps * filter height * filter width" /
        # pooling size
        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) //
                   numpy.prod(poolsize))
        # initialize weights with random weights
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        self.W = theano.shared(
            numpy.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                dtype=theano.config.floatX
            ),
            borrow=True
        )

        # the bias is a 1D tensor -- one bias per output feature map
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        # convolve input feature maps with filters
        conv_out = conv2d(
            input=input,
            filters=self.W,
            filter_shape=filter_shape,
            input_shape=image_shape
        )

        # pool each feature map individually, using maxpooling
        pooled_out = pool.pool_2d(
            input=conv_out,
            ds=poolsize,
            ignore_border=True
        )

        # add the bias term. Since the bias is a vector (1D array), we first
        # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
        # thus be broadcasted across mini-batches and feature map
        # width & height
        self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

        # store parameters of this layer
        self.params = [self.W, self.b]

        # keep track of model input
        self.input = input
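

if __name__ == '__main__':
    # Added usage sketch (not in the original file): exercise a HiddenLayer
    # on random data and print the activation shape.
    rng = numpy.random.RandomState(1234)
    x = T.matrix('x')
    layer = HiddenLayer(rng, input=x, n_in=392, n_out=128)
    f = theano.function([x], layer.output)
    batch = numpy.random.rand(10, 392).astype(theano.config.floatX)
    print(f(batch).shape)  # -> (10, 128)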

--------------------------------------------------------------------------------
/deep_cca.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Theano-based Deep Canonical Correlation Analysis (Deep CCA) on MNIST data.

Training procedure based on code from http://deeplearning.net/tutorial

Author: M. Sam Ribeiro
Date: 2017

"""

import os
import time
import logging

import numpy
import theano
import theano.tensor as T

from model.dnn import ParallelDNN
from model.configuration import Config
from data_provider import load_data


def train(cfg, model):

    logging.info('... training')

    # early-stopping parameters
    patience = 10000               # look at this many examples regardless
    patience_increase = 2          # wait this much longer when a new best
                                   # is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(cfg.n_train_batches, patience // 2)
                                   # go through this many minibatches before
                                   # checking the network on the validation
                                   # set; in this case we check every epoch

    best_validation_score = numpy.inf
    best_epoch = 0
    start_time = time.time()

    epoch = 0
    done_looping = False

    train_model = model.train
    validate_model = model.valid
    test_model = model.test

    while (epoch < cfg.epochs) and (not done_looping):

        epoch = epoch + 1
        train_correlations = []
        train_means = []
        epoch_start_time = time.time()

        for minibatch_index in range(cfg.n_train_batches):

            minibatch_avg_cost, mini_batch_mean = train_model(minibatch_index)
            train_correlations.append(float(minibatch_avg_cost))
            train_means.append(float(mini_batch_mean))

            iteration = (epoch - 1) * cfg.n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:

                epoch_train_corr = numpy.mean(train_correlations)
                epoch_train_mean = numpy.mean(train_means)

                valid_correlations = []
                valid_means = []
                for i in range(cfg.n_valid_batches):
                    valid_corr, valid_mean = validate_model(i)
                    valid_correlations.append(float(valid_corr))
                    valid_means.append(float(valid_mean))

                epoch_valid_corr = numpy.mean(valid_correlations)
                epoch_valid_mean = numpy.mean(valid_means)

                # if we got the best validation score until now (the cost is
                # the negated total correlation, so lower is better)
                if epoch_valid_corr < best_validation_score:
                    # improve patience if loss improvement is good enough
                    if (
                        epoch_valid_corr < best_validation_score *
                        improvement_threshold
                    ):
                        patience = max(patience, iteration * patience_increase)

                    best_validation_score = epoch_valid_corr
                    best_epoch = epoch

        epoch_time = time.time() - epoch_start_time
        logging.info(
            'epoch {0}, train correlation {1:.2f} (mean: {2:.2f}), validation correlation {3:.2f} (mean: {4:.2f}), time {5:.2f}s' \
            .format(epoch, -epoch_train_corr, epoch_train_mean, -epoch_valid_corr, epoch_valid_mean, epoch_time))

        if patience <= iteration:
            done_looping = True
            break

    running_time = time.time() - start_time
    filename = os.path.split(__file__)[1]

    logging.info('Optimization complete. Best validation score of {0:.2f} obtained at epoch {1}' \
        .format(-best_validation_score, best_epoch))
    logging.info('The code for file {0} ran for {1:.2f}m'.format(filename, running_time / 60.))


def test(cfg, model):

    test_model = model.test
    test_means = []
    test_correlations = []

    start_time = time.time()

    for i in range(cfg.n_test_batches):
        test_corr, test_mean = test_model(i)
        test_correlations.append(float(test_corr))
        test_means.append(float(test_mean))

    test_corr = numpy.mean(test_correlations)
    test_mean = numpy.mean(test_means)
    running_time = time.time() - start_time

    logging.info('test correlation {0:.2f} (mean {1:.2f}), time {2:.2f}s' \
        .format(-test_corr, test_mean, running_time))



if __name__ == "__main__":

    logging.basicConfig(format='%(asctime)-15s %(levelname)s: %(message)s', datefmt='%m/%d/%Y %H:%M:%S', level=logging.INFO)

    # load dataset
    dataset = './mnist.pkl.gz'
    datasets = load_data(dataset, shared=True)

    # set configuration
    cfg = Config()
    cfg.set_data_config(datasets)

    # build model
    model = ParallelDNN(cfg, datasets)

    # train and evaluate
    train(cfg, model)
    test(cfg, model)


# THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32,force_device=True python ./deep_cca.py

--------------------------------------------------------------------------------