├── model
│   ├── __init__.py
│   ├── configuration.py
│   ├── cca_layer.py
│   ├── dnn.py
│   └── layers.py
├── LICENSE
├── README.md
├── data_provider.py
└── deep_cca.py

--------------------------------------------------------------------------------
/model/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2017 M. Sam Ribeiro

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# deep-cca

### Overview
**Deep Canonical Correlation Analysis** (DCCA) implementation using Theano

DCCA is a nonlinear extension of Canonical Correlation Analysis (CCA). Given two views of the same data, DCCA learns transformations of each view that are maximally correlated (Andrew et al., 2013). This implementation adopts the stochastic optimization approach of Wang et al. (2015), training with SGD on large minibatches. The data used here is the MNIST dataset: each image is divided into its left and right halves, and we take these to be two views of the same data (see Andrew et al., 2013, for details).

Many thanks to [Herman Kamper](https://github.com/kamperh) for various resources, comments, and discussions.

### References:
- Andrew, Galen, et al. "Deep Canonical Correlation Analysis." ICML (3). 2013.
- Wang, Weiran, et al. "Unsupervised learning of acoustic features via deep canonical correlation analysis." 2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 2015.


### Running

```sh
$ python ./deep_cca.py
```
If you wish to run on a GPU, you might want to try something like
```sh
$ THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32,force_device=True python ./deep_cca.py
```
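
### The two views

A minimal NumPy sketch (for illustration only; it mirrors `halve_dataset` in `data_provider.py`) of how one flattened 784-dimensional MNIST digit becomes the two 392-dimensional views:

```python
import numpy as np

image = np.random.rand(784)         # stand-in for one flattened MNIST digit
square = image.reshape(28, 28)

view1 = square[:, :14].reshape(-1)  # left half:  28 x 14 = 392 dimensions
view2 = square[:, 14:].reshape(-1)  # right half: 28 x 14 = 392 dimensions

assert view1.shape == (392,) and view2.shape == (392,)
```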
--------------------------------------------------------------------------------
/model/configuration.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""

Deep CCA configuration

Author: M. Sam Ribeiro
Date: 2017

"""


class Config(object):

    def __init__(self):

        self.learning_rate = 0.01
        self.epochs = 100
        self.batch_size = 10000

        # L1 and L2 regularization -- not implemented yet
        #self.L1_reg = 0.00
        #self.L2_reg = 0.0001

        self.cca_dim = 50     # number of CCA dimensions
        self.cca_reg1 = 1e-4  # CCA regularization for view 1
        self.cca_reg2 = 1e-4  # CCA regularization for view 2

        # architectures for view1 and view2:
        # each list item is a hidden layer; each int is the number of nodes
        # e.g. [1000, 1000, 1000] is 3 hidden layers with 1000 nodes each
        self.architecture1 = [1024, 512, 256, 128]
        self.architecture2 = [1024, 512, 256, 128]


    def set_data_config(self, data):
        train_set_x1, train_set_x2 = data[0]
        valid_set_x1, valid_set_x2 = data[1]
        test_set_x1, test_set_x2 = data[2]

        self.n_train_batches = train_set_x1.get_value(borrow=True).shape[0] // self.batch_size
        self.n_valid_batches = valid_set_x1.get_value(borrow=True).shape[0] // self.batch_size
        self.n_test_batches = test_set_x1.get_value(borrow=True).shape[0] // self.batch_size

        self.x1_dim = train_set_x1.get_value(borrow=True).shape[1]
        self.x2_dim = train_set_x2.get_value(borrow=True).shape[1]

--------------------------------------------------------------------------------
/model/cca_layer.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""

Theano-based Deep CCA implementation

Mostly based on the following papers:
(1) Andrew, Galen, et al.
    "Deep Canonical Correlation Analysis."
    ICML (3). 2013.

(2) Wang, Weiran, et al.
    "Unsupervised learning of acoustic features via deep canonical correlation analysis."
    2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 2015.

Many thanks to Herman Kamper (https://github.com/kamperh) for various resources, comments, and discussions.

Author: M. Sam Ribeiro
Date: 2017

"""


import theano
import theano.tensor as T


class CCA(object):
    ''' Linear Canonical Correlation Analysis (CCA) '''

    def __init__(self, config):

        # input dimension to CCA layer
        self.in_dim1 = config.architecture1[-1]
        self.in_dim2 = config.architecture2[-1]

        # CCA dimension, i.e., number of canonical components
        self.cca_dim = config.cca_dim

        # CCA regularization
        self.reg1 = config.cca_reg1
        self.reg2 = config.cca_reg2

        # for numerical stability (from H. Kamper via W. Wang)
        self.eps = 1e-12


    def cca(self, data1, data2):

        n_data = data1.shape[0]

        # center the data
        data1 -= T.mean(data1, axis=0)
        data2 -= T.mean(data2, axis=0)
        data1 = data1.T
        data2 = data2.T

        # find covariance matrices
        sigma11 = (1/(n_data-1.)) * T.dot(data1, data1.T)
        sigma22 = (1/(n_data-1.)) * T.dot(data2, data2.T)
        sigma12 = (1/(n_data-1.)) * T.dot(data1, data2.T)

        # add regularization
        sigma11 += self.reg1 * T.eye(self.in_dim1)
        sigma22 += self.reg2 * T.eye(self.in_dim2)

        # diagonalize covariance matrices to find their inverse square roots
        diag1, q1 = T.nlinalg.eigh(sigma11)
        diag2, q2 = T.nlinalg.eigh(sigma22)

        # numerical stability (from H. Kamper, via W. Wang):
        # drop eigenvalues that are effectively zero
        # http://stackoverflow.com/questions/20590909/returning-the-index-of-a-value-in-theano-vector
        idx = T.gt(diag1, self.eps).nonzero()[0]
        diag1 = diag1[idx]
        q1 = q1[:, idx]
        idx = T.gt(diag2, self.eps).nonzero()[0]
        diag2 = diag2[idx]
        q2 = q2[:, idx]

        # inverse square roots of the covariance matrices, then the
        # coupling matrix T = sigma11^(-1/2) * sigma12 * sigma22^(-1/2)
        sigma11_inv = T.dot(q1, T.dot(T.diag(diag1**(-0.5)), q1.T))
        sigma22_inv = T.dot(q2, T.dot(T.diag(diag2**(-0.5)), q2.T))
        T_corr = T.dot(sigma11_inv, T.dot(sigma12, sigma22_inv))

        # find the singular values of T through the eigenvalues of T*T.T
        # (symbolic sort is not in-place, so rebind the result)
        Tdiag, Tevec = T.nlinalg.eigh(T.dot(T_corr, T_corr.T))
        Tdiag = Tdiag[T.gt(Tdiag, self.eps).nonzero()[0]]
        Tdiag = T.sort(Tdiag)
        Tdiag = Tdiag[::-1]**(0.5)

        # take the top k canonical components (top k singular values)
        # here we negate corr to treat this as a minimization problem
        corr = -T.sum(Tdiag[:self.cca_dim])
        mean = T.mean(Tdiag[:self.cca_dim])

        return corr, mean
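

if __name__ == '__main__':
    # Added sanity check (not in the original file): verify the recipe above
    # against a plain NumPy computation of the total canonical correlation on
    # random data. The NumPy path follows the same steps: whiten each view
    # with the inverse square root of its (regularized) covariance, then sum
    # the top-k singular values of the coupling matrix.
    import numpy as np

    rng = np.random.RandomState(0)
    n, d1, d2, k, reg = 1000, 8, 6, 4, 1e-4

    X1 = rng.randn(n, d1)
    X2 = rng.randn(n, d2)
    X1 -= X1.mean(axis=0)
    X2 -= X2.mean(axis=0)

    S11 = X1.T.dot(X1) / (n - 1.) + reg * np.eye(d1)
    S22 = X2.T.dot(X2) / (n - 1.) + reg * np.eye(d2)
    S12 = X1.T.dot(X2) / (n - 1.)

    # inverse square roots via eigendecomposition
    e1, Q1 = np.linalg.eigh(S11)
    e2, Q2 = np.linalg.eigh(S22)
    S11_isqrt = Q1.dot(np.diag(e1 ** -0.5)).dot(Q1.T)
    S22_isqrt = Q2.dot(np.diag(e2 ** -0.5)).dot(Q2.T)

    T_mat = S11_isqrt.dot(S12).dot(S22_isqrt)
    corr = np.linalg.svd(T_mat, compute_uv=False)[:k].sum()
    print('total canonical correlation (top %d): %.4f' % (k, corr))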

--------------------------------------------------------------------------------
/model/dnn.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""

Parallel network and feedforward network.

Author: M. Sam Ribeiro
Date: 2017

"""

import numpy
import theano
import theano.tensor as T

from .cca_layer import CCA
from .layers import HiddenLayer, ConvPoolLayer


class DNN(object):

    def __init__(self, rng, in_x, in_size, architecture, activation=T.tanh):
        ''' Single feedforward Deep Neural Network '''

        self.layers = []
        self.params = []
        self.n_layers = len(architecture)

        assert self.n_layers > 0

        self.x = in_x

        for i in range(self.n_layers):
            if i == 0:
                input_size = in_size
                layer_input = self.x
            else:
                input_size = architecture[i-1]
                layer_input = self.layers[-1].output

            hidden_layer = HiddenLayer(rng=rng,
                                       input=layer_input,
                                       n_in=input_size,
                                       n_out=architecture[i],
                                       activation=activation)
            self.layers.append(hidden_layer)
            self.params.extend(hidden_layer.params)

        self.output = self.layers[-1].output



class ParallelDNN(object):

    def __init__(self, config, data):
        ''' Parallel DNN with CCA objective function '''

        index = T.lscalar()  # index to a [mini]batch
        x1 = T.matrix("x1", dtype=theano.config.floatX)  # view1 of the data
        x2 = T.matrix("x2", dtype=theano.config.floatX)  # view2 of the data

        rng = numpy.random.RandomState(1234)

        # parallel networks
        dnn1 = DNN(rng, x1, config.x1_dim, config.architecture1)
        dnn2 = DNN(rng, x2, config.x2_dim, config.architecture2)

        # CCA objective function
        cca = CCA(config)
        cost, mean = cca.cca(dnn1.output, dnn2.output)

        # plain SGD updates on the parameters of both networks
        params = dnn1.params + dnn2.params
        gparams = [T.grad(cost, param) for param in params]

        updates = [
            (param, param - config.learning_rate * gparam)
            for param, gparam in zip(params, gparams)
        ]

        train_set_x1, train_set_x2 = data[0]
        valid_set_x1, valid_set_x2 = data[1]
        test_set_x1, test_set_x2 = data[2]


        self.train = theano.function(
            inputs=[index],
            outputs=[cost, mean],
            updates=updates,
            givens={
                x1: train_set_x1[index * config.batch_size: (index + 1) * config.batch_size],
                x2: train_set_x2[index * config.batch_size: (index + 1) * config.batch_size]
            }
        )

        self.valid = theano.function(
            inputs=[index],
            outputs=[cost, mean],
            givens={
                x1: valid_set_x1[index * config.batch_size:(index + 1) * config.batch_size],
                x2: valid_set_x2[index * config.batch_size:(index + 1) * config.batch_size]
            }
        )

        self.test = theano.function(
            inputs=[index],
            outputs=[cost, mean],
            givens={
                x1: test_set_x1[index * config.batch_size:(index + 1) * config.batch_size],
                x2: test_set_x2[index * config.batch_size:(index + 1) * config.batch_size]
            }
        )
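
        # Added for convenience (not part of the original implementation):
        # feed-forward functions that read out the learned nonlinear
        # projections of each view for arbitrary batches after training.
        self.project1 = theano.function(inputs=[x1], outputs=dnn1.output)
        self.project2 = theano.function(inputs=[x2], outputs=dnn2.output)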

--------------------------------------------------------------------------------
/data_provider.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""

MNIST data provider for Deep CCA.
MNIST digits are divided into left and right halves for the DCCA model.
These correspond to two views of the same data.

Based on code from http://deeplearning.net/tutorial

Author: M. Sam Ribeiro
Date: 2017

"""

import os
import gzip
import pickle
import logging
import theano
import numpy as np


def load_data(dataset, shared=False):
    ''' Loads the dataset

    :type dataset: string
    :param dataset: the path to the dataset (here MNIST)
    '''

    logging.info('... loading data')

    # Download the MNIST dataset if it is not present
    data_dir, data_file = os.path.split(dataset)
    if data_dir == "" and not os.path.isfile(dataset):
        # Check if dataset is in the data directory.
        new_path = os.path.join(
            os.path.split(__file__)[0],
            "..",
            "data",
            dataset
        )
        if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
            dataset = new_path

    if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
        from six.moves import urllib
        origin = (
            'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        )
        logging.info('Downloading data from %s' % origin)
        urllib.request.urlretrieve(origin, dataset)

    # Load the dataset
    fid = gzip.open(dataset, 'rb')
    try:
        # Python 3: the MNIST pickle was written by Python 2, so decode
        # its byte strings as latin-1
        train_set, valid_set, test_set = pickle.load(fid, encoding='latin1')
    except TypeError:
        # Python 2: pickle.load() takes no encoding argument
        train_set, valid_set, test_set = pickle.load(fid)
    fid.close()

    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix)
    # where each row corresponds to an example. target is a
    # numpy.ndarray of 1 dimension (vector) that has the same length as
    # the number of rows in the input. It should give the target
    # to the example with the same index in the input.

    def halve_dataset(data_xy):
        """ break each MNIST image into its left and right halves """
        data, label = data_xy
        m, n = data.shape
        left = np.zeros((m, n // 2))
        right = np.zeros((m, n // 2))

        for i in range(m):
            image = data[i].reshape(28, 28)
            left[i] = image[:, :14].reshape(1, -1)
            right[i] = image[:, 14:].reshape(1, -1)
        return (left, right, label)


    def shared_dataset(data_xxy, borrow=True):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on a GPU).
        Since copying data into the GPU is slow, copying a minibatch every
        time one is needed (the default behaviour if the data is not in a
        shared variable) would lead to a large decrease in performance.
        """
        data_x1, data_x2, data_y = data_xxy
        shared_x1 = theano.shared(np.asarray(data_x1,
                                             dtype=theano.config.floatX),
                                  borrow=borrow)
        shared_x2 = theano.shared(np.asarray(data_x2,
                                             dtype=theano.config.floatX),
                                  borrow=borrow)
        return shared_x1, shared_x2

    train_set = halve_dataset(train_set)
    valid_set = halve_dataset(valid_set)
    test_set = halve_dataset(test_set)

    if shared:
        train_set_x1, train_set_x2 = shared_dataset(train_set)
        valid_set_x1, valid_set_x2 = shared_dataset(valid_set)
        test_set_x1, test_set_x2 = shared_dataset(test_set)
    else:
        train_set_x1, train_set_x2, train_set_y = train_set
        valid_set_x1, valid_set_x2, valid_set_y = valid_set
        test_set_x1, test_set_x2, test_set_y = test_set

    data = [
        (train_set_x1, train_set_x2),
        (valid_set_x1, valid_set_x2),
        (test_set_x1, test_set_x2)
    ]

    return data
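

if __name__ == '__main__':
    # Added smoke test (not in the original file): load MNIST, split it into
    # the two views, and report their shapes. Each view should be
    # 28 x 14 = 392 dimensional.
    logging.basicConfig(level=logging.INFO)
    provided = load_data('./mnist.pkl.gz', shared=False)
    train_x1, train_x2 = provided[0]
    print('view1 (left halves): ', train_x1.shape)   # expected: (50000, 392)
    print('view2 (right halves):', train_x2.shape)   # expected: (50000, 392)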

--------------------------------------------------------------------------------
/model/layers.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""

Hidden layers for deep neural networks.
Based on code from http://deeplearning.net/tutorial

Author: M. Sam Ribeiro
Date: 2017

"""

import numpy
import theano
import theano.tensor as T

# required by ConvPoolLayer below
from theano.tensor.nnet import conv2d
from theano.tensor.signal import pool



class HiddenLayer(object):

    def __init__(self, rng, input, n_in, n_out, W=None, b=None,
                 activation=T.tanh):
        """
        Typical hidden layer of an MLP with fully-connected units.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dmatrix
        :param input: a symbolic tensor of shape (n_examples, n_in)

        :type n_in: int
        :param n_in: dimensionality of input

        :type n_out: int
        :param n_out: number of hidden units

        :type activation: theano.Op or function
        :param activation: nonlinearity to be applied in the hidden layer
        """
        self.input = input

        if W is None:
            # Glorot-style uniform initialization
            W_values = numpy.asarray(
                rng.uniform(
                    low=-numpy.sqrt(6. / (n_in + n_out)),
                    high=numpy.sqrt(6. / (n_in + n_out)),
                    size=(n_in, n_out)
                ),
                dtype=theano.config.floatX
            )
            if activation == theano.tensor.nnet.sigmoid:
                W_values *= 4

            W = theano.shared(value=W_values, name='W', borrow=True)

        if b is None:
            b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_values, name='b', borrow=True)

        self.W = W
        self.b = b

        lin_output = T.dot(input, self.W) + self.b
        self.output = activation(lin_output)
        self.params = [self.W, self.b]


class ConvPoolLayer(object):

    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
        """
        Convolution and Pooling layer.
        (Not used by the Deep CCA model in this repository; kept for reference.)

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type poolsize: tuple or list of length 2
        :param poolsize: the downsampling (pooling) factor (#rows, #cols)
        """

        assert image_shape[1] == filter_shape[1]
        self.input = input

        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        fan_in = numpy.prod(filter_shape[1:])
        # each unit in the lower layer receives a gradient from:
        # "num output feature maps * filter height * filter width" /
        # pooling size
        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) //
                   numpy.prod(poolsize))
        # initialize weights with random weights
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        self.W = theano.shared(
            numpy.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                dtype=theano.config.floatX
            ),
            borrow=True
        )

        # the bias is a 1D tensor -- one bias per output feature map
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        # convolve input feature maps with filters
        conv_out = conv2d(
            input=input,
            filters=self.W,
            filter_shape=filter_shape,
            input_shape=image_shape
        )

        # pool each feature map individually, using maxpooling
        pooled_out = pool.pool_2d(
            input=conv_out,
            ds=poolsize,
            ignore_border=True
        )

        # add the bias term. Since the bias is a vector (1D array), we first
        # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
        # thus be broadcasted across mini-batches and feature map
        # width & height
        self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

        # store parameters of this layer
        self.params = [self.W, self.b]

        # keep track of model input
        self.input = input
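

if __name__ == '__main__':
    # Added usage sketch (not in the original file): exercise a HiddenLayer
    # on random data and print the activation shape.
    rng = numpy.random.RandomState(1234)
    x = T.matrix('x')
    layer = HiddenLayer(rng, input=x, n_in=392, n_out=128)
    f = theano.function([x], layer.output)
    batch = numpy.random.rand(10, 392).astype(theano.config.floatX)
    print(f(batch).shape)  # -> (10, 128)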

--------------------------------------------------------------------------------
/deep_cca.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Theano-based Deep Canonical Correlation Analysis (Deep CCA) on MNIST data.

Training procedure based on code from http://deeplearning.net/tutorial

Author: M. Sam Ribeiro
Date: 2017

"""

import os
import time
import logging

import numpy
import theano
import theano.tensor as T

from model.dnn import ParallelDNN
from model.configuration import Config
from data_provider import load_data


def train(cfg, model):

    logging.info('... training')

    # early-stopping parameters
    patience = 10000               # look at this many examples regardless
    patience_increase = 2          # wait this much longer when a new best
                                   # is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(cfg.n_train_batches, patience // 2)
                                   # go through this many minibatches before
                                   # checking the network on the validation
                                   # set; in this case we check every epoch

    best_validation_score = numpy.inf
    best_epoch = 0
    start_time = time.time()

    epoch = 0
    done_looping = False

    train_model = model.train
    validate_model = model.valid
    test_model = model.test

    while (epoch < cfg.epochs) and (not done_looping):

        epoch = epoch + 1
        train_correlations = []
        train_means = []
        epoch_start_time = time.time()

        for minibatch_index in range(cfg.n_train_batches):

            minibatch_avg_cost, mini_batch_mean = train_model(minibatch_index)
            train_correlations.append(float(minibatch_avg_cost))
            train_means.append(float(mini_batch_mean))

            iteration = (epoch - 1) * cfg.n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:

                epoch_train_corr = numpy.mean(train_correlations)
                epoch_train_mean = numpy.mean(train_means)

                valid_correlations = []
                valid_means = []
                for i in range(cfg.n_valid_batches):
                    valid_corr, valid_mean = validate_model(i)
                    valid_correlations.append(float(valid_corr))
                    valid_means.append(float(valid_mean))

                epoch_valid_corr = numpy.mean(valid_correlations)
                epoch_valid_mean = numpy.mean(valid_means)

                # if we got the best validation score until now (the cost is
                # the negated total correlation, so lower is better)
                if epoch_valid_corr < best_validation_score:
                    # improve patience if loss improvement is good enough
                    if (
                        epoch_valid_corr < best_validation_score *
                        improvement_threshold
                    ):
                        patience = max(patience, iteration * patience_increase)

                    best_validation_score = epoch_valid_corr
                    best_epoch = epoch

        epoch_time = time.time() - epoch_start_time
        logging.info(
            'epoch {0}, train correlation {1:.2f} (mean: {2:.2f}), validation correlation {3:.2f} (mean: {4:.2f}), time {5:.2f}s' \
            .format(epoch, -epoch_train_corr, epoch_train_mean, -epoch_valid_corr, epoch_valid_mean, epoch_time))

        if patience <= iteration:
            done_looping = True
            break

    running_time = time.time() - start_time
    filename = os.path.split(__file__)[1]

    logging.info('Optimization complete. Best validation score of {0:.2f} obtained at epoch {1}' \
        .format(-best_validation_score, best_epoch))
    logging.info('The code for file {0} ran for {1:.2f}m'.format(filename, running_time / 60.))


def test(cfg, model):

    test_model = model.test
    test_means = []
    test_correlations = []

    start_time = time.time()

    for i in range(cfg.n_test_batches):
        test_corr, test_mean = test_model(i)
        test_correlations.append(float(test_corr))
        test_means.append(float(test_mean))

    test_corr = numpy.mean(test_correlations)
    test_mean = numpy.mean(test_means)
    running_time = time.time() - start_time

    logging.info('test correlation {0:.2f} (mean {1:.2f}), time {2:.2f}s' \
        .format(-test_corr, test_mean, running_time))



if __name__ == "__main__":

    logging.basicConfig(format='%(asctime)-15s %(levelname)s: %(message)s', datefmt='%m/%d/%Y %H:%M:%S', level=logging.INFO)

    # load dataset
    dataset = './mnist.pkl.gz'
    datasets = load_data(dataset, shared=True)

    # set configuration
    cfg = Config()
    cfg.set_data_config(datasets)

    # build model
    model = ParallelDNN(cfg, datasets)

    # train and evaluate
    train(cfg, model)
    test(cfg, model)


# THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32,force_device=True python ./deep_cca.py

--------------------------------------------------------------------------------