├── README.md
├── linear_cca.py
├── train.py
├── model.py
└── load.py


/README.md:
--------------------------------------------------------------------------------
1 | # DeepCCA
2 | Deep Canonical Correlation Analysis (DCCA) implemented with TensorFlow.
3 | 


--------------------------------------------------------------------------------
/linear_cca.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | 
 3 | 
 4 | def linear_cca(H1, H2, outdim_size):
 5 |     """
 6 |     An implementation of linear CCA
 7 |     # Arguments:
 8 |         H1 and H2: the matrices containing the data for view 1 and view 2. Each row is a sample.
 9 |         outdim_size: specifies the number of new features
10 |     # Returns
11 |         A and B: the linear transformation matrices 
12 |         mean1 and mean2: the means of data for both views
13 |     """
14 |     r1 = 1e-4
15 |     r2 = 1e-4
16 | 
17 |     m = H1.shape[0]
18 |     o = H1.shape[1]
19 | 
20 |     mean1 = numpy.mean(H1, axis=0)
21 |     mean2 = numpy.mean(H2, axis=0)
22 |     H1bar = H1 - numpy.tile(mean1, (m, 1))
23 |     H2bar = H2 - numpy.tile(mean2, (m, 1))
24 | 
25 |     SigmaHat12 = (1.0 / (m - 1)) * numpy.dot(H1bar.T, H2bar)
26 |     SigmaHat11 = (1.0 / (m - 1)) * numpy.dot(H1bar.T, H1bar) + r1 * numpy.identity(o)
27 |     SigmaHat22 = (1.0 / (m - 1)) * numpy.dot(H2bar.T, H2bar) + r2 * numpy.identity(o)
28 | 
29 |     [D1, V1] = numpy.linalg.eigh(SigmaHat11)
30 |     [D2, V2] = numpy.linalg.eigh(SigmaHat22)
31 |     SigmaHat11RootInv = numpy.dot(numpy.dot(V1, numpy.diag(D1 ** -0.5)), V1.T)
32 |     SigmaHat22RootInv = numpy.dot(numpy.dot(V2, numpy.diag(D2 ** -0.5)), V2.T)
33 | 
34 |     Tval = numpy.dot(numpy.dot(SigmaHat11RootInv, SigmaHat12), SigmaHat22RootInv)
35 | 
36 |     [U, D, V] = numpy.linalg.svd(Tval)
37 |     V = V.T
38 |     A = numpy.dot(SigmaHat11RootInv, U[:, 0:outdim_size])
39 |     B = numpy.dot(SigmaHat22RootInv, V[:, 0:outdim_size])
40 |     D = D[0:outdim_size]
41 | 
42 |     return A, B, mean1, mean2
43 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from model import *
  3 | from load import read_mnist
  4 | from linear_cca import linear_cca
  5 | from sklearn import svm
  6 | from sklearn.metrics import accuracy_score
  7 | 
  8 | 
  9 | n_epochs = 100
 10 | learning_rate = 0.01
 11 | momentum=0.99
 12 | batch_size = 800
 13 | outdim_size = 10
 14 | input_size1 = 784
 15 | input_size2 = 784
 16 | layer_sizes1 = [1024, 1024, 1024, outdim_size]
 17 | layer_sizes2 = [1024, 1024, 1024, outdim_size]
 18 | reg_par = 1e-4
 19 | use_all_singular_values = True
 20 | 
 21 | 
 22 | trainData, tuneData, testData = read_mnist()
 23 | 
 24 | dcca_model = DeepCCA(layer_sizes1, layer_sizes2,
 25 |                       input_size1, input_size2,
 26 |                       outdim_size,
 27 |                       reg_par, use_all_singular_values)
 28 | 
 29 | 
 30 | input_view1 = dcca_model.input_view1
 31 | input_view2 = dcca_model.input_view2
 32 | hidden_view1 = dcca_model.output_view1
 33 | hidden_view2 = dcca_model.output_view2
 34 | neg_corr = dcca_model.neg_corr
 35 | 
 36 | 
 37 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
 38 | sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))
 39 | 
 40 | train_op = tf.train.MomentumOptimizer(learning_rate, momentum).minimize(neg_corr,
 41 |                                                                         var_list=tf.trainable_variables())
 42 | 
 43 | tf.global_variables_initializer().run()
 44 | 
 45 | 
 46 | iterations = 0
 47 | for epoch in range(n_epochs):
 48 |     index = np.arange(trainData.num_examples)
 49 |     np.random.shuffle(index)
 50 |     trX1 = trainData.images1[index]
 51 |     trX2= trainData.images2[index]
 52 | 
 53 |     for start, end in zip(range(0, trainData.num_examples, batch_size),
 54 |             range(batch_size, trainData.num_examples, batch_size)):
 55 |         Xs1 = trX1[start:end]
 56 |         Xs2 = trX2[start:end]
 57 | 
 58 |         _, neg_corr_val = sess.run(
 59 |                                   [train_op, neg_corr],
 60 |                                    feed_dict={
 61 |                                        input_view1:Xs1,
 62 |                                        input_view2:Xs2
 63 |                                    })
 64 | 
 65 | 
 66 |         if iterations % 100 == 0:
 67 |             print("iteration:", iterations)
 68 |             print("neg_loss_for_train:", neg_corr_val)
 69 |             tune_neg_corr_val = sess.run(neg_corr,
 70 |                 feed_dict={
 71 |                     input_view1: tuneData.images1,
 72 |                     input_view2: tuneData.images2
 73 |                 })
 74 |             print("neg_loss_for_tune:", tune_neg_corr_val)
 75 | 
 76 |         iterations += 1
 77 | 
 78 | 
 79 | ################# Linear CCA #############################
 80 | 
 81 | X1proj, X2proj = sess.run(
 82 |                         [hidden_view1, hidden_view2],
 83 |                         feed_dict={
 84 |                             input_view1: trainData.images1,
 85 |                             input_view2: trainData.images2
 86 |                         })
 87 | XV1proj, XV2proj = sess.run(
 88 |                         [hidden_view1, hidden_view2],
 89 |                         feed_dict={
 90 |                             input_view1: tuneData.images1,
 91 |                             input_view2: tuneData.images2
 92 |                         })
 93 | XTe1proj, XTe2proj = sess.run(
 94 |                         [hidden_view1, hidden_view2],
 95 |                         feed_dict={
 96 |                             input_view1: testData.images1,
 97 |                             input_view2: testData.images2
 98 |                         })
 99 | print("Linear CCA started!")
100 | w = [None, None]
101 | m = [None, None]
102 | w[0], w[1], m[0], m[1] = linear_cca(X1proj, X2proj, 10)
103 | print("Linear CCA ended!")
104 | X1proj -= m[0].reshape([1, -1]).repeat(len(X1proj), axis=0)
105 | X1proj = np.dot(X1proj, w[0])
106 | 
107 | XV1proj -= m[0].reshape([1, -1]).repeat(len(XV1proj), axis=0)
108 | XV1proj = np.dot(XV1proj, w[0])
109 | 
110 | XTe1proj -= m[0].reshape([1, -1]).repeat(len(XTe1proj), axis=0)
111 | XTe1proj = np.dot(XTe1proj, w[0])
112 | 
113 | trainLable = trainData.labels.astype('float')
114 | tuneLable = tuneData.labels.astype('float')
115 | testLable = testData.labels.astype('float')
116 | 
117 | 
118 | ################# SVM classify #############################
119 | 
120 | print('training SVM...')
121 | clf = svm.LinearSVC(C=0.01, dual=False)
122 | clf.fit(X1proj, trainLable.ravel())
123 | 
124 | p = clf.predict(XTe1proj)
125 | test_acc = accuracy_score(testLable, p)
126 | p = clf.predict(XV1proj)
127 | valid_acc = accuracy_score(tuneLable, p)
128 | print('DCCA: tune acc={}, test acc={}'.format(valid_acc, test_acc))
129 | 
130 | 


--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | from keras.layers import Dense, Merge
  3 | from keras.models import Sequential
  4 | from keras.optimizers import RMSprop, SGD
  5 | from keras.regularizers import l2
  6 | from keras import backend as K
  7 | import tensorflow as tf
  8 | 
  9 | def my_init_sigmoid(shape, dtype=None):
 10 |     rnd = K.random_uniform(
 11 |         shape, 0., 1., dtype)
 12 |     from keras.initializers import _compute_fans
 13 |     fan_in, fan_out = _compute_fans(shape)
 14 |     return 8. * (rnd - 0.5) * math.sqrt(6) / math.sqrt(fan_in + fan_out)
 15 | 
 16 | def my_init_others(shape, dtype=None):
 17 |     rnd = K.random_uniform(
 18 |         shape, 0., 1., dtype)
 19 |     from keras.initializers import _compute_fans
 20 |     fan_in, fan_out = _compute_fans(shape)
 21 |     return 2. * (rnd - 0.5) / math.sqrt(fan_in)
 22 | 
 23 | 
 24 | class DeepCCA():
 25 |     def __init__(self, layer_sizes1,
 26 |                  layer_sizes2, input_size1,
 27 |                  input_size2, outdim_size, reg_par, use_all_singular_values):
 28 | 
 29 |         self.layer_sizes1 = layer_sizes1  # [1024, 1024, 1024, outdim_size]
 30 |         self.layer_sizes2 = layer_sizes2
 31 |         self.input_size1 = input_size1
 32 |         self.input_size2 = input_size2
 33 |         self.outdim_size = outdim_size
 34 | 
 35 |         self.input_view1 = tf.placeholder(tf.float32, [None, input_size1])
 36 |         self.input_view2 = tf.placeholder(tf.float32, [None, input_size2])
 37 | 
 38 |         self.output_view1 = self.build_mlp_net(self.input_view1, layer_sizes1, reg_par)
 39 |         self.output_view2 = self.build_mlp_net(self.input_view2, layer_sizes2, reg_par)
 40 | 
 41 |         self.neg_corr = self.neg_correlation(self.output_view1, self.output_view2, use_all_singular_values)
 42 | 
 43 |     def build_mlp_net(self, input, layer_sizes, reg_par):
 44 |         output = input
 45 |         for l_id, ls in enumerate(layer_sizes):
 46 |             if l_id == len(layer_sizes) - 1:
 47 |                 activation = None
 48 |                 kernel_initializer = my_init_others
 49 |             else:
 50 |                 activation = tf.nn.sigmoid
 51 |                 kernel_initializer = my_init_sigmoid
 52 | 
 53 |             output = Dense(ls, activation=activation,
 54 |                       kernel_initializer=kernel_initializer,
 55 |                       kernel_regularizer=l2(reg_par))(output)
 56 | 
 57 |         return output
 58 | 
 59 |     def neg_correlation(self, output1, output2, use_all_singular_values):
 60 |         r1 = 1e-4
 61 |         r2 = 1e-4
 62 |         eps = 1e-12
 63 | 
 64 |         # unpack (separate) the output of networks for view 1 and view 2
 65 |         H1 = tf.transpose(output1)
 66 |         H2 = tf.transpose(output2)
 67 | 
 68 |         m = tf.shape(H1)[1]
 69 | 
 70 |         H1bar = H1 - (1.0 / tf.cast(m, tf.float32)) * tf.matmul(H1, tf.ones([m, m]))
 71 |         H2bar = H2 - (1.0 / tf.cast(m, tf.float32)) * tf.matmul(H2, tf.ones([m, m]))
 72 | 
 73 |         SigmaHat12 = (1.0 / (tf.cast(m, tf.float32) - 1)) * tf.matmul(H1bar, tf.transpose(H2bar))
 74 |         SigmaHat11 = (1.0 / (tf.cast(m, tf.float32) - 1)) * tf.matmul(H1bar, tf.transpose(H1bar)) + r1 * tf.eye(self.outdim_size)
 75 |         SigmaHat22 = (1.0 / (tf.cast(m, tf.float32) - 1)) * tf.matmul(H2bar, tf.transpose(H2bar)) + r2 * tf.eye(self.outdim_size)
 76 | 
 77 |         # Calculating the root inverse of covariance matrices by using eigen decomposition
 78 |         [D1, V1] = tf.linalg.eigh(SigmaHat11)
 79 |         [D2, V2] = tf.linalg.eigh(SigmaHat22)
 80 | 
 81 |         # Added to increase stability
 82 |         posInd1 = tf.where(tf.greater(D1, eps))
 83 |         posInd1 = tf.reshape(posInd1, [-1, tf.shape(posInd1)[0]])[0]
 84 |         D1 = tf.gather(D1, posInd1)
 85 |         V1 = tf.gather(V1, posInd1)
 86 | 
 87 |         posInd2 = tf.where(tf.greater(D2, eps))
 88 |         posInd2 = tf.reshape(posInd2, [-1, tf.shape(posInd2)[0]])[0]
 89 |         D2 = tf.gather(D2, posInd2)
 90 |         V2 = tf.gather(V2, posInd2)
 91 | 
 92 |         SigmaHat11RootInv = tf.matmul(tf.matmul(V1, tf.linalg.diag(D1 ** -0.5)), tf.transpose(V1))
 93 |         SigmaHat22RootInv = tf.matmul(tf.matmul(V2, tf.linalg.diag(D2 ** -0.5)), tf.transpose(V2))
 94 | 
 95 |         Tval = tf.matmul(tf.matmul(SigmaHat11RootInv, SigmaHat12), SigmaHat22RootInv)
 96 | 
 97 |         if use_all_singular_values:
 98 |             # all singular values are used to calculate the correlation
 99 |             # corr = tf.sqrt(tf.linalg.trace(tf.matmul(tf.transpose(Tval), Tval)))  ### The usage of "sqrt" here is wrong!!!
100 |             Tval.set_shape([self.outdim_size, self.outdim_size])
101 |             s = tf.svd(Tval, compute_uv=False)
102 |             corr = tf.reduce_sum(s)
103 |         else:
104 |             # just the top outdim_size singular values are used
105 |             [U, V] = tf.linalg.eigh(tf.matmul(tf.transpose(Tval), Tval))
106 |             non_critical_indexes = tf.where(tf.greater(U, eps))
107 |             non_critical_indexes = tf.reshape(non_critical_indexes, [-1, tf.shape(non_critical_indexes)[0]])[0]
108 |             U = tf.gather(U, non_critical_indexes)
109 |             U = tf.gather(U, tf.nn.top_k(U[:, ]).indices)
110 |             corr = tf.reduce_sum(tf.sqrt(U[0:self.outdim_size]))
111 |         return -corr
112 | 
113 | 
114 | 
115 | 
116 | 
117 | 
118 | 


--------------------------------------------------------------------------------
/load.py:
--------------------------------------------------------------------------------
  1 | import scipy.io as sio
  2 | import tensorflow as tf
  3 | import numpy as np
  4 | 
  5 | class DataSet(object):
  6 |     
  7 |     def __init__(self, images1, images2, labels, fake_data=False, one_hot=False,
  8 |                  dtype=tf.float32):
  9 |         """Construct a DataSet.
 10 |         one_hot arg is used only if fake_data is true.  `dtype` can be either
 11 |         `uint8` to leave the input as `[0, 255]`, or `float32` to rescale into
 12 |         `[0, 1]`.
 13 |         """
 14 |         dtype = tf.as_dtype(dtype).base_dtype
 15 |         if dtype not in (tf.uint8, tf.float32):
 16 |             raise TypeError('Invalid image dtype %r, expected uint8 or float32' % dtype)
 17 |         
 18 |         if fake_data:
 19 |             self._num_examples = 10000
 20 |             self.one_hot = one_hot
 21 |         else:
 22 |             assert images1.shape[0] == labels.shape[0], (
 23 |                 'images1.shape: %s labels.shape: %s' % (images1.shape,
 24 |                                                         labels.shape))
 25 |             assert images2.shape[0] == labels.shape[0], (
 26 |                 'images2.shape: %s labels.shape: %s' % (images2.shape,
 27 |                                                         labels.shape))
 28 |             self._num_examples = images1.shape[0]
 29 |             # Convert shape from [num examples, rows, columns, depth]
 30 |             # to [num examples, rows*columns] (assuming depth == 1)
 31 |             #assert images.shape[3] == 1
 32 |             #images = images.reshape(images.shape[0],
 33 |             #                        images.shape[1] * images.shape[2])
 34 |             if dtype == tf.float32 and images1.dtype != np.float32:
 35 |                 # Convert from [0, 255] -> [0.0, 1.0].
 36 |                 print("type conversion view 1")
 37 |                 images1 = images1.astype(np.float32)
 38 |             
 39 |             if dtype == tf.float32 and images2.dtype != np.float32:
 40 |                 print("type conversion view 2")
 41 |                 images2 = images2.astype(np.float32)
 42 | 
 43 |         self._images1 = images1
 44 |         self._images2 = images2
 45 |         self._labels = labels
 46 |         self._epochs_completed = 0
 47 |         self._index_in_epoch = 0
 48 |     
 49 |     @property
 50 |     def images1(self):
 51 |         return self._images1
 52 |     
 53 |     @property
 54 |     def images2(self):
 55 |         return self._images2
 56 |     
 57 |     @property
 58 |     def labels(self):
 59 |         return self._labels
 60 |     
 61 |     @property
 62 |     def num_examples(self):
 63 |         return self._num_examples
 64 |     
 65 |     @property
 66 |     def epochs_completed(self):
 67 |         return self._epochs_completed
 68 |     
 69 |     def next_batch(self, batch_size, fake_data=False):
 70 |         """Return the next `batch_size` examples from this data set."""
 71 |         if fake_data:
 72 |             fake_image = [1] * 784
 73 |             if self.one_hot:
 74 |                 fake_label = [1] + [0] * 9
 75 |             else:
 76 |                 fake_label = 0
 77 |             return [fake_image for _ in xrange(batch_size)], [fake_image for _ in xrange(batch_size)], [fake_label for _ in xrange(batch_size)]
 78 |         
 79 |         start = self._index_in_epoch
 80 |         self._index_in_epoch += batch_size
 81 |         if self._index_in_epoch > self._num_examples:
 82 |             # Finished epoch
 83 |             self._epochs_completed += 1
 84 |             # Shuffle the data
 85 |             perm = np.arange(self._num_examples)
 86 |             np.random.shuffle(perm)
 87 |             self._images1 = self._images1[perm]
 88 |             self._images2 = self._images2[perm]
 89 |             self._labels = self._labels[perm]
 90 |             # Start next epoch
 91 |             start = 0
 92 |             self._index_in_epoch = batch_size
 93 |             assert batch_size <= self._num_examples
 94 |         
 95 |         end = self._index_in_epoch
 96 |         return self._images1[start:end], self._images2[start:end], self._labels[start:end]
 97 | 
 98 | def read_mnist():
 99 | 
100 |     data=sio.loadmat('MNIST.mat')
101 |     
102 |     train=DataSet(data['X1'],data['X2'],data['trainLabel'])
103 |     
104 |     tune=DataSet(data['XV1'],data['XV2'],data['tuneLabel'])
105 |     
106 |     test=DataSet(data['XTe1'],data['XTe2'],data['testLabel'])
107 |     
108 |     return train, tune, test
109 | 
110 | 
def read_xrmb():
    """Load the XRMB .mat file and wrap its three splits as DataSet objects.

    Each split is built from a (view 1, view 2, labels) triple of variables
    stored in the .mat file.

    Returns:
        (train, tune, test) DataSet instances.
    """
    mat = sio.loadmat('/share/data/speech-multiview/wwang5/cca/XRMBf2KALDI_window7_single.mat')

    # (view-1 key, view-2 key, label key) per split, in return order.
    split_keys = (
        ('X1', 'X2', 'trainLabel'),
        ('XV1', 'XV2', 'tuneLabel'),
        ('XTe1', 'XTe2', 'testLabel'),
    )
    train, tune, test = [DataSet(mat[v1], mat[v2], mat[lab])
                         for v1, v2, lab in split_keys]
    return train, tune, test
122 | 
123 |     
def read_flicker():
    """Load the ten flicker split files and merge them into three DataSets.

    The files `flicker_tensorflow_split1.mat` .. `split10.mat` are read in
    order and their arrays are stacked along the first axis. The data carries
    no labels here, so every DataSet receives an all-zero label vector with
    one entry per example.

    Returns:
        (train, tune, test) DataSet instances.
    """
    path_prefix = '/share/data/speech-multiview/wwang5/cca/VCCA/flicker/flicker_tensorflow_split'
    keys = ('X1', 'X2', 'XV1', 'XV2', 'XTe1', 'XTe2')

    # Collect the per-split pieces first and concatenate once per key;
    # concatenating inside the loop would re-copy the accumulated arrays on
    # every iteration.
    parts = {key: [] for key in keys}
    for i in range(1, 11):
        data = sio.loadmat(path_prefix + str(i) + '.mat')
        for key in keys:
            parts[key].append(data[key])

    X1, X2, XV1, XV2, XTe1, XTe2 = [np.concatenate(parts[key]) for key in keys]

    train = DataSet(X1, X2, np.zeros(len(X1)))

    tune = DataSet(XV1, XV2, np.zeros(len(XV1)))

    test = DataSet(XTe1, XTe2, np.zeros(len(XTe1)))

    return train, tune, test
152 | 
153 |     
154 | 
155 | 
156 | 


--------------------------------------------------------------------------------