# README.md: DeepCCA -- Deep Canonical Correlation Analysis implemented with tensorflow
# ---- /linear_cca.py ----
import numpy


def linear_cca(H1, H2, outdim_size):
    """Fit linear Canonical Correlation Analysis on two views.

    # Arguments:
        H1 and H2: the matrices containing the data for view 1 and view 2.
            Each row is a sample; both must have the same number of rows.
            The two views may have different numbers of columns.
        outdim_size: specifies the number of new features (columns kept
            from each projection matrix).
    # Returns
        A and B: the linear transformation matrices for view 1 and view 2
        mean1 and mean2: the means of data for both views

    Project new data with ``numpy.dot(X1 - mean1, A)`` and
    ``numpy.dot(X2 - mean2, B)``.
    """
    # Ridge terms added to the covariance diagonals so that they stay
    # positive definite and the inverse square roots below are defined.
    r1 = 1e-4
    r2 = 1e-4

    m = H1.shape[0]
    # Each view gets its own dimensionality; using view 1's size for both
    # identity matrices breaks as soon as the views differ in width.
    o1 = H1.shape[1]
    o2 = H2.shape[1]

    mean1 = numpy.mean(H1, axis=0)
    mean2 = numpy.mean(H2, axis=0)
    H1bar = H1 - mean1
    H2bar = H2 - mean2

    scale = 1.0 / (m - 1)
    SigmaHat12 = scale * numpy.dot(H1bar.T, H2bar)
    SigmaHat11 = scale * numpy.dot(H1bar.T, H1bar) + r1 * numpy.identity(o1)
    SigmaHat22 = scale * numpy.dot(H2bar.T, H2bar) + r2 * numpy.identity(o2)

    # Inverse matrix square roots via the symmetric eigendecomposition.
    D1, V1 = numpy.linalg.eigh(SigmaHat11)
    D2, V2 = numpy.linalg.eigh(SigmaHat22)
    SigmaHat11RootInv = numpy.dot(numpy.dot(V1, numpy.diag(D1 ** -0.5)), V1.T)
    SigmaHat22RootInv = numpy.dot(numpy.dot(V2, numpy.diag(D2 ** -0.5)), V2.T)

    Tval = numpy.dot(numpy.dot(SigmaHat11RootInv, SigmaHat12), SigmaHat22RootInv)

    # numpy returns V already transposed; the singular values are not needed.
    U, _, Vt = numpy.linalg.svd(Tval)
    A = numpy.dot(SigmaHat11RootInv, U[:, 0:outdim_size])
    B = numpy.dot(SigmaHat22RootInv, Vt.T[:, 0:outdim_size])

    return A, B, mean1, mean2
# ---- /train.py ----
"""Train DeepCCA on the two-view MNIST data, then score the learned features.

Pipeline: (1) minimise the negative correlation of the two network outputs
with momentum SGD, (2) fit linear CCA on the training projections,
(3) train a linear SVM on the view-1 CCA features and report tune/test
accuracy.
"""
import numpy as np
import tensorflow as tf
from model import DeepCCA
from load import read_mnist
from linear_cca import linear_cca
from sklearn import svm
from sklearn.metrics import accuracy_score


n_epochs = 100
learning_rate = 0.01
momentum = 0.99
batch_size = 800
outdim_size = 10
input_size1 = 784
input_size2 = 784
layer_sizes1 = [1024, 1024, 1024, outdim_size]
layer_sizes2 = [1024, 1024, 1024, outdim_size]
reg_par = 1e-4
use_all_singular_values = True


trainData, tuneData, testData = read_mnist()

dcca_model = DeepCCA(layer_sizes1, layer_sizes2,
                     input_size1, input_size2,
                     outdim_size,
                     reg_par, use_all_singular_values)


input_view1 = dcca_model.input_view1
input_view2 = dcca_model.input_view2
hidden_view1 = dcca_model.output_view1
hidden_view2 = dcca_model.output_view2
neg_corr = dcca_model.neg_corr


gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))

train_op = tf.train.MomentumOptimizer(learning_rate, momentum).minimize(
    neg_corr, var_list=tf.trainable_variables())

tf.global_variables_initializer().run()


iterations = 0
for epoch in range(n_epochs):
    # Reshuffle both views with the same permutation so pairs stay aligned.
    index = np.arange(trainData.num_examples)
    np.random.shuffle(index)
    trX1 = trainData.images1[index]
    trX2 = trainData.images2[index]

    # Visit every *full* batch. The upper bound is num_examples - batch_size + 1
    # so the batch that ends exactly at num_examples is included; a trailing
    # partial batch is still skipped, as before.
    for start in range(0, trainData.num_examples - batch_size + 1, batch_size):
        end = start + batch_size
        Xs1 = trX1[start:end]
        Xs2 = trX2[start:end]

        _, neg_corr_val = sess.run(
            [train_op, neg_corr],
            feed_dict={
                input_view1: Xs1,
                input_view2: Xs2
            })

        if iterations % 100 == 0:
            print("iteration:", iterations)
            print("neg_loss_for_train:", neg_corr_val)
            tune_neg_corr_val = sess.run(
                neg_corr,
                feed_dict={
                    input_view1: tuneData.images1,
                    input_view2: tuneData.images2
                })
            print("neg_loss_for_tune:", tune_neg_corr_val)

        iterations += 1


################# Linear CCA #############################

X1proj, X2proj = sess.run(
    [hidden_view1, hidden_view2],
    feed_dict={
        input_view1: trainData.images1,
        input_view2: trainData.images2
    })
XV1proj, XV2proj = sess.run(
    [hidden_view1, hidden_view2],
    feed_dict={
        input_view1: tuneData.images1,
        input_view2: tuneData.images2
    })
XTe1proj, XTe2proj = sess.run(
    [hidden_view1, hidden_view2],
    feed_dict={
        input_view1: testData.images1,
        input_view2: testData.images2
    })

# The network is no longer needed once all projections are materialised.
sess.close()

print("Linear CCA started!")
w = [None, None]
m = [None, None]
# Keep as many CCA directions as the network emits rather than a literal 10.
w[0], w[1], m[0], m[1] = linear_cca(X1proj, X2proj, outdim_size)
print("Linear CCA ended!")

# Only view 1 is used downstream: centre with the training mean, then project.
X1proj = np.dot(X1proj - m[0], w[0])
XV1proj = np.dot(XV1proj - m[0], w[0])
XTe1proj = np.dot(XTe1proj - m[0], w[0])

trainLabel = trainData.labels.astype('float').ravel()
tuneLabel = tuneData.labels.astype('float').ravel()
testLabel = testData.labels.astype('float').ravel()


################# SVM classify #############################

print('training SVM...')
clf = svm.LinearSVC(C=0.01, dual=False)
clf.fit(X1proj, trainLabel)

p = clf.predict(XTe1proj)
test_acc = accuracy_score(testLabel, p)
p = clf.predict(XV1proj)
valid_acc = accuracy_score(tuneLabel, p)
print('DCCA: tune acc={}, test acc={}'.format(valid_acc, test_acc))
# ---- /model.py ----
import math
from keras.layers import Dense, Merge
from keras.models import Sequential
from keras.optimizers import RMSprop, SGD
from keras.regularizers import l2
from keras import backend as K
import tensorflow as tf


def my_init_sigmoid(shape, dtype=None):
    """Kernel initializer used for the sigmoid hidden layers.

    Draws uniform samples in [0, 1), recentres them to [-0.5, 0.5) and
    scales by ``8 * sqrt(6) / sqrt(fan_in + fan_out)``.
    """
    rnd = K.random_uniform(shape, 0., 1., dtype)
    from keras.initializers import _compute_fans
    fan_in, fan_out = _compute_fans(shape)
    return 8. * (rnd - 0.5) * math.sqrt(6) / math.sqrt(fan_in + fan_out)


def my_init_others(shape, dtype=None):
    """Kernel initializer used for the final (linear) layer.

    Draws uniform samples in [0, 1), recentres them to [-0.5, 0.5) and
    scales by ``2 / sqrt(fan_in)``.
    """
    rnd = K.random_uniform(shape, 0., 1., dtype)
    from keras.initializers import _compute_fans
    fan_in, fan_out = _compute_fans(shape)
    return 2. * (rnd - 0.5) / math.sqrt(fan_in)


class DeepCCA():
    """Two MLP branches whose outputs are trained to be maximally correlated.

    Attributes set by the constructor:
        input_view1, input_view2: float32 placeholders of shape
            [None, input_size1] and [None, input_size2].
        output_view1, output_view2: outputs of the two MLPs.
        neg_corr: scalar tensor, the negated correlation objective to minimise.
    """

    def __init__(self, layer_sizes1,
                 layer_sizes2, input_size1,
                 input_size2, outdim_size, reg_par, use_all_singular_values):

        self.layer_sizes1 = layer_sizes1  # e.g. [1024, 1024, 1024, outdim_size]
        self.layer_sizes2 = layer_sizes2
        self.input_size1 = input_size1
        self.input_size2 = input_size2
        self.outdim_size = outdim_size

        self.input_view1 = tf.placeholder(tf.float32, [None, input_size1])
        self.input_view2 = tf.placeholder(tf.float32, [None, input_size2])

        self.output_view1 = self.build_mlp_net(self.input_view1, layer_sizes1, reg_par)
        self.output_view2 = self.build_mlp_net(self.input_view2, layer_sizes2, reg_par)

        self.neg_corr = self.neg_correlation(self.output_view1, self.output_view2,
                                             use_all_singular_values)

    def build_mlp_net(self, input, layer_sizes, reg_par):
        """Stack Dense layers on `input`: sigmoid hidden layers, linear last layer.

        NOTE(review): an l2 kernel_regularizer is attached to every layer, but
        nothing in this class adds those regularization losses to `neg_corr`
        -- confirm whether `reg_par` is meant to influence training.
        """
        output = input
        last = len(layer_sizes) - 1
        for l_id, ls in enumerate(layer_sizes):
            if l_id == last:
                activation = None
                kernel_initializer = my_init_others
            else:
                activation = tf.nn.sigmoid
                kernel_initializer = my_init_sigmoid

            output = Dense(ls, activation=activation,
                           kernel_initializer=kernel_initializer,
                           kernel_regularizer=l2(reg_par))(output)

        return output

    @staticmethod
    def _root_inverse(sigma, eps):
        """Return sigma^(-1/2) for a symmetric matrix, dropping eigenvalues <= eps."""
        D, V = tf.linalg.eigh(sigma)
        # Added to increase stability: keep only clearly positive eigenvalues.
        keep = tf.reshape(tf.where(tf.greater(D, eps)), [-1])
        D = tf.gather(D, keep)
        # Eigenvectors are the *columns* of V, so filter along axis 1.
        V = tf.gather(V, keep, axis=1)
        return tf.matmul(tf.matmul(V, tf.linalg.diag(D ** -0.5)), V, transpose_b=True)

    def neg_correlation(self, output1, output2, use_all_singular_values):
        """Build the negated CCA correlation between the two network outputs.

        Arguments:
            output1, output2: network outputs, one row per sample.
            use_all_singular_values: if true, sum every singular value of the
                whitened cross-covariance; otherwise sum only the largest
                `outdim_size` of them.
        Returns:
            Scalar tensor ``-corr`` suitable for minimisation.
        """
        r1 = 1e-4
        r2 = 1e-4
        eps = 1e-12

        # Work with features as rows and samples as columns.
        H1 = tf.transpose(output1)
        H2 = tf.transpose(output2)

        m = tf.cast(tf.shape(H1)[1], tf.float32)

        # Centre each feature over the batch. A per-row mean avoids building
        # an m x m matrix of ones, which is quadratic in the batch size.
        H1bar = H1 - tf.expand_dims(tf.reduce_mean(H1, axis=1), 1)
        H2bar = H2 - tf.expand_dims(tf.reduce_mean(H2, axis=1), 1)

        scale = 1.0 / (m - 1)
        SigmaHat12 = scale * tf.matmul(H1bar, H2bar, transpose_b=True)
        SigmaHat11 = scale * tf.matmul(H1bar, H1bar, transpose_b=True) + r1 * tf.eye(self.outdim_size)
        SigmaHat22 = scale * tf.matmul(H2bar, H2bar, transpose_b=True) + r2 * tf.eye(self.outdim_size)

        # Calculating the root inverse of covariance matrices by using eigen decomposition
        SigmaHat11RootInv = self._root_inverse(SigmaHat11, eps)
        SigmaHat22RootInv = self._root_inverse(SigmaHat22, eps)

        Tval = tf.matmul(tf.matmul(SigmaHat11RootInv, SigmaHat12), SigmaHat22RootInv)

        if use_all_singular_values:
            # all singular values are used to calculate the correlation
            Tval.set_shape([self.outdim_size, self.outdim_size])
            s = tf.svd(Tval, compute_uv=False)
            corr = tf.reduce_sum(s)
        else:
            # just the top outdim_size singular values are used; they are the
            # square roots of the largest eigenvalues of Tval^T Tval.
            eigvals, _ = tf.linalg.eigh(tf.matmul(Tval, Tval, transpose_a=True))
            non_critical_indexes = tf.reshape(tf.where(tf.greater(eigvals, eps)), [-1])
            eigvals = tf.gather(eigvals, non_critical_indexes)
            # top_k defaults to k=1, so k must be passed explicitly; it is
            # capped by how many eigenvalues survived the eps filter.
            k = tf.minimum(self.outdim_size, tf.shape(eigvals)[0])
            top = tf.nn.top_k(eigvals, k=k).values
            corr = tf.reduce_sum(tf.sqrt(top))
        return -corr


# ---- /load.py ----
import scipy.io as sio
import tensorflow as tf
import numpy as np


class DataSet(object):
    """In-memory container for paired two-view samples and their labels."""

    def __init__(self, images1, images2, labels, fake_data=False, one_hot=False,
                 dtype=tf.float32):
        """Construct a DataSet.

        `one_hot` is used only if `fake_data` is true. `dtype` must be
        `uint8` or `float32`. With `float32`, inputs of any other dtype are
        cast with `astype`; no rescaling into `[0, 1]` is performed here.
        """
        dtype = tf.as_dtype(dtype).base_dtype
        if dtype not in (tf.uint8, tf.float32):
            raise TypeError('Invalid image dtype %r, expected uint8 or float32' % dtype)

        if fake_data:
            self._num_examples = 10000
            self.one_hot = one_hot
        else:
            assert images1.shape[0] == labels.shape[0], (
                'images1.shape: %s labels.shape: %s' % (images1.shape,
                                                       labels.shape))
            assert images2.shape[0] == labels.shape[0], (
                'images2.shape: %s labels.shape: %s' % (images2.shape,
                                                       labels.shape))
            self._num_examples = images1.shape[0]
            # No reshaping is done: the arrays are stored exactly as given.
            if dtype == tf.float32 and images1.dtype != np.float32:
                print("type conversion view 1")
                images1 = images1.astype(np.float32)

            if dtype == tf.float32 and images2.dtype != np.float32:
                print("type conversion view 2")
                images2 = images2.astype(np.float32)

        self._images1 = images1
        self._images2 = images2
        self._labels = labels
        self._epochs_completed = 0
        self._index_in_epoch = 0

    @property
    def images1(self):
        return self._images1

    @property
    def images2(self):
        return self._images2

    @property
    def labels(self):
        return self._labels

    @property
    def num_examples(self):
        return self._num_examples

    @property
    def epochs_completed(self):
        return self._epochs_completed

    def next_batch(self, batch_size, fake_data=False):
        """Return the next `batch_size` examples from this data set.

        Returns a (view1, view2, labels) triple. When the current epoch is
        exhausted, all three arrays are reshuffled with one permutation and
        the batch is taken from the start of the new epoch.
        """
        if fake_data:
            fake_image = [1] * 784
            if self.one_hot:
                fake_label = [1] + [0] * 9
            else:
                fake_label = 0
            # `range`, not the Python-2-only `xrange`.
            return ([fake_image for _ in range(batch_size)],
                    [fake_image for _ in range(batch_size)],
                    [fake_label for _ in range(batch_size)])

        start = self._index_in_epoch
        self._index_in_epoch += batch_size
        if self._index_in_epoch > self._num_examples:
            # Finished epoch
            self._epochs_completed += 1
            # Shuffle the data
            perm = np.arange(self._num_examples)
            np.random.shuffle(perm)
            self._images1 = self._images1[perm]
            self._images2 = self._images2[perm]
            self._labels = self._labels[perm]
            # Start next epoch
            start = 0
            self._index_in_epoch = batch_size
            assert batch_size <= self._num_examples

        end = self._index_in_epoch
        return self._images1[start:end], self._images2[start:end], self._labels[start:end]


def _read_splits(path):
    """Load a .mat file and wrap its train/tune/test arrays in DataSets."""
    data = sio.loadmat(path)
    train = DataSet(data['X1'], data['X2'], data['trainLabel'])
    tune = DataSet(data['XV1'], data['XV2'], data['tuneLabel'])
    test = DataSet(data['XTe1'], data['XTe2'], data['testLabel'])
    return train, tune, test


def read_mnist(path='MNIST.mat'):
    """Return (train, tune, test) DataSets read from the MNIST .mat file."""
    return _read_splits(path)


def read_xrmb(path='/share/data/speech-multiview/wwang5/cca/XRMBf2KALDI_window7_single.mat'):
    """Return (train, tune, test) DataSets read from the XRMB .mat file."""
    return _read_splits(path)


def read_flicker(path_template='/share/data/speech-multiview/wwang5/cca/VCCA/flicker/flicker_tensorflow_split%d.mat',
                 num_splits=10):
    """Return (train, tune, test) DataSets built from the flicker split files.

    Files ``path_template % i`` for i in 1..num_splits are loaded and their
    arrays concatenated in order. The files' labels are not read; every
    DataSet gets an all-zero label vector.
    """
    keys = ('X1', 'X2', 'XV1', 'XV2', 'XTe1', 'XTe2')
    parts = {key: [] for key in keys}
    for i in range(1, num_splits + 1):
        data = sio.loadmat(path_template % i)
        for key in keys:
            parts[key].append(data[key])

    # One concatenate per array instead of re-copying on every split.
    X1, X2, XV1, XV2, XTe1, XTe2 = (np.concatenate(parts[key]) for key in keys)

    train = DataSet(X1, X2, np.zeros(len(X1)))
    tune = DataSet(XV1, XV2, np.zeros(len(XV1)))
    test = DataSet(XTe1, XTe2, np.zeros(len(XTe1)))

    return train, tune, test