├── AM_205_Final_Report.pdf ├── vis_utils.py ├── pca_impute_demo.py ├── pca_impute.py ├── README.md ├── pca_all_impute.py ├── bpca_pymc.py ├── ppca_demo.py ├── ppca.py └── bpca.py /AM_205_Final_Report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzaym/Probabilistic-and-Bayesian-PCA/HEAD/AM_205_Final_Report.pdf -------------------------------------------------------------------------------- /vis_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | def hinton(matrix=None, max_weight=None, ax=None): 5 | """Draw Hinton diagram for visualizing a weight matrix.""" 6 | ax = ax if ax is not None else plt.gca() 7 | 8 | if not max_weight: 9 | max_weight = 2 ** np.ceil(np.log(np.abs(matrix).max()) / np.log(2)) 10 | 11 | ax.patch.set_facecolor('gray') 12 | ax.set_aspect('equal', 'box') 13 | ax.xaxis.set_major_locator(plt.NullLocator()) 14 | ax.yaxis.set_major_locator(plt.NullLocator()) 15 | 16 | for (x, y), w in np.ndenumerate(matrix): 17 | color = 'white' if w > 0 else 'black' 18 | size = np.sqrt(np.abs(w) / max_weight) 19 | rect = plt.Rectangle([x - size / 2, y - size / 2], size, size, 20 | facecolor=color, edgecolor=color) 21 | ax.add_patch(rect) 22 | 23 | ax.autoscale_view() 24 | ax.invert_yaxis() 25 | plt.show() -------------------------------------------------------------------------------- /pca_impute_demo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from numpy.random import multivariate_normal 4 | from pca_impute import PCAImputer 5 | 6 | if __name__ == '__main__': 7 | 8 | # original data 9 | cov = np.diag([10, 9, 8, 7] + [1]*28 + [6, 5, 4, 3] + [1]*28)**2 10 | data = multivariate_normal(np.zeros(64), cov, 256) 11 | 12 | # missing at random 13 | mask_missing = np.random.randint(2, size=data.shape) 14 | data_missing = data.copy() 15 | data_missing[np.where(mask_missing)] = np.nan 16 | 17 | # impute by PCA 18 | imputer = PCAImputer(n_dimension=8) 19 | data_imputed = imputer.fit_transform(data_missing, n_iteration=100) 20 | 21 | plt.matshow(data) 22 | plt.title('original data') 23 | plt.show() 24 | plt.matshow(data_missing) 25 | plt.title('missing at random') 26 | plt.show() 27 | plt.matshow(data_imputed) 28 | plt.title('imputed data') 29 | plt.show() 30 | 31 | print('reconstruction err: {}'.format(np.sqrt(np.sum(np.sum(np.square(data-data_imputed)))))) -------------------------------------------------------------------------------- /pca_impute.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ppca import PPCA 3 | 4 | class PCAImputer: 5 | 6 | def __init__(self, n_dimension): 7 | self._q = n_dimension 8 | 9 | def fit_transform(self, data, method='eig', probabilistic=False, n_iteration=100): 10 | """fitting a PCA to the original data by iterativly filling the missing entries 11 | with value generated from PCA. 
Each missing entries are initialized with the 12 | row mean.""" 13 | self._data = data.copy() 14 | self._missing = np.isnan(data) 15 | self._observed = ~self._missing 16 | self._pca = PPCA(n_dimension = self._q) 17 | 18 | row_defau = np.zeros(self._data.shape[0]) 19 | row_means = np.repeat(np.nanmean(self._data, axis=1, out=row_defau).reshape(-1, 1), \ 20 | self._data.shape[1], axis=1) 21 | self._data[self._missing] = row_means[self._missing] 22 | for i in range(n_iteration): 23 | self._pca.fit(self._data, method=method) 24 | self._data[self._missing] = self._pca.inverse_transform(self._pca.transform(self._data, \ 25 | probabilistic), probabilistic)[self._missing] 26 | return self._data 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Probabilistic and Bayesian PCA 2 | 3 | ### Overview 4 | This repository is for the final project of Harvard AM205 "Numerical Methods". We examined two generalized versions of conventional PCA from a statistical perspective: Probabilistic PCA (PPCA) and Bayesian PCA (BPCA). We compared their behaviors on synthetic data and real-world data with different distributions, and also explored the possible application for estimating missing data. 5 | 6 | ### Code Specification 7 | #### Python files: 8 | 18 | 19 | #### Jupyter notebooks (for test and demo): 20 | 21 | 28 | -------------------------------------------------------------------------------- /pca_all_impute.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ppca import PPCA 3 | from sklearn.decomposition import PCA 4 | from bpca import BPCA 5 | 6 | class PCAImputer: 7 | 8 | def __init__(self, method='pca', n_dimension=0): 9 | self._q = n_dimension 10 | self._method = method 11 | if method == 'pca': 12 | self._pca = PCA(n_components=self._q) 13 | elif method == 'ppca': 14 | self._pca = PPCA(n_dimension=self._q) 15 | else: 16 | self._pca = BPCA() 17 | 18 | def fit_transform(self, data, ppca_method='eig', probabilistic=False, n_iteration=100, \ 19 | verbose=False, print_every=50, trace_mse=False, cdata=None): 20 | self._data = data.copy() 21 | self._missing = np.isnan(data) 22 | self._observed = ~self._missing 23 | self._mse = np.zeros(n_iteration) 24 | 25 | row_defau = np.zeros(self._data.shape[0]) 26 | row_means = np.repeat(np.nanmean(self._data, axis=1, out=row_defau).reshape(-1, 1), \ 27 | self._data.shape[1], axis=1) 28 | self._data[self._missing] = row_means[self._missing] 29 | self._data = np.nan_to_num(self._data) 30 | 31 | for i in range(n_iteration): 32 | if self._method == 'ppca': 33 | self._pca.fit(self._data, method=ppca_method) 34 | self._data[self._missing] = self._pca.inverse_transform(self._pca.transform(self._data, \ 35 | probabilistic), probabilistic)[self._missing] 36 | else: 37 | self._pca.fit(self._data) 38 | self._data[self._missing] = self._pca.inverse_transform(self._pca.transform(self._data))[self._missing] 39 | self._mse[i] = np.sum((cdata-self._data)**2)/cdata.shape[0] 40 | if verbose and i % print_every == 0: 41 | print('Iter %d, MSE=%f' %(i, self._mse[i])) 42 | # if np.abs(self._mse[i-1]-self._mse[i]) < 1e-6: 43 | # break 44 | return self._data, self._mse -------------------------------------------------------------------------------- /bpca_pymc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as 
plt 3 | import pymc3 as pm 4 | from theano.tensor.nlinalg import diag 5 | 6 | 7 | class BPCA(object): 8 | 9 | def __init__(self, X, a_alpha=1e-3, b_alpha=1e-3, a_tau=1e-3, b_tau=1e-3, beta=1e-3): 10 | # data, # of samples, dims 11 | self.X = X 12 | self.d = self.X.shape[1] 13 | self.N = self.X.shape[0] 14 | self.q = self.d-1 15 | 16 | # hyperparameters 17 | self.a_alpha = a_alpha 18 | self.b_alpha = b_alpha 19 | self.a_tau = a_tau 20 | self.b_tau = b_tau 21 | self.beta = beta 22 | 23 | with pm.Model() as model: 24 | z = pm.MvNormal('z', mu=np.zeros(self.q), cov=np.eye(self.q), shape=(self.N, self.q)) 25 | mu = pm.MvNormal('mu', mu=np.zeros(self.d), cov=np.eye(self.d)/self.beta, shape=self.d) 26 | alpha = pm.Gamma('alpha', alpha=self.a_alpha, beta=self.b_alpha, shape=self.q) 27 | w = pm.MatrixNormal('w', mu=np.zeros((self.d, self.q)), rowcov=np.eye(self.d), colcov=diag(1/alpha), shape=(self.d, self.q)) 28 | tau = pm.Gamma('tau', alpha=self.a_tau, beta=self.b_tau) 29 | x = pm.math.dot(z, w.T) + mu 30 | obs_x = pm.MatrixNormal('obs_x', mu=x, rowcov=np.eye(self.N), colcov=np.eye(self.d)/tau, shape=(self.N, self.d), observed=self.X) 31 | 32 | self.model = model 33 | 34 | 35 | def fit(self, iters=10000): 36 | with self.model: 37 | inference = pm.ADVI() 38 | approx = pm.fit(n=iters, method=inference) 39 | trace = approx.sample(iters//2) 40 | 41 | # save 42 | s = len(trace)//2 43 | self.trace = trace 44 | self.inference = inference 45 | self.z = trace[s::]['z'].mean(axis=0) 46 | self.mu = trace[s::]['mu'].mean(axis=0) 47 | self.alpha = trace[s::]['alpha'].mean(axis=0) 48 | self.w = trace[s::]['w'].mean(axis=0) 49 | 50 | 51 | def transform(self): 52 | x = pm.sample_ppc(self.trace, 5000, model=self.model) 53 | return x['obs_x'].mean(axis=0) 54 | 55 | 56 | def fit_transform(self, iters=10000): 57 | self.fit(iters) 58 | return self.transform() 59 | 60 | 61 | def get_weight_matrix(self): 62 | return self.w 63 | 64 | 65 | def get_inv_variance(self): 66 | return self.alpha 67 | 68 | 69 | def get_elbos(self): 70 | return -self.inference.hist -------------------------------------------------------------------------------- /ppca_demo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from numpy.random import multivariate_normal 4 | from ppca import PPCA 5 | 6 | if __name__ == '__main__': 7 | 8 | cov = np.diag([10, 9, 8, 7] + [1]*28 + [6, 5, 4, 3] + [1]*28)**2 9 | data = multivariate_normal(np.zeros(64), cov, 256) 10 | 11 | ppca1 = PPCA(n_dimension=4) 12 | ppca1.fit(data, method='EM') 13 | ppca2 = PPCA(n_dimension=4) 14 | ppca2.fit(data, method='eig') 15 | 16 | print('\n\n\n\n**TEST FITTING THE COVARIANCE MATRIX**') 17 | plt.matshow(cov); 18 | print('\n\noriginal covariance matrix') 19 | plt.show() 20 | plt.matshow(ppca1._C); 21 | print('\n\nfitted covariance matrix (fitted by EM)') 22 | plt.show() 23 | plt.matshow(ppca2._C); 24 | print('\n\nfitted covariance matrix (fitted by eigen)') 25 | plt.show() 26 | 27 | print('\n\n\n\n**TEST GENERATING DATA**') 28 | plt.scatter(data[:, 0], data[:, 1], alpha=0.2); 29 | print('\n\noriginal data (first 2 dimensions)') 30 | plt.show() 31 | gene = ppca1.generate(256) 32 | plt.scatter(gene[:, 0], gene[:, 1], alpha=0.2); 33 | print('\n\ngenerated data (first 2 dimensions) (fitted by EM)') 34 | plt.show() 35 | gene = ppca2.generate(256) 36 | plt.scatter(gene[:, 0], gene[:, 1], alpha=0.2); 37 | print('\n\ngenerated data (first 2 dimensions) (fitted by eigen)') 38 | plt.show() 39 | 
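    # Added check (a minimal sketch, not part of the original demo): compare the two
    # fitted covariance estimates numerically, using the same `_C` attribute the
    # matshow calls above already rely on.
    err_em = np.linalg.norm(cov - ppca1._C)
    err_eig = np.linalg.norm(cov - ppca2._C)
    print('Frobenius error of EM-fitted covariance:  {:.3f}'.format(err_em))
    print('Frobenius error of eig-fitted covariance: {:.3f}'.format(err_eig))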
40 | print('\n\n\n\n**TEST CALCULATING LIKELIHOOD**') 41 | ppca1 = PPCA(n_dimension=2) 42 | loglikelihoods = ppca1.fit(data, method='EM', keep_loglikes=True) 43 | plt.plot(loglikelihoods) 44 | plt.show() 45 | 46 | print('\n\n\n\n**TEST DIMENSION REDUCTION AND RECOVERING**') 47 | plt.matshow(data) 48 | print('\n\noriginal data') 49 | plt.show() 50 | 51 | ppca3 = PPCA(n_dimension=2) 52 | ppca3.fit(data, method='EM') 53 | plt.matshow( ppca3.inverse_transform( ppca3.transform(data) ) ) 54 | print('\n\nrecovered data: 2-component') 55 | plt.show() 56 | 57 | ppca4 = PPCA(n_dimension=2) 58 | ppca4.fit(data, batchsize=16, n_iteration=2000, method='EM') 59 | plt.matshow( ppca4.inverse_transform( ppca4.transform(data) ) ) 60 | print('\n\nrecovered data: 2-component (mini-batch)') 61 | plt.show() 62 | 63 | ppca5 = PPCA(n_dimension=63) 64 | ppca5.fit(data, method='EM') 65 | plt.matshow( ppca5.inverse_transform( ppca5.transform(data) ) ) 66 | print('\n\nrecovered data: 63-component') 67 | plt.show() 68 | 69 | -------------------------------------------------------------------------------- /ppca.py: -------------------------------------------------------------------------------- 1 | """ 2 | Probablistic Principal Component Analysis using the EM algorithm from Tipping & Bishop 1997. 3 | (See http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.426.2749&rep=rep1&type=pdf) 4 | """ 5 | 6 | import numpy as np 7 | from numpy.random import randn 8 | from numpy.random import normal 9 | from numpy.random import multivariate_normal 10 | from numpy.linalg import det 11 | from numpy.linalg import eig 12 | from numpy.linalg import pinv 13 | from numpy.linalg import multi_dot 14 | from sklearn.exceptions import NotFittedError 15 | 16 | class PPCA(object): 17 | 18 | def __init__(self, n_dimension): 19 | 20 | # mapping from latent variables to observations: x = W z + mu + epsilon 21 | # where x: observation, x in R^(d) 22 | # q: dimension of latent space 23 | self._q = n_dimension 24 | 25 | def fit(self, data, batchsize=None, n_iteration=500, method='EM', 26 | keep_loglikes=False): 27 | 28 | if method not in ('EM', 'eig'): 29 | raise ValueError('unrecognized method.') 30 | if method == 'eig' and keep_loglikes: 31 | raise ValueError('loglike not supported for eig method') 32 | if method == 'eig' and batchsize is not None: 33 | raise ValueError('mini-batch not supported for eig method') 34 | 35 | ####################### INITIALIZE OBSERVATIONS ####################### 36 | # X: observations, X in R^(d*N), data assumed to be in R^(N*d) 37 | self._X = data.T 38 | # d: dimension of observations 39 | self._d = self._X.shape[0] 40 | # N: number of observations 41 | self._N = self._X.shape[1] 42 | # mu: mean of x, mu in R^(d) 43 | self._mu = np.mean(self._X, axis=1).reshape(-1, 1) 44 | 45 | ##################### INITIALIZE LATENT VARIABLES ##################### 46 | # W: linear transformation matrix, W in R^(d*q) 47 | self._W = randn(self._d, self._q) 48 | # epsilon: Gaussian noise, epsilon in R^(d), epsilon ~ N(0, sigma^2 I) 49 | self._sigma2 = 0 50 | # C: covariance matrix of observation, x ~ N(mu, C) 51 | self._update_C() 52 | 53 | loglikes = [] if keep_loglikes else None 54 | 55 | if method == 'EM': 56 | loglikes = self._fit_EM(batchsize, n_iteration, keep_loglikes) 57 | else: #method == 'eig' 58 | self._fit_eig(n_iteration) 59 | 60 | return loglikes 61 | 62 | def transform(self, data_observ, probabilistic=False): 63 | """transform the observations into the latent space, when probabilistic 64 | set to True, will draw a 
sample from the posterior distribution of the 65 | latent variable""" 66 | assert len(data_observ.shape) == 2 67 | invM = pinv(self._calc_M()) 68 | expect_data_latent = multi_dot([invM, self._W.T, 69 | data_observ.T - self._mu]) 70 | assert expect_data_latent.shape == (self._q, len(data_observ)) 71 | if probabilistic: 72 | cov = np.dot(self._sigma2, invM) 73 | data_latent = np.zeros(shape=(len(data_observ), self._q)) 74 | for i in range(len(data_observ)): 75 | data_latent[i] = multivariate_normal(expect_data_latent[:, i].flatten(), cov) 76 | return data_latent 77 | else: 78 | return expect_data_latent.T 79 | 80 | def inverse_transform(self, data_latent, probabilistic=False): 81 | """transform the latent variable into observations, when probabilistic 82 | set to True, will draw a sample from the distribution of the observations""" 83 | assert len(data_latent.shape) == 2 84 | expect_data_observ = np.dot(self._W, data_latent.T) + self._mu 85 | if probabilistic: 86 | return (expect_data_observ 87 | + normal(scale=np.sqrt(self._sigma2), size=expect_data_observ.shape)).T 88 | else: 89 | return expect_data_observ.T 90 | 91 | def generate(self, n_sample): 92 | """generate samples from the fitted model""" 93 | try: 94 | return multivariate_normal(self._mu.flatten(), self._C, n_sample) 95 | except: 96 | raise NotFittedError('This PPCA instance is not fitted yet. Call \'fit\' with appropriate arguments before using this method.') 97 | 98 | def calc_components(self): 99 | """generate an orthonormal basis from the fitted model""" 100 | vals, vecs = eig(np.dot(self._W.T, self._W)) 101 | return np.dot( self._W, pinv(np.dot(np.diag(vals**0.5), vecs.T)) ).T 102 | ######################## FITTING BY EM ALGORITHM ########################## 103 | def _fit_EM(self, batchsize, n_iteration=500, keep_loglikes=False): 104 | 105 | if batchsize is not None and batchsize > self._N: 106 | raise ValueError('batchsize exceeds number of observations') 107 | 108 | loglikes = [] if keep_loglikes else None 109 | 110 | for i in range(n_iteration): 111 | # E-step: Estimation (omitted) 112 | # M-step: Maximization 113 | if batchsize is not None: 114 | idx = self.batch_idx(i, batchsize) 115 | Xb = self._X[:, idx] 116 | self._maximize_L(Xb, np.mean(Xb, axis=1).reshape(-1, 1)) 117 | else: 118 | self._maximize_L(self._X, self._mu) 119 | 120 | if keep_loglikes: 121 | loglikes.append(self._calc_loglike(self._X, self._mu)) 122 | 123 | return loglikes 124 | 125 | def _maximize_L(self, X, mu): 126 | S = self._calc_S(X, mu) 127 | M = self._calc_M() 128 | self._update_W(S, M) 129 | self._update_sigma2(S, M) 130 | self._update_C() 131 | 132 | def _update_W(self, S, M): 133 | temp = pinv( self._sigma2 * np.eye(self._q) \ 134 | + multi_dot([ pinv(M), self._W.T, S, self._W]) ) 135 | self._W = multi_dot([ S, self._W, temp ]) 136 | 137 | def _update_sigma2(self, S, M): 138 | temp = multi_dot([ S, self._W, pinv(M), self._W.T ]) 139 | self._sigma2 = 1/self._d * np.trace(S - temp) 140 | 141 | ##################### FITTING BY EIGENDECOMPOSITION ####################### 142 | def _fit_eig(self, n_iteration=500): 143 | 144 | S = self._calc_S(self._X, self._mu) 145 | vals, vecs = eig(S) 146 | vals, vecs = vals.real, vecs.real 147 | ordbydom = np.argsort(vals)[::-1] 148 | topq_dom = ordbydom[:self._q] 149 | less_dom = ordbydom[self._q:] 150 | self._sigma2 = np.sum(vals[less_dom]) / (self._d - self._q) 151 | self._W = np.dot( vecs[:, topq_dom], 152 | np.sqrt(np.diag(vals[topq_dom])-self._sigma2*np.eye(self._q)) ) 153 | self._update_C() 154 | 155 | 
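    # Note on the closed-form fit above (added): it follows Tipping & Bishop's
    # maximum-likelihood solution, where sigma^2 is the average of the d - q
    # discarded eigenvalues of S and W = U_q (L_q - sigma^2 I)^(1/2), with U_q
    # and L_q the top-q eigenvectors and eigenvalues. A minimal usage sketch
    # (variable names here are illustrative only):
    #
    #   ppca = PPCA(n_dimension=4)
    #   ppca.fit(data, method='eig')         # data: (N, d) array
    #   Z = ppca.transform(data)             # (N, 4) latent coordinates
    #   X_hat = ppca.inverse_transform(Z)    # (N, d) reconstruction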
########################### UTILITY FUNCTIONS ############################# 156 | def _calc_S(self, X, mu): 157 | """calculate the covariance matrix of observations X""" 158 | centeredX = X - mu 159 | return np.dot(centeredX, centeredX.T) / X.shape[1] 160 | 161 | def _calc_M(self): 162 | return self._sigma2 * np.eye(self._q) + np.dot(self._W.T, self._W) 163 | 164 | def _calc_loglike(self, X, mu): 165 | """calculate the loglikelihood of observing data X""" 166 | return -self._N/2 * (self._d*np.log(2*np.pi) \ 167 | + np.log(det(self._C)) \ 168 | + np.trace(np.dot(pinv(self._C), self._calc_S(X, mu.reshape(-1,1))))) 169 | 170 | def _update_C(self): 171 | self._C = self._sigma2 * np.eye(self._d) + np.dot(self._W, self._W.T) 172 | 173 | def batch_idx(self, i, batchsize): 174 | if batchsize == self._N: 175 | return np.arange(self._N) 176 | idx1 = (i*batchsize) % self._N 177 | idx2 = ((i+1)*batchsize) % self._N 178 | if idx2 < idx1: idx1 -= self._N 179 | return np.arange(idx1, idx2) -------------------------------------------------------------------------------- /bpca.py: -------------------------------------------------------------------------------- 1 | # https://pdfs.semanticscholar.org/a1fb/a67f147b16e3c4bffdab3cc6f17520c74547.pdf 2 | 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import scipy 6 | from scipy.stats import multivariate_normal as mvn 7 | from scipy.stats import gamma 8 | 9 | class BPCA(object): 10 | 11 | def __init__(self, a_alpha=1e-3, b_alpha=1e-3, a_tau=1e-3, b_tau=1e-3, beta=1e-3): 12 | # hyperparameters 13 | self.a_alpha = a_alpha # parameter of alpha's prior (a Gamma distribution) 14 | self.b_alpha = b_alpha # parameter of alpha's prior (a Gamma distribution) 15 | self.a_tau = a_tau # parameter of tau's prior (a Gamma distribution) 16 | self.b_tau = b_tau # parameter of tau's prior (a Gamma distribution) 17 | self.beta = beta 18 | # history of ELBOS 19 | self.elbos = None 20 | self.variations = None 21 | # history of log likelihoods 22 | self.loglikelihoods = None 23 | 24 | 25 | def update(self): 26 | """fixed-point update of the Bayesian PCA""" 27 | # inverse of the sigma^2 28 | self.tau = self.a_tau_tilde / self.b_tau_tilde 29 | # hyperparameters controlling the magnitudes of each column of the weight matrix 30 | self.alpha = self.a_alpha_tilde / self.b_alpha_tilde 31 | # covariance matrix of the latent variables 32 | self.cov_z = np.linalg.inv(np.eye(self.q) + self.tau * 33 | (np.trace(self.cov_w) + np.dot(self.mean_w.T, self.mean_w))) 34 | # mean of the latent variable 35 | self.mean_z = self.tau * np.dot(np.dot(self.cov_z, self.mean_w.T), self.Xb - self.mean_mu) 36 | # covariance matrix of the mean observation 37 | self.cov_mu = np.eye(self.d) / (self.beta + self.b * self.tau) 38 | # mean of the mean observation 39 | self.mean_mu = self.tau * np.dot(self.cov_mu, np.sum(self.Xb-np.dot(self.mean_w, 40 | self.mean_z), axis=1)).reshape(self.d, 1) 41 | # covariance matrix of each column of the weight matrix 42 | self.cov_w = np.linalg.inv(np.diag(self.alpha) + self.tau * 43 | (self.b * self.cov_z + np.dot(self.mean_z, self.mean_z.T))) 44 | # mean of each column of the weight matrix 45 | self.mean_w = self.tau * np.dot(self.cov_w, np.dot(self.mean_z, (self.Xb-self.mean_mu).T)).T 46 | # estimation of the b in alpha's Gamma distribution 47 | self.b_alpha_tilde = self.b_alpha + 0.5 * (np.trace(self.cov_w) + 48 | np.diag(np.dot(self.mean_w.T, self.mean_w))) 49 | # estimation of the b in tau's Gamma distribution 50 | self.b_tau_tilde = self.b_tau + 0.5 * 
np.trace(np.dot(self.Xb.T, self.Xb)) + \ 51 | 0.5 * self.b*(np.trace(self.cov_mu)+np.dot(self.mean_mu.flatten(), self.mean_mu.flatten())) + \ 52 | 0.5 * np.trace(np.dot(np.trace(self.cov_w)+np.dot(self.mean_w.T, self.mean_w), 53 | self.b*self.cov_z+np.dot(self.mean_z, self.mean_z.T))) + \ 54 | np.sum(np.dot(np.dot(self.mean_mu.flatten(), self.mean_w), self.mean_z)) + \ 55 | -np.trace(np.dot(self.Xb.T, np.dot(self.mean_w, self.mean_z))) + \ 56 | -np.sum(np.dot(self.Xb.T, self.mean_mu)) 57 | 58 | 59 | def calculate_log_likelihood(self): 60 | """calculate the log likelihood of observing self.X""" 61 | w = self.mean_w 62 | c = np.eye(self.d)*self.tau + np.dot(w, w.T) 63 | xc = self.X - self.X.mean(axis=1).reshape(-1,1) 64 | s = np.dot(xc, xc.T) / self.N 65 | self.s = s 66 | c_inv_s = scipy.linalg.lstsq(c, s)[0] 67 | loglikelihood = -0.5*self.N*(self.d*np.log(2*np.pi)+np.log(np.linalg.det(c))+np.trace(c_inv_s)) 68 | return loglikelihood 69 | 70 | 71 | def calculate_ELBO(self): 72 | '''ELBO = E_q[-log(q(theta))+log(p(theta)+log(p(Y|theta,X)))] 73 | = -entropy + logprior + loglikelihood ''' 74 | 75 | # random sample 76 | z = np.array([np.random.multivariate_normal(self.mean_z[:,i], self.cov_z) for i in range(self.b)]).T 77 | mu = np.random.multivariate_normal(self.mean_mu.flatten(), self.cov_mu) 78 | w = np.array([np.random.multivariate_normal(self.mean_w[i], self.cov_w) for i in range(self.d)]) 79 | alpha = np.random.gamma(self.a_alpha_tilde, 1/self.b_alpha_tilde) 80 | tau = np.random.gamma(self.a_tau_tilde, 1/self.b_tau_tilde) 81 | 82 | # entropy 83 | # q(z) 84 | entropy = np.sum(np.array([mvn.logpdf(z[:,i], self.mean_z[:,i], self.cov_z) for i in range(self.b)])) 85 | 86 | # q(mu) 87 | entropy += mvn.logpdf(mu, self.mean_mu.flatten(), self.cov_mu) 88 | 89 | # q(W) 90 | entropy += np.sum(np.array([mvn.logpdf(w[i], self.mean_w[i], self.cov_w) for i in range(self.d)])) 91 | 92 | # q(alpha) 93 | entropy += np.sum(gamma.logpdf(alpha, self.a_alpha_tilde, scale=1/self.b_alpha_tilde)) 94 | 95 | # q(tau) 96 | entropy += gamma.logpdf(tau, self.a_tau_tilde, scale=1/self.b_tau_tilde) 97 | 98 | # logprior 99 | # p(z), z ~ N(0, I) 100 | logprior = np.sum(np.array([mvn.logpdf(z[:,i], mean=np.zeros(self.q), cov=np.eye(self.q)) for i in range(self.b)])) 101 | 102 | # p(w|alpha), conditional gaussian 103 | logprior += np.sum(np.array([self.d/2*np.log(alpha[i]/(2*np.pi))-alpha[i]*np.sum(w[:,i]**2)/2 for i in range(self.q)])) 104 | 105 | # p(alpha), alpha[i] ~ Gamma(a, b) 106 | logprior += np.sum(gamma.logpdf(alpha, self.a_alpha, scale=1/self.b_alpha)) 107 | 108 | # p(mu), mu ~ N(0, I/beta) 109 | logprior += mvn.logpdf(mu, mean=np.zeros(self.d), cov=np.eye(self.d)/self.beta) 110 | 111 | # p(tau), tau ~ Gamma(c, d) 112 | logprior += gamma.logpdf(tau, self.a_tau, scale=1/self.b_tau) 113 | 114 | # loglikelihood 115 | pred = np.dot(w, z) + mu.reshape(-1,1) 116 | loglikelihood = np.sum(np.array([mvn.logpdf(self.Xb[:,i], pred[:,i], np.eye(self.d)/tau) for i in range(self.b)])) 117 | 118 | return -entropy + logprior + loglikelihood 119 | 120 | 121 | def batch_idx(self, i): 122 | if self.b == self.N: 123 | return np.arange(self.N) 124 | idx1 = (i*self.b) % self.N 125 | idx2 = ((i+1)*self.b) % self.N 126 | if idx2 < idx1: 127 | idx1 -= self.N 128 | return np.arange(idx1, idx2) 129 | 130 | 131 | def fit(self, X=None, batch_size=128, iters=500, print_every=100, verbose=False, trace_elbo=False, trace_loglikelihood=False): 132 | """fit the Bayesian PCA model using fixed-point update""" 133 | # data, # of samples, dims 134 | 
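        # Added summary: each self.update() call below performs one fixed-point
        # sweep over the factorised variational posterior,
        #   q(Z)     -> self.mean_z, self.cov_z
        #   q(mu)    -> self.mean_mu, self.cov_mu
        #   q(W)     -> self.mean_w, self.cov_w (one q x q covariance shared by all d rows of W)
        #   q(alpha) -> self.b_alpha_tilde (the shape a_alpha_tilde is fixed once below)
        #   q(tau)   -> self.b_tau_tilde   (the shape a_tau_tilde is fixed once below)
        # with self.alpha and self.tau taken as the corresponding posterior means.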
self.X = X.T # X is passed as (N, d) with samples in rows; it is transposed to (d, N) internally, so callers should not transpose it
135 | self.d = self.X.shape[0]
136 | self.N = self.X.shape[1]
137 | self.q = self.d-1
138 | self.ed = []
139 | self.b = min(batch_size, self.N)
140 | 
141 | # variational parameters
142 | self.mean_z = np.random.randn(self.q, self.b) # latent variable
143 | self.cov_z = np.eye(self.q)
144 | self.mean_mu = np.random.randn(self.d, 1)
145 | self.cov_mu = np.eye(self.d)
146 | self.mean_w = np.random.randn(self.d, self.q)
147 | self.cov_w = np.eye(self.q)
148 | self.a_alpha_tilde = self.a_alpha + self.d/2
149 | self.b_alpha_tilde = np.abs(np.random.randn(self.q))
150 | self.a_tau_tilde = self.a_tau + self.b * self.d / 2
151 | self.b_tau_tilde = np.abs(np.random.randn(1))
152 | 
153 | # update
154 | order = np.arange(self.N)
155 | elbos = np.zeros(iters)
156 | loglikelihoods = np.zeros(iters)
157 | for i in range(iters):
158 | idx = order[self.batch_idx(i)]
159 | self.Xb = self.X[:,idx]
160 | self.update()
161 | if trace_elbo:
162 | elbos[i] = self.calculate_ELBO()
163 | if trace_loglikelihood:
164 | loglikelihoods[i] = self.calculate_log_likelihood()
165 | if verbose and i % print_every == 0:
166 | print('Iter %d, LL: %f, alpha: %s' % (i, loglikelihoods[i], str(self.alpha)))
167 | self.captured_dims()
168 | self.elbos = elbos if trace_elbo else None
169 | self.loglikelihoods = loglikelihoods if trace_loglikelihood else None
170 | 
171 | 
172 | def captured_dims(self):
173 | """record the indices of the captured (effective) dimensions in self.ed"""
174 | sum_alpha = np.sum(1/self.alpha)
175 | self.ed = np.array([i for i, inv_alpha in enumerate(1/self.alpha) if inv_alpha < sum_alpha/self.q])
176 | 
177 | 
178 | def transform(self, X=None, full=True):
179 | """project the observations into the latent space using the posterior mean of W"""
180 | X = self.X if X is None else X.T
181 | if full:
182 | w = self.mean_w
183 | l = self.q
184 | else:
185 | w = self.mean_w[:, self.ed]
186 | l = len(self.ed)
187 | m = np.eye(l)*self.tau + np.dot(w.T, w)
188 | inv_m = np.linalg.inv(m)
189 | z = np.dot(np.dot(inv_m, w.T), X - self.mean_mu)
190 | return z.T
191 | # return np.array([np.random.multivariate_normal(z[:,i], inv_m*self.tau) for i in range(X.shape[1])])
192 | 
193 | 
194 | def inverse_transform(self, z, full=True):
195 | """transform the latent variable into observations"""
196 | z = z.T
197 | if full:
198 | w = self.mean_w
199 | else:
200 | w = self.mean_w[:, self.ed]
201 | x = np.dot(w, z) + self.mean_mu
202 | return x.T
203 | # return np.array([np.random.multivariate_normal(x[:,i], np.eye(self.d)*self.tau) for i in range(z.shape[1])])
204 | 
205 | 
206 | def fit_transform(self, X=None, batch_size=128, iters=500, print_every=100, verbose=False, trace_elbo=False, trace_loglikelihood=False):
207 | self.fit(X, batch_size, iters, print_every, verbose, trace_elbo, trace_loglikelihood)
208 | return self.transform()
209 | 
210 | 
211 | def generate(self, size=1):
212 | """generate samples from the fitted model"""
213 | w = self.mean_w[:, self.ed]
214 | c = np.eye(self.d)*self.tau + np.dot(w, w.T)
215 | return np.array([np.random.multivariate_normal(self.mean_mu.flatten(), c) for i in range(size)])
216 | 
217 | 
218 | def get_weight_matrix(self):
219 | return self.mean_w
220 | 
221 | 
222 | def get_inv_variance(self):
223 | return self.alpha
224 | 
225 | 
226 | def get_effective_dims(self):
227 | return len(self.ed)
228 | 
229 | 
230 | def get_cov_mat(self):
231 | w = self.mean_w[:, self.ed]
232 | c = np.eye(self.d)*self.tau + np.dot(w, w.T)
233 | return c
234 | 
235 | 
236 | def get_elbo(self):
237 | return self.elbos
238 | 
239 | 
240 | def get_loglikelihood(self):
241 
| return self.loglikelihoods 242 | 243 | --------------------------------------------------------------------------------
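Usage sketch (added note, not a file in the repository): a minimal end-to-end example of the BPCA class above, run on the same synthetic data used in ppca_demo.py; it uses only the interfaces shown in bpca.py.

    import numpy as np
    from numpy.random import multivariate_normal
    from bpca import BPCA

    # synthetic data: 8 dominant directions out of 64, as in the demos
    cov = np.diag([10, 9, 8, 7] + [1]*28 + [6, 5, 4, 3] + [1]*28)**2
    data = multivariate_normal(np.zeros(64), cov, 256)

    bpca = BPCA()
    bpca.fit(data, batch_size=128, iters=500, trace_loglikelihood=True)
    print('effective dimensions kept:', bpca.get_effective_dims())

    Z = bpca.transform()                 # posterior-mean latent coordinates, shape (256, 63)
    X_hat = bpca.inverse_transform(Z)    # reconstruction, shape (256, 64)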