├── AM_205_Final_Report.pdf
├── vis_utils.py
├── pca_impute_demo.py
├── pca_impute.py
├── README.md
├── pca_all_impute.py
├── bpca_pymc.py
├── ppca_demo.py
├── ppca.py
└── bpca.py
/AM_205_Final_Report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zzaym/Probabilistic-and-Bayesian-PCA/HEAD/AM_205_Final_Report.pdf
--------------------------------------------------------------------------------
/vis_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 |
4 | def hinton(matrix=None, max_weight=None, ax=None):
5 | """Draw Hinton diagram for visualizing a weight matrix."""
6 | ax = ax if ax is not None else plt.gca()
7 |
8 | if not max_weight:
9 | max_weight = 2 ** np.ceil(np.log(np.abs(matrix).max()) / np.log(2))
10 |
11 | ax.patch.set_facecolor('gray')
12 | ax.set_aspect('equal', 'box')
13 | ax.xaxis.set_major_locator(plt.NullLocator())
14 | ax.yaxis.set_major_locator(plt.NullLocator())
15 |
16 | for (x, y), w in np.ndenumerate(matrix):
17 | color = 'white' if w > 0 else 'black'
18 | size = np.sqrt(np.abs(w) / max_weight)
19 | rect = plt.Rectangle([x - size / 2, y - size / 2], size, size,
20 | facecolor=color, edgecolor=color)
21 | ax.add_patch(rect)
22 |
23 | ax.autoscale_view()
24 | ax.invert_yaxis()
25 | plt.show()
--------------------------------------------------------------------------------
/pca_impute_demo.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | from numpy.random import multivariate_normal
4 | from pca_impute import PCAImputer
5 |
6 | if __name__ == '__main__':
7 |
8 | # original data
9 | cov = np.diag([10, 9, 8, 7] + [1]*28 + [6, 5, 4, 3] + [1]*28)**2
10 | data = multivariate_normal(np.zeros(64), cov, 256)
11 |
12 | # missing at random
13 | mask_missing = np.random.randint(2, size=data.shape)
14 | data_missing = data.copy()
15 | data_missing[np.where(mask_missing)] = np.nan
16 |
17 | # impute by PCA
18 | imputer = PCAImputer(n_dimension=8)
19 | data_imputed = imputer.fit_transform(data_missing, n_iteration=100)
20 |
21 | plt.matshow(data)
22 | plt.title('original data')
23 | plt.show()
24 | plt.matshow(data_missing)
25 | plt.title('missing at random')
26 | plt.show()
27 | plt.matshow(data_imputed)
28 | plt.title('imputed data')
29 | plt.show()
30 |
31 |     print('reconstruction err: {}'.format(np.sqrt(np.sum(np.square(data - data_imputed)))))
--------------------------------------------------------------------------------
/pca_impute.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from ppca import PPCA
3 |
4 | class PCAImputer:
5 |
6 | def __init__(self, n_dimension):
7 | self._q = n_dimension
8 |
9 | def fit_transform(self, data, method='eig', probabilistic=False, n_iteration=100):
10 |         """Fit a PCA to the data by iteratively filling the missing entries with
11 |         values reconstructed from the PCA. Each missing entry is initialized with
12 |         its row mean."""
13 | self._data = data.copy()
14 | self._missing = np.isnan(data)
15 | self._observed = ~self._missing
16 | self._pca = PPCA(n_dimension = self._q)
17 |
18 | row_defau = np.zeros(self._data.shape[0])
19 | row_means = np.repeat(np.nanmean(self._data, axis=1, out=row_defau).reshape(-1, 1), \
20 | self._data.shape[1], axis=1)
21 | self._data[self._missing] = row_means[self._missing]
22 | for i in range(n_iteration):
23 | self._pca.fit(self._data, method=method)
24 | self._data[self._missing] = self._pca.inverse_transform(self._pca.transform(self._data, \
25 | probabilistic), probabilistic)[self._missing]
26 | return self._data
27 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Probabilistic and Bayesian PCA
2 |
3 | ### Overview
4 | This repository contains the final project for Harvard AM205 "Numerical Methods". We examined two generalizations of conventional PCA from a statistical perspective: Probabilistic PCA (PPCA) and Bayesian PCA (BPCA). We compared their behavior on synthetic and real-world data with different distributions, and also explored their application to estimating missing data.
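
Both methods build on the same latent-variable model (spelled out in the comments of `ppca.py`): an observation `x` in R^d is generated from a latent `z` in R^q as

    x = W z + mu + epsilon,    epsilon ~ N(0, sigma^2 I),    so that    x ~ N(mu, W W^T + sigma^2 I).

PPCA estimates `W`, `mu`, and `sigma^2` by maximum likelihood (via EM or an eigendecomposition of the sample covariance); BPCA additionally places priors on `W` (through the column precisions `alpha`), on `mu`, and on the noise precision `tau`. Conventional PCA is recovered in the limit `sigma^2 -> 0`.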
5 |
6 | ### Code Specification
7 | #### Python files:
8 |
9 | - bpca.py: an implementation of Bayesian PCA via variational inference
10 | - bpca_pymc.py: another implementation of Bayesian PCA using PyMC3
11 | - vis_utils.py: visualization utilities (Hinton diagram)
12 | - ppca.py: an implementation of probabilistic PCA
13 | - ppca_demo.py: a demo for ppca.py
14 | - pca_impute.py: an imputer built on top of ppca.py
15 | - pca_impute_demo.py: a demo for pca_impute.py
16 | - pca_all_impute.py: another imputer that integrates PCA, PPCA, and BPCA
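
A minimal usage sketch for `ppca.py` and `pca_impute.py` (condensed from `ppca_demo.py` and `pca_impute_demo.py`; the dimensions and iteration counts below are only illustrative):

```python
import numpy as np
from ppca import PPCA
from pca_impute import PCAImputer

# fit PPCA on complete data and project it to a 4-dimensional latent space
data = np.random.randn(256, 64)
ppca = PPCA(n_dimension=4)
ppca.fit(data, method='EM')                  # or method='eig'
latent = ppca.transform(data)                # shape (256, 4)
recovered = ppca.inverse_transform(latent)   # back to shape (256, 64)

# impute missing entries (marked as NaN) by iterating the PCA reconstruction
data_missing = data.copy()
data_missing[np.random.rand(*data.shape) < 0.2] = np.nan
imputer = PCAImputer(n_dimension=8)
data_imputed = imputer.fit_transform(data_missing, n_iteration=100)
```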
17 |
18 |
19 | #### Jupyter notebooks (for test and demo):
20 |
21 |
22 | - General Test.ipynb: code for the experiments section of the final report
23 | - plots.ipynb: code for the experiments section of the final report
24 | - BPCA Test.ipynb: experiments on the behavior of Bayesian PCA
25 | - Imputation Test.ipynb: experiments on the behavior of PCAImputer
26 | - recovering_SST.ipynb: code for the sea surface temperature recovery section of the final report; the data for this notebook can be found at https://drive.google.com/drive/folders/1gaVxsZJZfinTenCBicCRggJygEA50D7S?usp=sharing
27 |
28 |
--------------------------------------------------------------------------------
/pca_all_impute.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from ppca import PPCA
3 | from sklearn.decomposition import PCA
4 | from bpca import BPCA
5 |
6 | class PCAImputer:
7 |
8 | def __init__(self, method='pca', n_dimension=0):
9 | self._q = n_dimension
10 | self._method = method
11 | if method == 'pca':
12 | self._pca = PCA(n_components=self._q)
13 | elif method == 'ppca':
14 | self._pca = PPCA(n_dimension=self._q)
15 | else:
16 | self._pca = BPCA()
17 |
18 | def fit_transform(self, data, ppca_method='eig', probabilistic=False, n_iteration=100, \
19 | verbose=False, print_every=50, trace_mse=False, cdata=None):
20 | self._data = data.copy()
21 | self._missing = np.isnan(data)
22 | self._observed = ~self._missing
23 | self._mse = np.zeros(n_iteration)
24 |
25 | row_defau = np.zeros(self._data.shape[0])
26 | row_means = np.repeat(np.nanmean(self._data, axis=1, out=row_defau).reshape(-1, 1), \
27 | self._data.shape[1], axis=1)
28 | self._data[self._missing] = row_means[self._missing]
29 | self._data = np.nan_to_num(self._data)
30 |
31 | for i in range(n_iteration):
32 | if self._method == 'ppca':
33 | self._pca.fit(self._data, method=ppca_method)
34 | self._data[self._missing] = self._pca.inverse_transform(self._pca.transform(self._data, \
35 | probabilistic), probabilistic)[self._missing]
36 | else:
37 | self._pca.fit(self._data)
38 | self._data[self._missing] = self._pca.inverse_transform(self._pca.transform(self._data))[self._missing]
39 |             if cdata is not None:  # the reconstruction error can only be traced when the complete data is given
40 |                 self._mse[i] = np.sum((cdata - self._data)**2) / cdata.shape[0]
41 |                 if verbose and i % print_every == 0:
42 |                     print('Iter %d, MSE=%f' % (i, self._mse[i]))
43 |             # if np.abs(self._mse[i-1]-self._mse[i]) < 1e-6:
44 |             #     break
45 |         return self._data, self._mse
--------------------------------------------------------------------------------
/bpca_pymc.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import pymc3 as pm
4 | from theano.tensor.nlinalg import diag
5 |
6 |
7 | class BPCA(object):
8 |
9 | def __init__(self, X, a_alpha=1e-3, b_alpha=1e-3, a_tau=1e-3, b_tau=1e-3, beta=1e-3):
10 | # data, # of samples, dims
11 | self.X = X
12 | self.d = self.X.shape[1]
13 | self.N = self.X.shape[0]
14 | self.q = self.d-1
15 |
16 | # hyperparameters
17 | self.a_alpha = a_alpha
18 | self.b_alpha = b_alpha
19 | self.a_tau = a_tau
20 | self.b_tau = b_tau
21 | self.beta = beta
22 |
23 | with pm.Model() as model:
24 | z = pm.MvNormal('z', mu=np.zeros(self.q), cov=np.eye(self.q), shape=(self.N, self.q))
25 | mu = pm.MvNormal('mu', mu=np.zeros(self.d), cov=np.eye(self.d)/self.beta, shape=self.d)
26 | alpha = pm.Gamma('alpha', alpha=self.a_alpha, beta=self.b_alpha, shape=self.q)
27 | w = pm.MatrixNormal('w', mu=np.zeros((self.d, self.q)), rowcov=np.eye(self.d), colcov=diag(1/alpha), shape=(self.d, self.q))
28 | tau = pm.Gamma('tau', alpha=self.a_tau, beta=self.b_tau)
29 | x = pm.math.dot(z, w.T) + mu
30 | obs_x = pm.MatrixNormal('obs_x', mu=x, rowcov=np.eye(self.N), colcov=np.eye(self.d)/tau, shape=(self.N, self.d), observed=self.X)
31 |
32 | self.model = model
33 |
34 |
35 | def fit(self, iters=10000):
36 | with self.model:
37 | inference = pm.ADVI()
38 | approx = pm.fit(n=iters, method=inference)
39 | trace = approx.sample(iters//2)
40 |
41 | # save
42 | s = len(trace)//2
43 | self.trace = trace
44 | self.inference = inference
45 | self.z = trace[s::]['z'].mean(axis=0)
46 | self.mu = trace[s::]['mu'].mean(axis=0)
47 | self.alpha = trace[s::]['alpha'].mean(axis=0)
48 | self.w = trace[s::]['w'].mean(axis=0)
49 |
50 |
51 | def transform(self):
52 | x = pm.sample_ppc(self.trace, 5000, model=self.model)
53 | return x['obs_x'].mean(axis=0)
54 |
55 |
56 | def fit_transform(self, iters=10000):
57 | self.fit(iters)
58 | return self.transform()
59 |
60 |
61 | def get_weight_matrix(self):
62 | return self.w
63 |
64 |
65 | def get_inv_variance(self):
66 | return self.alpha
67 |
68 |
69 | def get_elbos(self):
70 | return -self.inference.hist
--------------------------------------------------------------------------------
/ppca_demo.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | from numpy.random import multivariate_normal
4 | from ppca import PPCA
5 |
6 | if __name__ == '__main__':
7 |
8 | cov = np.diag([10, 9, 8, 7] + [1]*28 + [6, 5, 4, 3] + [1]*28)**2
9 | data = multivariate_normal(np.zeros(64), cov, 256)
10 |
11 | ppca1 = PPCA(n_dimension=4)
12 | ppca1.fit(data, method='EM')
13 | ppca2 = PPCA(n_dimension=4)
14 | ppca2.fit(data, method='eig')
15 |
16 | print('\n\n\n\n**TEST FITTING THE COVARIANCE MATRIX**')
17 | plt.matshow(cov);
18 | print('\n\noriginal covariance matrix')
19 | plt.show()
20 | plt.matshow(ppca1._C);
21 | print('\n\nfitted covariance matrix (fitted by EM)')
22 | plt.show()
23 | plt.matshow(ppca2._C);
24 | print('\n\nfitted covariance matrix (fitted by eigen)')
25 | plt.show()
26 |
27 | print('\n\n\n\n**TEST GENERATING DATA**')
28 | plt.scatter(data[:, 0], data[:, 1], alpha=0.2);
29 | print('\n\noriginal data (first 2 dimensions)')
30 | plt.show()
31 | gene = ppca1.generate(256)
32 | plt.scatter(gene[:, 0], gene[:, 1], alpha=0.2);
33 | print('\n\ngenerated data (first 2 dimensions) (fitted by EM)')
34 | plt.show()
35 | gene = ppca2.generate(256)
36 | plt.scatter(gene[:, 0], gene[:, 1], alpha=0.2);
37 | print('\n\ngenerated data (first 2 dimensions) (fitted by eigen)')
38 | plt.show()
39 |
40 | print('\n\n\n\n**TEST CALCULATING LIKELIHOOD**')
41 | ppca1 = PPCA(n_dimension=2)
42 | loglikelihoods = ppca1.fit(data, method='EM', keep_loglikes=True)
43 | plt.plot(loglikelihoods)
44 | plt.show()
45 |
46 | print('\n\n\n\n**TEST DIMENSION REDUCTION AND RECOVERING**')
47 | plt.matshow(data)
48 | print('\n\noriginal data')
49 | plt.show()
50 |
51 | ppca3 = PPCA(n_dimension=2)
52 | ppca3.fit(data, method='EM')
53 | plt.matshow( ppca3.inverse_transform( ppca3.transform(data) ) )
54 | print('\n\nrecovered data: 2-component')
55 | plt.show()
56 |
57 | ppca4 = PPCA(n_dimension=2)
58 | ppca4.fit(data, batchsize=16, n_iteration=2000, method='EM')
59 | plt.matshow( ppca4.inverse_transform( ppca4.transform(data) ) )
60 | print('\n\nrecovered data: 2-component (mini-batch)')
61 | plt.show()
62 |
63 | ppca5 = PPCA(n_dimension=63)
64 | ppca5.fit(data, method='EM')
65 | plt.matshow( ppca5.inverse_transform( ppca5.transform(data) ) )
66 | print('\n\nrecovered data: 63-component')
67 | plt.show()
68 |
69 |
--------------------------------------------------------------------------------
/ppca.py:
--------------------------------------------------------------------------------
1 | """
2 | Probabilistic Principal Component Analysis using the EM algorithm from Tipping & Bishop 1997.
3 | (See http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.426.2749&rep=rep1&type=pdf)
4 | """
5 |
6 | import numpy as np
7 | from numpy.random import randn
8 | from numpy.random import normal
9 | from numpy.random import multivariate_normal
10 | from numpy.linalg import det
11 | from numpy.linalg import eig
12 | from numpy.linalg import pinv
13 | from numpy.linalg import multi_dot
14 | from sklearn.exceptions import NotFittedError
15 |
16 | class PPCA(object):
17 |
18 | def __init__(self, n_dimension):
19 |
20 | # mapping from latent variables to observations: x = W z + mu + epsilon
21 | # where x: observation, x in R^(d)
22 | # q: dimension of latent space
23 | self._q = n_dimension
24 |
25 | def fit(self, data, batchsize=None, n_iteration=500, method='EM',
26 | keep_loglikes=False):
27 |
28 | if method not in ('EM', 'eig'):
29 | raise ValueError('unrecognized method.')
30 | if method == 'eig' and keep_loglikes:
31 | raise ValueError('loglike not supported for eig method')
32 | if method == 'eig' and batchsize is not None:
33 | raise ValueError('mini-batch not supported for eig method')
34 |
35 | ####################### INITIALIZE OBSERVATIONS #######################
36 | # X: observations, X in R^(d*N), data assumed to be in R^(N*d)
37 | self._X = data.T
38 | # d: dimension of observations
39 | self._d = self._X.shape[0]
40 | # N: number of observations
41 | self._N = self._X.shape[1]
42 | # mu: mean of x, mu in R^(d)
43 | self._mu = np.mean(self._X, axis=1).reshape(-1, 1)
44 |
45 | ##################### INITIALIZE LATENT VARIABLES #####################
46 | # W: linear transformation matrix, W in R^(d*q)
47 | self._W = randn(self._d, self._q)
48 | # epsilon: Gaussian noise, epsilon in R^(d), epsilon ~ N(0, sigma^2 I)
49 | self._sigma2 = 0
50 | # C: covariance matrix of observation, x ~ N(mu, C)
51 | self._update_C()
52 |
53 | loglikes = [] if keep_loglikes else None
54 |
55 | if method == 'EM':
56 | loglikes = self._fit_EM(batchsize, n_iteration, keep_loglikes)
57 | else: #method == 'eig'
58 | self._fit_eig(n_iteration)
59 |
60 | return loglikes
61 |
62 | def transform(self, data_observ, probabilistic=False):
63 |         """transform the observations into the latent space; when probabilistic is
64 |         set to True, draw a sample from the posterior distribution of the
65 |         latent variables instead of returning the posterior mean"""
66 | assert len(data_observ.shape) == 2
67 | invM = pinv(self._calc_M())
68 | expect_data_latent = multi_dot([invM, self._W.T,
69 | data_observ.T - self._mu])
70 | assert expect_data_latent.shape == (self._q, len(data_observ))
71 | if probabilistic:
72 | cov = np.dot(self._sigma2, invM)
73 | data_latent = np.zeros(shape=(len(data_observ), self._q))
74 | for i in range(len(data_observ)):
75 | data_latent[i] = multivariate_normal(expect_data_latent[:, i].flatten(), cov)
76 | return data_latent
77 | else:
78 | return expect_data_latent.T
79 |
80 | def inverse_transform(self, data_latent, probabilistic=False):
81 |         """transform latent variables back into observations; when probabilistic is
82 |         set to True, draw a sample from the conditional distribution of the observations"""
83 | assert len(data_latent.shape) == 2
84 | expect_data_observ = np.dot(self._W, data_latent.T) + self._mu
85 | if probabilistic:
86 | return (expect_data_observ
87 | + normal(scale=np.sqrt(self._sigma2), size=expect_data_observ.shape)).T
88 | else:
89 | return expect_data_observ.T
90 |
91 | def generate(self, n_sample):
92 | """generate samples from the fitted model"""
93 | try:
94 | return multivariate_normal(self._mu.flatten(), self._C, n_sample)
95 |         except AttributeError:
96 | raise NotFittedError('This PPCA instance is not fitted yet. Call \'fit\' with appropriate arguments before using this method.')
97 |
98 | def calc_components(self):
99 | """generate an orthonormal basis from the fitted model"""
100 | vals, vecs = eig(np.dot(self._W.T, self._W))
101 | return np.dot( self._W, pinv(np.dot(np.diag(vals**0.5), vecs.T)) ).T
102 | ######################## FITTING BY EM ALGORITHM ##########################
103 | def _fit_EM(self, batchsize, n_iteration=500, keep_loglikes=False):
104 |
105 | if batchsize is not None and batchsize > self._N:
106 | raise ValueError('batchsize exceeds number of observations')
107 |
108 | loglikes = [] if keep_loglikes else None
109 |
110 | for i in range(n_iteration):
111 | # E-step: Estimation (omitted)
112 | # M-step: Maximization
113 | if batchsize is not None:
114 | idx = self.batch_idx(i, batchsize)
115 | Xb = self._X[:, idx]
116 | self._maximize_L(Xb, np.mean(Xb, axis=1).reshape(-1, 1))
117 | else:
118 | self._maximize_L(self._X, self._mu)
119 |
120 | if keep_loglikes:
121 | loglikes.append(self._calc_loglike(self._X, self._mu))
122 |
123 | return loglikes
124 |
125 | def _maximize_L(self, X, mu):
126 | S = self._calc_S(X, mu)
127 | M = self._calc_M()
128 | self._update_W(S, M)
129 | self._update_sigma2(S, M)
130 | self._update_C()
131 |
132 | def _update_W(self, S, M):
133 | temp = pinv( self._sigma2 * np.eye(self._q) \
134 | + multi_dot([ pinv(M), self._W.T, S, self._W]) )
135 | self._W = multi_dot([ S, self._W, temp ])
136 |
137 | def _update_sigma2(self, S, M):
138 | temp = multi_dot([ S, self._W, pinv(M), self._W.T ])
139 | self._sigma2 = 1/self._d * np.trace(S - temp)
140 |
141 | ##################### FITTING BY EIGENDECOMPOSITION #######################
142 | def _fit_eig(self, n_iteration=500):
143 |
144 | S = self._calc_S(self._X, self._mu)
145 | vals, vecs = eig(S)
146 | vals, vecs = vals.real, vecs.real
147 | ordbydom = np.argsort(vals)[::-1]
148 | topq_dom = ordbydom[:self._q]
149 | less_dom = ordbydom[self._q:]
150 | self._sigma2 = np.sum(vals[less_dom]) / (self._d - self._q)
151 | self._W = np.dot( vecs[:, topq_dom],
152 | np.sqrt(np.diag(vals[topq_dom])-self._sigma2*np.eye(self._q)) )
153 | self._update_C()
154 |
155 | ########################### UTILITY FUNCTIONS #############################
156 | def _calc_S(self, X, mu):
157 | """calculate the covariance matrix of observations X"""
158 | centeredX = X - mu
159 | return np.dot(centeredX, centeredX.T) / X.shape[1]
160 |
161 | def _calc_M(self):
162 | return self._sigma2 * np.eye(self._q) + np.dot(self._W.T, self._W)
163 |
164 | def _calc_loglike(self, X, mu):
165 | """calculate the loglikelihood of observing data X"""
166 | return -self._N/2 * (self._d*np.log(2*np.pi) \
167 | + np.log(det(self._C)) \
168 | + np.trace(np.dot(pinv(self._C), self._calc_S(X, mu.reshape(-1,1)))))
169 |
170 | def _update_C(self):
171 | self._C = self._sigma2 * np.eye(self._d) + np.dot(self._W, self._W.T)
172 |
173 | def batch_idx(self, i, batchsize):
174 | if batchsize == self._N:
175 | return np.arange(self._N)
176 | idx1 = (i*batchsize) % self._N
177 | idx2 = ((i+1)*batchsize) % self._N
178 | if idx2 < idx1: idx1 -= self._N
179 | return np.arange(idx1, idx2)
--------------------------------------------------------------------------------
/bpca.py:
--------------------------------------------------------------------------------
1 | # https://pdfs.semanticscholar.org/a1fb/a67f147b16e3c4bffdab3cc6f17520c74547.pdf
2 |
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | import scipy
6 | from scipy.stats import multivariate_normal as mvn
7 | from scipy.stats import gamma
8 |
9 | class BPCA(object):
10 |
11 | def __init__(self, a_alpha=1e-3, b_alpha=1e-3, a_tau=1e-3, b_tau=1e-3, beta=1e-3):
12 | # hyperparameters
13 | self.a_alpha = a_alpha # parameter of alpha's prior (a Gamma distribution)
14 | self.b_alpha = b_alpha # parameter of alpha's prior (a Gamma distribution)
15 | self.a_tau = a_tau # parameter of tau's prior (a Gamma distribution)
16 | self.b_tau = b_tau # parameter of tau's prior (a Gamma distribution)
17 | self.beta = beta
18 | # history of ELBOS
19 | self.elbos = None
20 | self.variations = None
21 | # history of log likelihoods
22 | self.loglikelihoods = None
23 |
24 |
25 | def update(self):
26 | """fixed-point update of the Bayesian PCA"""
27 | # inverse of the sigma^2
28 | self.tau = self.a_tau_tilde / self.b_tau_tilde
29 | # hyperparameters controlling the magnitudes of each column of the weight matrix
30 | self.alpha = self.a_alpha_tilde / self.b_alpha_tilde
31 | # covariance matrix of the latent variables
32 | self.cov_z = np.linalg.inv(np.eye(self.q) + self.tau *
33 | (np.trace(self.cov_w) + np.dot(self.mean_w.T, self.mean_w)))
34 | # mean of the latent variable
35 | self.mean_z = self.tau * np.dot(np.dot(self.cov_z, self.mean_w.T), self.Xb - self.mean_mu)
36 | # covariance matrix of the mean observation
37 | self.cov_mu = np.eye(self.d) / (self.beta + self.b * self.tau)
38 | # mean of the mean observation
39 | self.mean_mu = self.tau * np.dot(self.cov_mu, np.sum(self.Xb-np.dot(self.mean_w,
40 | self.mean_z), axis=1)).reshape(self.d, 1)
41 | # covariance matrix of each column of the weight matrix
42 | self.cov_w = np.linalg.inv(np.diag(self.alpha) + self.tau *
43 | (self.b * self.cov_z + np.dot(self.mean_z, self.mean_z.T)))
44 | # mean of each column of the weight matrix
45 | self.mean_w = self.tau * np.dot(self.cov_w, np.dot(self.mean_z, (self.Xb-self.mean_mu).T)).T
46 | # estimation of the b in alpha's Gamma distribution
47 | self.b_alpha_tilde = self.b_alpha + 0.5 * (np.trace(self.cov_w) +
48 | np.diag(np.dot(self.mean_w.T, self.mean_w)))
49 | # estimation of the b in tau's Gamma distribution
50 | self.b_tau_tilde = self.b_tau + 0.5 * np.trace(np.dot(self.Xb.T, self.Xb)) + \
51 | 0.5 * self.b*(np.trace(self.cov_mu)+np.dot(self.mean_mu.flatten(), self.mean_mu.flatten())) + \
52 | 0.5 * np.trace(np.dot(np.trace(self.cov_w)+np.dot(self.mean_w.T, self.mean_w),
53 | self.b*self.cov_z+np.dot(self.mean_z, self.mean_z.T))) + \
54 | np.sum(np.dot(np.dot(self.mean_mu.flatten(), self.mean_w), self.mean_z)) + \
55 | -np.trace(np.dot(self.Xb.T, np.dot(self.mean_w, self.mean_z))) + \
56 | -np.sum(np.dot(self.Xb.T, self.mean_mu))
57 |
58 |
59 | def calculate_log_likelihood(self):
60 | """calculate the log likelihood of observing self.X"""
61 | w = self.mean_w
62 |         c = np.eye(self.d)/self.tau + np.dot(w, w.T)  # tau is the noise precision, so sigma^2 = 1/tau
63 | xc = self.X - self.X.mean(axis=1).reshape(-1,1)
64 | s = np.dot(xc, xc.T) / self.N
65 | self.s = s
66 | c_inv_s = scipy.linalg.lstsq(c, s)[0]
67 | loglikelihood = -0.5*self.N*(self.d*np.log(2*np.pi)+np.log(np.linalg.det(c))+np.trace(c_inv_s))
68 | return loglikelihood
69 |
70 |
71 | def calculate_ELBO(self):
72 |         '''single-sample Monte Carlo estimate of the ELBO:
73 |         ELBO = E_q[-log q(theta) + log p(theta) + log p(X|theta)] = -entropy + logprior + loglikelihood'''
74 |
75 | # random sample
76 | z = np.array([np.random.multivariate_normal(self.mean_z[:,i], self.cov_z) for i in range(self.b)]).T
77 | mu = np.random.multivariate_normal(self.mean_mu.flatten(), self.cov_mu)
78 | w = np.array([np.random.multivariate_normal(self.mean_w[i], self.cov_w) for i in range(self.d)])
79 | alpha = np.random.gamma(self.a_alpha_tilde, 1/self.b_alpha_tilde)
80 | tau = np.random.gamma(self.a_tau_tilde, 1/self.b_tau_tilde)
81 |
82 | # entropy
83 | # q(z)
84 | entropy = np.sum(np.array([mvn.logpdf(z[:,i], self.mean_z[:,i], self.cov_z) for i in range(self.b)]))
85 |
86 | # q(mu)
87 | entropy += mvn.logpdf(mu, self.mean_mu.flatten(), self.cov_mu)
88 |
89 | # q(W)
90 | entropy += np.sum(np.array([mvn.logpdf(w[i], self.mean_w[i], self.cov_w) for i in range(self.d)]))
91 |
92 | # q(alpha)
93 | entropy += np.sum(gamma.logpdf(alpha, self.a_alpha_tilde, scale=1/self.b_alpha_tilde))
94 |
95 | # q(tau)
96 | entropy += gamma.logpdf(tau, self.a_tau_tilde, scale=1/self.b_tau_tilde)
97 |
98 | # logprior
99 | # p(z), z ~ N(0, I)
100 | logprior = np.sum(np.array([mvn.logpdf(z[:,i], mean=np.zeros(self.q), cov=np.eye(self.q)) for i in range(self.b)]))
101 |
102 | # p(w|alpha), conditional gaussian
103 | logprior += np.sum(np.array([self.d/2*np.log(alpha[i]/(2*np.pi))-alpha[i]*np.sum(w[:,i]**2)/2 for i in range(self.q)]))
104 |
105 | # p(alpha), alpha[i] ~ Gamma(a, b)
106 | logprior += np.sum(gamma.logpdf(alpha, self.a_alpha, scale=1/self.b_alpha))
107 |
108 | # p(mu), mu ~ N(0, I/beta)
109 | logprior += mvn.logpdf(mu, mean=np.zeros(self.d), cov=np.eye(self.d)/self.beta)
110 |
111 | # p(tau), tau ~ Gamma(c, d)
112 | logprior += gamma.logpdf(tau, self.a_tau, scale=1/self.b_tau)
113 |
114 | # loglikelihood
115 | pred = np.dot(w, z) + mu.reshape(-1,1)
116 | loglikelihood = np.sum(np.array([mvn.logpdf(self.Xb[:,i], pred[:,i], np.eye(self.d)/tau) for i in range(self.b)]))
117 |
118 | return -entropy + logprior + loglikelihood
119 |
120 |
121 | def batch_idx(self, i):
122 | if self.b == self.N:
123 | return np.arange(self.N)
124 | idx1 = (i*self.b) % self.N
125 | idx2 = ((i+1)*self.b) % self.N
126 | if idx2 < idx1:
127 | idx1 -= self.N
128 | return np.arange(idx1, idx2)
129 |
130 |
131 | def fit(self, X=None, batch_size=128, iters=500, print_every=100, verbose=False, trace_elbo=False, trace_loglikelihood=False):
132 | """fit the Bayesian PCA model using fixed-point update"""
133 | # data, # of samples, dims
134 |         self.X = X.T  # X is passed as (N, d) and stored internally as (d, N); no need to transpose it before calling fit
135 | self.d = self.X.shape[0]
136 | self.N = self.X.shape[1]
137 | self.q = self.d-1
138 | self.ed = []
139 | self.b = min(batch_size, self.N)
140 |
141 | # variational parameters
142 | self.mean_z = np.random.randn(self.q, self.b) # latent variable
143 | self.cov_z = np.eye(self.q)
144 | self.mean_mu = np.random.randn(self.d, 1)
145 | self.cov_mu = np.eye(self.d)
146 | self.mean_w = np.random.randn(self.d, self.q)
147 | self.cov_w = np.eye(self.q)
148 | self.a_alpha_tilde = self.a_alpha + self.d/2
149 | self.b_alpha_tilde = np.abs(np.random.randn(self.q))
150 | self.a_tau_tilde = self.a_tau + self.b * self.d / 2
151 | self.b_tau_tilde = np.abs(np.random.randn(1))
152 |
153 | # update
154 | order = np.arange(self.N)
155 | elbos = np.zeros(iters)
156 | loglikelihoods = np.zeros(iters)
157 | for i in range(iters):
158 | idx = order[self.batch_idx(i)]
159 | self.Xb = self.X[:,idx]
160 | self.update()
161 | if trace_elbo:
162 | elbos[i] = self.calculate_ELBO()
163 | if trace_loglikelihood:
164 | loglikelihoods[i] = self.calculate_log_likelihood()
165 | if verbose and i % print_every == 0:
166 | print('Iter %d, LL: %f, alpha: %s' % (i, loglikelihoods[i], str(self.alpha)))
167 | self.captured_dims()
168 | self.elbos = elbos if trace_elbo else None
169 | self.loglikelihoods = loglikelihoods if trace_loglikelihood else None
170 |
171 |
172 | def captured_dims(self):
173 |         """record in self.ed the indices of the effective (captured) dimensions"""
174 | sum_alpha = np.sum(1/self.alpha)
175 | self.ed = np.array([i for i, inv_alpha in enumerate(1/self.alpha) if inv_alpha < sum_alpha/self.q])
176 |
177 |
178 | def transform(self, X=None, full=True):
179 |         """transform the observations into the latent space"""
180 | X = self.X if X is None else X.T
181 | if full:
182 | w = self.mean_w
183 | l = self.q
184 | else:
185 |             w = self.mean_w[:, self.ed]
186 | l = len(self.ed)
187 |         m = np.eye(l)/self.tau + np.dot(w.T, w)  # tau is the noise precision, so sigma^2 = 1/tau
188 | inv_m = np.linalg.inv(m)
189 | z = np.dot(np.dot(inv_m, w.T), X - self.mean_mu)
190 | return z.T
191 | # return np.array([np.random.multivariate_normal(z[:,i], inv_m*self.tau) for i in range(X.shape[1])])
192 |
193 |
194 | def inverse_transform(self, z, full=True):
195 | """transform the latent variable into observations"""
196 | z = z.T
197 | if full:
198 | w = self.mean_w
199 | else:
200 |             w = self.mean_w[:, self.ed]
201 | x = np.dot(w, z) + self.mean_mu
202 | return x.T
203 | # return np.array([np.random.multivariate_normal(x[:,i], np.eye(self.d)*self.tau) for i in range(z.shape[1])])
204 |
205 |
206 | def fit_transform(self, X=None, batch_size=128, iters=500, print_every=100, verbose=False, trace_elbo=False, trace_loglikelihood=False):
207 |         self.fit(X, batch_size, iters, print_every, verbose, trace_elbo, trace_loglikelihood)
208 | return self.transform()
209 |
210 |
211 | def generate(self, size=1):
212 | """generate samples from the fitted model"""
213 | w = self.mean_w[:, self.ed]
214 |         c = np.eye(self.d)/self.tau + np.dot(w, w.T)  # noise covariance is I/tau
215 | return np.array([np.random.multivariate_normal(self.mean_mu.flatten(), c) for i in range(size)])
216 |
217 |
218 | def get_weight_matrix(self):
219 | return self.mean_w
220 |
221 |
222 | def get_inv_variance(self):
223 | return self.alpha
224 |
225 |
226 | def get_effective_dims(self):
227 | return len(self.ed)
228 |
229 |
230 | def get_cov_mat(self):
231 | w = self.mean_w[:, self.ed]
232 |         c = np.eye(self.d)/self.tau + np.dot(w, w.T)  # noise covariance is I/tau
233 | return c
234 |
235 |
236 | def get_elbo(self):
237 | return self.elbos
238 |
239 |
240 | def get_loglikelihood(self):
241 | return self.loglikelihoods
242 |
243 |
--------------------------------------------------------------------------------