├── AM_205_Final_Report.pdf ├── vis_utils.py ├── pca_impute_demo.py ├── pca_impute.py ├── README.md ├── pca_all_impute.py ├── bpca_pymc.py ├── ppca_demo.py ├── ppca.py └── bpca.py /AM_205_Final_Report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzaym/Probabilistic-and-Bayesian-PCA/HEAD/AM_205_Final_Report.pdf -------------------------------------------------------------------------------- /vis_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | def hinton(matrix=None, max_weight=None, ax=None): 5 | """Draw Hinton diagram for visualizing a weight matrix.""" 6 | ax = ax if ax is not None else plt.gca() 7 | 8 | if not max_weight: 9 | max_weight = 2 ** np.ceil(np.log(np.abs(matrix).max()) / np.log(2)) 10 | 11 | ax.patch.set_facecolor('gray') 12 | ax.set_aspect('equal', 'box') 13 | ax.xaxis.set_major_locator(plt.NullLocator()) 14 | ax.yaxis.set_major_locator(plt.NullLocator()) 15 | 16 | for (x, y), w in np.ndenumerate(matrix): 17 | color = 'white' if w > 0 else 'black' 18 | size = np.sqrt(np.abs(w) / max_weight) 19 | rect = plt.Rectangle([x - size / 2, y - size / 2], size, size, 20 | facecolor=color, edgecolor=color) 21 | ax.add_patch(rect) 22 | 23 | ax.autoscale_view() 24 | ax.invert_yaxis() 25 | plt.show() -------------------------------------------------------------------------------- /pca_impute_demo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from numpy.random import multivariate_normal 4 | from pca_impute import PCAImputer 5 | 6 | if __name__ == '__main__': 7 | 8 | # original data 9 | cov = np.diag([10, 9, 8, 7] + [1]*28 + [6, 5, 4, 3] + [1]*28)**2 10 | data = multivariate_normal(np.zeros(64), cov, 256) 11 | 12 | # missing at random 13 | mask_missing = np.random.randint(2, size=data.shape) 14 | data_missing = data.copy() 15 | data_missing[np.where(mask_missing)] = np.nan 16 | 17 | # impute by PCA 18 | imputer = PCAImputer(n_dimension=8) 19 | data_imputed = imputer.fit_transform(data_missing, n_iteration=100) 20 | 21 | plt.matshow(data) 22 | plt.title('original data') 23 | plt.show() 24 | plt.matshow(data_missing) 25 | plt.title('missing at random') 26 | plt.show() 27 | plt.matshow(data_imputed) 28 | plt.title('imputed data') 29 | plt.show() 30 | 31 | print('reconstruction err: {}'.format(np.sqrt(np.sum(np.sum(np.square(data-data_imputed)))))) -------------------------------------------------------------------------------- /pca_impute.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ppca import PPCA 3 | 4 | class PCAImputer: 5 | 6 | def __init__(self, n_dimension): 7 | self._q = n_dimension 8 | 9 | def fit_transform(self, data, method='eig', probabilistic=False, n_iteration=100): 10 | """fitting a PCA to the original data by iterativly filling the missing entries 11 | with value generated from PCA. 
Each missing entries are initialized with the 12 | row mean.""" 13 | self._data = data.copy() 14 | self._missing = np.isnan(data) 15 | self._observed = ~self._missing 16 | self._pca = PPCA(n_dimension = self._q) 17 | 18 | row_defau = np.zeros(self._data.shape[0]) 19 | row_means = np.repeat(np.nanmean(self._data, axis=1, out=row_defau).reshape(-1, 1), \ 20 | self._data.shape[1], axis=1) 21 | self._data[self._missing] = row_means[self._missing] 22 | for i in range(n_iteration): 23 | self._pca.fit(self._data, method=method) 24 | self._data[self._missing] = self._pca.inverse_transform(self._pca.transform(self._data, \ 25 | probabilistic), probabilistic)[self._missing] 26 | return self._data 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Probabilistic and Bayesian PCA 2 | 3 | ### Overview 4 | This repository is for the final project of Harvard AM205 "Numerical Methods". We examined two generalized versions of conventional PCA from a statistical perspective: Probabilistic PCA (PPCA) and Bayesian PCA (BPCA). We compared their behaviors on synthetic data and real-world data with different distributions, and also explored the possible application for estimating missing data. 5 | 6 | ### Code Specification 7 | #### Python files: 8 | 18 | 19 | #### Jupyter notebooks (for test and demo): 20 | 21 | 28 | -------------------------------------------------------------------------------- /pca_all_impute.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ppca import PPCA 3 | from sklearn.decomposition import PCA 4 | from bpca import BPCA 5 | 6 | class PCAImputer: 7 | 8 | def __init__(self, method='pca', n_dimension=0): 9 | self._q = n_dimension 10 | self._method = method 11 | if method == 'pca': 12 | self._pca = PCA(n_components=self._q) 13 | elif method == 'ppca': 14 | self._pca = PPCA(n_dimension=self._q) 15 | else: 16 | self._pca = BPCA() 17 | 18 | def fit_transform(self, data, ppca_method='eig', probabilistic=False, n_iteration=100, \ 19 | verbose=False, print_every=50, trace_mse=False, cdata=None): 20 | self._data = data.copy() 21 | self._missing = np.isnan(data) 22 | self._observed = ~self._missing 23 | self._mse = np.zeros(n_iteration) 24 | 25 | row_defau = np.zeros(self._data.shape[0]) 26 | row_means = np.repeat(np.nanmean(self._data, axis=1, out=row_defau).reshape(-1, 1), \ 27 | self._data.shape[1], axis=1) 28 | self._data[self._missing] = row_means[self._missing] 29 | self._data = np.nan_to_num(self._data) 30 | 31 | for i in range(n_iteration): 32 | if self._method == 'ppca': 33 | self._pca.fit(self._data, method=ppca_method) 34 | self._data[self._missing] = self._pca.inverse_transform(self._pca.transform(self._data, \ 35 | probabilistic), probabilistic)[self._missing] 36 | else: 37 | self._pca.fit(self._data) 38 | self._data[self._missing] = self._pca.inverse_transform(self._pca.transform(self._data))[self._missing] 39 | self._mse[i] = np.sum((cdata-self._data)**2)/cdata.shape[0] 40 | if verbose and i % print_every == 0: 41 | print('Iter %d, MSE=%f' %(i, self._mse[i])) 42 | # if np.abs(self._mse[i-1]-self._mse[i]) < 1e-6: 43 | # break 44 | return self._data, self._mse -------------------------------------------------------------------------------- /bpca_pymc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as 
plt 3 | import pymc3 as pm 4 | from theano.tensor.nlinalg import diag 5 | 6 | 7 | class BPCA(object): 8 | 9 | def __init__(self, X, a_alpha=1e-3, b_alpha=1e-3, a_tau=1e-3, b_tau=1e-3, beta=1e-3): 10 | # data, # of samples, dims 11 | self.X = X 12 | self.d = self.X.shape[1] 13 | self.N = self.X.shape[0] 14 | self.q = self.d-1 15 | 16 | # hyperparameters 17 | self.a_alpha = a_alpha 18 | self.b_alpha = b_alpha 19 | self.a_tau = a_tau 20 | self.b_tau = b_tau 21 | self.beta = beta 22 | 23 | with pm.Model() as model: 24 | z = pm.MvNormal('z', mu=np.zeros(self.q), cov=np.eye(self.q), shape=(self.N, self.q)) 25 | mu = pm.MvNormal('mu', mu=np.zeros(self.d), cov=np.eye(self.d)/self.beta, shape=self.d) 26 | alpha = pm.Gamma('alpha', alpha=self.a_alpha, beta=self.b_alpha, shape=self.q) 27 | w = pm.MatrixNormal('w', mu=np.zeros((self.d, self.q)), rowcov=np.eye(self.d), colcov=diag(1/alpha), shape=(self.d, self.q)) 28 | tau = pm.Gamma('tau', alpha=self.a_tau, beta=self.b_tau) 29 | x = pm.math.dot(z, w.T) + mu 30 | obs_x = pm.MatrixNormal('obs_x', mu=x, rowcov=np.eye(self.N), colcov=np.eye(self.d)/tau, shape=(self.N, self.d), observed=self.X) 31 | 32 | self.model = model 33 | 34 | 35 | def fit(self, iters=10000): 36 | with self.model: 37 | inference = pm.ADVI() 38 | approx = pm.fit(n=iters, method=inference) 39 | trace = approx.sample(iters//2) 40 | 41 | # save 42 | s = len(trace)//2 43 | self.trace = trace 44 | self.inference = inference 45 | self.z = trace[s::]['z'].mean(axis=0) 46 | self.mu = trace[s::]['mu'].mean(axis=0) 47 | self.alpha = trace[s::]['alpha'].mean(axis=0) 48 | self.w = trace[s::]['w'].mean(axis=0) 49 | 50 | 51 | def transform(self): 52 | x = pm.sample_ppc(self.trace, 5000, model=self.model) 53 | return x['obs_x'].mean(axis=0) 54 | 55 | 56 | def fit_transform(self, iters=10000): 57 | self.fit(iters) 58 | return self.transform() 59 | 60 | 61 | def get_weight_matrix(self): 62 | return self.w 63 | 64 | 65 | def get_inv_variance(self): 66 | return self.alpha 67 | 68 | 69 | def get_elbos(self): 70 | return -self.inference.hist -------------------------------------------------------------------------------- /ppca_demo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from numpy.random import multivariate_normal 4 | from ppca import PPCA 5 | 6 | if __name__ == '__main__': 7 | 8 | cov = np.diag([10, 9, 8, 7] + [1]*28 + [6, 5, 4, 3] + [1]*28)**2 9 | data = multivariate_normal(np.zeros(64), cov, 256) 10 | 11 | ppca1 = PPCA(n_dimension=4) 12 | ppca1.fit(data, method='EM') 13 | ppca2 = PPCA(n_dimension=4) 14 | ppca2.fit(data, method='eig') 15 | 16 | print('\n\n\n\n**TEST FITTING THE COVARIANCE MATRIX**') 17 | plt.matshow(cov); 18 | print('\n\noriginal covariance matrix') 19 | plt.show() 20 | plt.matshow(ppca1._C); 21 | print('\n\nfitted covariance matrix (fitted by EM)') 22 | plt.show() 23 | plt.matshow(ppca2._C); 24 | print('\n\nfitted covariance matrix (fitted by eigen)') 25 | plt.show() 26 | 27 | print('\n\n\n\n**TEST GENERATING DATA**') 28 | plt.scatter(data[:, 0], data[:, 1], alpha=0.2); 29 | print('\n\noriginal data (first 2 dimensions)') 30 | plt.show() 31 | gene = ppca1.generate(256) 32 | plt.scatter(gene[:, 0], gene[:, 1], alpha=0.2); 33 | print('\n\ngenerated data (first 2 dimensions) (fitted by EM)') 34 | plt.show() 35 | gene = ppca2.generate(256) 36 | plt.scatter(gene[:, 0], gene[:, 1], alpha=0.2); 37 | print('\n\ngenerated data (first 2 dimensions) (fitted by eigen)') 38 | plt.show() 39 | 
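    # Added check (a minimal sketch, not part of the original demo): compare the two
    # fitted covariance estimates numerically, using the same `_C` attribute the
    # matshow calls above already rely on.
    err_em = np.linalg.norm(cov - ppca1._C)
    err_eig = np.linalg.norm(cov - ppca2._C)
    print('Frobenius error of EM-fitted covariance:  {:.3f}'.format(err_em))
    print('Frobenius error of eig-fitted covariance: {:.3f}'.format(err_eig))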
40 | print('\n\n\n\n**TEST CALCULATING LIKELIHOOD**') 41 | ppca1 = PPCA(n_dimension=2) 42 | loglikelihoods = ppca1.fit(data, method='EM', keep_loglikes=True) 43 | plt.plot(loglikelihoods) 44 | plt.show() 45 | 46 | print('\n\n\n\n**TEST DIMENSION REDUCTION AND RECOVERING**') 47 | plt.matshow(data) 48 | print('\n\noriginal data') 49 | plt.show() 50 | 51 | ppca3 = PPCA(n_dimension=2) 52 | ppca3.fit(data, method='EM') 53 | plt.matshow( ppca3.inverse_transform( ppca3.transform(data) ) ) 54 | print('\n\nrecovered data: 2-component') 55 | plt.show() 56 | 57 | ppca4 = PPCA(n_dimension=2) 58 | ppca4.fit(data, batchsize=16, n_iteration=2000, method='EM') 59 | plt.matshow( ppca4.inverse_transform( ppca4.transform(data) ) ) 60 | print('\n\nrecovered data: 2-component (mini-batch)') 61 | plt.show() 62 | 63 | ppca5 = PPCA(n_dimension=63) 64 | ppca5.fit(data, method='EM') 65 | plt.matshow( ppca5.inverse_transform( ppca5.transform(data) ) ) 66 | print('\n\nrecovered data: 63-component') 67 | plt.show() 68 | 69 | -------------------------------------------------------------------------------- /ppca.py: -------------------------------------------------------------------------------- 1 | """ 2 | Probablistic Principal Component Analysis using the EM algorithm from Tipping & Bishop 1997. 3 | (See http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.426.2749&rep=rep1&type=pdf) 4 | """ 5 | 6 | import numpy as np 7 | from numpy.random import randn 8 | from numpy.random import normal 9 | from numpy.random import multivariate_normal 10 | from numpy.linalg import det 11 | from numpy.linalg import eig 12 | from numpy.linalg import pinv 13 | from numpy.linalg import multi_dot 14 | from sklearn.exceptions import NotFittedError 15 | 16 | class PPCA(object): 17 | 18 | def __init__(self, n_dimension): 19 | 20 | # mapping from latent variables to observations: x = W z + mu + epsilon 21 | # where x: observation, x in R^(d) 22 | # q: dimension of latent space 23 | self._q = n_dimension 24 | 25 | def fit(self, data, batchsize=None, n_iteration=500, method='EM', 26 | keep_loglikes=False): 27 | 28 | if method not in ('EM', 'eig'): 29 | raise ValueError('unrecognized method.') 30 | if method == 'eig' and keep_loglikes: 31 | raise ValueError('loglike not supported for eig method') 32 | if method == 'eig' and batchsize is not None: 33 | raise ValueError('mini-batch not supported for eig method') 34 | 35 | ####################### INITIALIZE OBSERVATIONS ####################### 36 | # X: observations, X in R^(d*N), data assumed to be in R^(N*d) 37 | self._X = data.T 38 | # d: dimension of observations 39 | self._d = self._X.shape[0] 40 | # N: number of observations 41 | self._N = self._X.shape[1] 42 | # mu: mean of x, mu in R^(d) 43 | self._mu = np.mean(self._X, axis=1).reshape(-1, 1) 44 | 45 | ##################### INITIALIZE LATENT VARIABLES ##################### 46 | # W: linear transformation matrix, W in R^(d*q) 47 | self._W = randn(self._d, self._q) 48 | # epsilon: Gaussian noise, epsilon in R^(d), epsilon ~ N(0, sigma^2 I) 49 | self._sigma2 = 0 50 | # C: covariance matrix of observation, x ~ N(mu, C) 51 | self._update_C() 52 | 53 | loglikes = [] if keep_loglikes else None 54 | 55 | if method == 'EM': 56 | loglikes = self._fit_EM(batchsize, n_iteration, keep_loglikes) 57 | else: #method == 'eig' 58 | self._fit_eig(n_iteration) 59 | 60 | return loglikes 61 | 62 | def transform(self, data_observ, probabilistic=False): 63 | """transform the observations into the latent space, when probabilistic 64 | set to True, will draw a 
sample from the posterior distribution of the 65 | latent variable""" 66 | assert len(data_observ.shape) == 2 67 | invM = pinv(self._calc_M()) 68 | expect_data_latent = multi_dot([invM, self._W.T, 69 | data_observ.T - self._mu]) 70 | assert expect_data_latent.shape == (self._q, len(data_observ)) 71 | if probabilistic: 72 | cov = np.dot(self._sigma2, invM) 73 | data_latent = np.zeros(shape=(len(data_observ), self._q)) 74 | for i in range(len(data_observ)): 75 | data_latent[i] = multivariate_normal(expect_data_latent[:, i].flatten(), cov) 76 | return data_latent 77 | else: 78 | return expect_data_latent.T 79 | 80 | def inverse_transform(self, data_latent, probabilistic=False): 81 | """transform the latent variable into observations, when probabilistic 82 | set to True, will draw a sample from the distribution of the observations""" 83 | assert len(data_latent.shape) == 2 84 | expect_data_observ = np.dot(self._W, data_latent.T) + self._mu 85 | if probabilistic: 86 | return (expect_data_observ 87 | + normal(scale=np.sqrt(self._sigma2), size=expect_data_observ.shape)).T 88 | else: 89 | return expect_data_observ.T 90 | 91 | def generate(self, n_sample): 92 | """generate samples from the fitted model""" 93 | try: 94 | return multivariate_normal(self._mu.flatten(), self._C, n_sample) 95 | except: 96 | raise NotFittedError('This PPCA instance is not fitted yet. Call \'fit\' with appropriate arguments before using this method.') 97 | 98 | def calc_components(self): 99 | """generate an orthonormal basis from the fitted model""" 100 | vals, vecs = eig(np.dot(self._W.T, self._W)) 101 | return np.dot( self._W, pinv(np.dot(np.diag(vals**0.5), vecs.T)) ).T 102 | ######################## FITTING BY EM ALGORITHM ########################## 103 | def _fit_EM(self, batchsize, n_iteration=500, keep_loglikes=False): 104 | 105 | if batchsize is not None and batchsize > self._N: 106 | raise ValueError('batchsize exceeds number of observations') 107 | 108 | loglikes = [] if keep_loglikes else None 109 | 110 | for i in range(n_iteration): 111 | # E-step: Estimation (omitted) 112 | # M-step: Maximization 113 | if batchsize is not None: 114 | idx = self.batch_idx(i, batchsize) 115 | Xb = self._X[:, idx] 116 | self._maximize_L(Xb, np.mean(Xb, axis=1).reshape(-1, 1)) 117 | else: 118 | self._maximize_L(self._X, self._mu) 119 | 120 | if keep_loglikes: 121 | loglikes.append(self._calc_loglike(self._X, self._mu)) 122 | 123 | return loglikes 124 | 125 | def _maximize_L(self, X, mu): 126 | S = self._calc_S(X, mu) 127 | M = self._calc_M() 128 | self._update_W(S, M) 129 | self._update_sigma2(S, M) 130 | self._update_C() 131 | 132 | def _update_W(self, S, M): 133 | temp = pinv( self._sigma2 * np.eye(self._q) \ 134 | + multi_dot([ pinv(M), self._W.T, S, self._W]) ) 135 | self._W = multi_dot([ S, self._W, temp ]) 136 | 137 | def _update_sigma2(self, S, M): 138 | temp = multi_dot([ S, self._W, pinv(M), self._W.T ]) 139 | self._sigma2 = 1/self._d * np.trace(S - temp) 140 | 141 | ##################### FITTING BY EIGENDECOMPOSITION ####################### 142 | def _fit_eig(self, n_iteration=500): 143 | 144 | S = self._calc_S(self._X, self._mu) 145 | vals, vecs = eig(S) 146 | vals, vecs = vals.real, vecs.real 147 | ordbydom = np.argsort(vals)[::-1] 148 | topq_dom = ordbydom[:self._q] 149 | less_dom = ordbydom[self._q:] 150 | self._sigma2 = np.sum(vals[less_dom]) / (self._d - self._q) 151 | self._W = np.dot( vecs[:, topq_dom], 152 | np.sqrt(np.diag(vals[topq_dom])-self._sigma2*np.eye(self._q)) ) 153 | self._update_C() 154 | 155 | 
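    # Note on the closed-form fit above (added): it follows Tipping & Bishop's
    # maximum-likelihood solution, where sigma^2 is the average of the d - q
    # discarded eigenvalues of S and W = U_q (L_q - sigma^2 I)^(1/2), with U_q
    # and L_q the top-q eigenvectors and eigenvalues. A minimal usage sketch
    # (variable names here are illustrative only):
    #
    #   ppca = PPCA(n_dimension=4)
    #   ppca.fit(data, method='eig')         # data: (N, d) array
    #   Z = ppca.transform(data)             # (N, 4) latent coordinates
    #   X_hat = ppca.inverse_transform(Z)    # (N, d) reconstruction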
########################### UTILITY FUNCTIONS ############################# 156 | def _calc_S(self, X, mu): 157 | """calculate the covariance matrix of observations X""" 158 | centeredX = X - mu 159 | return np.dot(centeredX, centeredX.T) / X.shape[1] 160 | 161 | def _calc_M(self): 162 | return self._sigma2 * np.eye(self._q) + np.dot(self._W.T, self._W) 163 | 164 | def _calc_loglike(self, X, mu): 165 | """calculate the loglikelihood of observing data X""" 166 | return -self._N/2 * (self._d*np.log(2*np.pi) \ 167 | + np.log(det(self._C)) \ 168 | + np.trace(np.dot(pinv(self._C), self._calc_S(X, mu.reshape(-1,1))))) 169 | 170 | def _update_C(self): 171 | self._C = self._sigma2 * np.eye(self._d) + np.dot(self._W, self._W.T) 172 | 173 | def batch_idx(self, i, batchsize): 174 | if batchsize == self._N: 175 | return np.arange(self._N) 176 | idx1 = (i*batchsize) % self._N 177 | idx2 = ((i+1)*batchsize) % self._N 178 | if idx2 < idx1: idx1 -= self._N 179 | return np.arange(idx1, idx2) -------------------------------------------------------------------------------- /bpca.py: -------------------------------------------------------------------------------- 1 | # https://pdfs.semanticscholar.org/a1fb/a67f147b16e3c4bffdab3cc6f17520c74547.pdf 2 | 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import scipy 6 | from scipy.stats import multivariate_normal as mvn 7 | from scipy.stats import gamma 8 | 9 | class BPCA(object): 10 | 11 | def __init__(self, a_alpha=1e-3, b_alpha=1e-3, a_tau=1e-3, b_tau=1e-3, beta=1e-3): 12 | # hyperparameters 13 | self.a_alpha = a_alpha # parameter of alpha's prior (a Gamma distribution) 14 | self.b_alpha = b_alpha # parameter of alpha's prior (a Gamma distribution) 15 | self.a_tau = a_tau # parameter of tau's prior (a Gamma distribution) 16 | self.b_tau = b_tau # parameter of tau's prior (a Gamma distribution) 17 | self.beta = beta 18 | # history of ELBOS 19 | self.elbos = None 20 | self.variations = None 21 | # history of log likelihoods 22 | self.loglikelihoods = None 23 | 24 | 25 | def update(self): 26 | """fixed-point update of the Bayesian PCA""" 27 | # inverse of the sigma^2 28 | self.tau = self.a_tau_tilde / self.b_tau_tilde 29 | # hyperparameters controlling the magnitudes of each column of the weight matrix 30 | self.alpha = self.a_alpha_tilde / self.b_alpha_tilde 31 | # covariance matrix of the latent variables 32 | self.cov_z = np.linalg.inv(np.eye(self.q) + self.tau * 33 | (np.trace(self.cov_w) + np.dot(self.mean_w.T, self.mean_w))) 34 | # mean of the latent variable 35 | self.mean_z = self.tau * np.dot(np.dot(self.cov_z, self.mean_w.T), self.Xb - self.mean_mu) 36 | # covariance matrix of the mean observation 37 | self.cov_mu = np.eye(self.d) / (self.beta + self.b * self.tau) 38 | # mean of the mean observation 39 | self.mean_mu = self.tau * np.dot(self.cov_mu, np.sum(self.Xb-np.dot(self.mean_w, 40 | self.mean_z), axis=1)).reshape(self.d, 1) 41 | # covariance matrix of each column of the weight matrix 42 | self.cov_w = np.linalg.inv(np.diag(self.alpha) + self.tau * 43 | (self.b * self.cov_z + np.dot(self.mean_z, self.mean_z.T))) 44 | # mean of each column of the weight matrix 45 | self.mean_w = self.tau * np.dot(self.cov_w, np.dot(self.mean_z, (self.Xb-self.mean_mu).T)).T 46 | # estimation of the b in alpha's Gamma distribution 47 | self.b_alpha_tilde = self.b_alpha + 0.5 * (np.trace(self.cov_w) + 48 | np.diag(np.dot(self.mean_w.T, self.mean_w))) 49 | # estimation of the b in tau's Gamma distribution 50 | self.b_tau_tilde = self.b_tau + 0.5 * 
np.trace(np.dot(self.Xb.T, self.Xb)) + \ 51 | 0.5 * self.b*(np.trace(self.cov_mu)+np.dot(self.mean_mu.flatten(), self.mean_mu.flatten())) + \ 52 | 0.5 * np.trace(np.dot(np.trace(self.cov_w)+np.dot(self.mean_w.T, self.mean_w), 53 | self.b*self.cov_z+np.dot(self.mean_z, self.mean_z.T))) + \ 54 | np.sum(np.dot(np.dot(self.mean_mu.flatten(), self.mean_w), self.mean_z)) + \ 55 | -np.trace(np.dot(self.Xb.T, np.dot(self.mean_w, self.mean_z))) + \ 56 | -np.sum(np.dot(self.Xb.T, self.mean_mu)) 57 | 58 | 59 | def calculate_log_likelihood(self): 60 | """calculate the log likelihood of observing self.X""" 61 | w = self.mean_w 62 | c = np.eye(self.d)*self.tau + np.dot(w, w.T) 63 | xc = self.X - self.X.mean(axis=1).reshape(-1,1) 64 | s = np.dot(xc, xc.T) / self.N 65 | self.s = s 66 | c_inv_s = scipy.linalg.lstsq(c, s)[0] 67 | loglikelihood = -0.5*self.N*(self.d*np.log(2*np.pi)+np.log(np.linalg.det(c))+np.trace(c_inv_s)) 68 | return loglikelihood 69 | 70 | 71 | def calculate_ELBO(self): 72 | '''ELBO = E_q[-log(q(theta))+log(p(theta)+log(p(Y|theta,X)))] 73 | = -entropy + logprior + loglikelihood ''' 74 | 75 | # random sample 76 | z = np.array([np.random.multivariate_normal(self.mean_z[:,i], self.cov_z) for i in range(self.b)]).T 77 | mu = np.random.multivariate_normal(self.mean_mu.flatten(), self.cov_mu) 78 | w = np.array([np.random.multivariate_normal(self.mean_w[i], self.cov_w) for i in range(self.d)]) 79 | alpha = np.random.gamma(self.a_alpha_tilde, 1/self.b_alpha_tilde) 80 | tau = np.random.gamma(self.a_tau_tilde, 1/self.b_tau_tilde) 81 | 82 | # entropy 83 | # q(z) 84 | entropy = np.sum(np.array([mvn.logpdf(z[:,i], self.mean_z[:,i], self.cov_z) for i in range(self.b)])) 85 | 86 | # q(mu) 87 | entropy += mvn.logpdf(mu, self.mean_mu.flatten(), self.cov_mu) 88 | 89 | # q(W) 90 | entropy += np.sum(np.array([mvn.logpdf(w[i], self.mean_w[i], self.cov_w) for i in range(self.d)])) 91 | 92 | # q(alpha) 93 | entropy += np.sum(gamma.logpdf(alpha, self.a_alpha_tilde, scale=1/self.b_alpha_tilde)) 94 | 95 | # q(tau) 96 | entropy += gamma.logpdf(tau, self.a_tau_tilde, scale=1/self.b_tau_tilde) 97 | 98 | # logprior 99 | # p(z), z ~ N(0, I) 100 | logprior = np.sum(np.array([mvn.logpdf(z[:,i], mean=np.zeros(self.q), cov=np.eye(self.q)) for i in range(self.b)])) 101 | 102 | # p(w|alpha), conditional gaussian 103 | logprior += np.sum(np.array([self.d/2*np.log(alpha[i]/(2*np.pi))-alpha[i]*np.sum(w[:,i]**2)/2 for i in range(self.q)])) 104 | 105 | # p(alpha), alpha[i] ~ Gamma(a, b) 106 | logprior += np.sum(gamma.logpdf(alpha, self.a_alpha, scale=1/self.b_alpha)) 107 | 108 | # p(mu), mu ~ N(0, I/beta) 109 | logprior += mvn.logpdf(mu, mean=np.zeros(self.d), cov=np.eye(self.d)/self.beta) 110 | 111 | # p(tau), tau ~ Gamma(c, d) 112 | logprior += gamma.logpdf(tau, self.a_tau, scale=1/self.b_tau) 113 | 114 | # loglikelihood 115 | pred = np.dot(w, z) + mu.reshape(-1,1) 116 | loglikelihood = np.sum(np.array([mvn.logpdf(self.Xb[:,i], pred[:,i], np.eye(self.d)/tau) for i in range(self.b)])) 117 | 118 | return -entropy + logprior + loglikelihood 119 | 120 | 121 | def batch_idx(self, i): 122 | if self.b == self.N: 123 | return np.arange(self.N) 124 | idx1 = (i*self.b) % self.N 125 | idx2 = ((i+1)*self.b) % self.N 126 | if idx2 < idx1: 127 | idx1 -= self.N 128 | return np.arange(idx1, idx2) 129 | 130 | 131 | def fit(self, X=None, batch_size=128, iters=500, print_every=100, verbose=False, trace_elbo=False, trace_loglikelihood=False): 132 | """fit the Bayesian PCA model using fixed-point update""" 133 | # data, # of samples, dims 134 | 
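        # Added summary: each self.update() call below performs one fixed-point
        # sweep over the factorised variational posterior,
        #   q(Z)     -> self.mean_z, self.cov_z
        #   q(mu)    -> self.mean_mu, self.cov_mu
        #   q(W)     -> self.mean_w, self.cov_w (one q x q covariance shared by all d rows of W)
        #   q(alpha) -> self.b_alpha_tilde (the shape a_alpha_tilde is fixed once below)
        #   q(tau)   -> self.b_tau_tilde   (the shape a_tau_tilde is fixed once below)
        # with self.alpha and self.tau taken as the corresponding posterior means.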
self.X = X.T # X is passed as (N, d) with samples in rows; it is transposed to (d, N) internally, so callers should not transpose it
135 | self.d = self.X.shape[0]
136 | self.N = self.X.shape[1]
137 | self.q = self.d-1
138 | self.ed = []
139 | self.b = min(batch_size, self.N)
140 | 
141 | # variational parameters
142 | self.mean_z = np.random.randn(self.q, self.b) # latent variable
143 | self.cov_z = np.eye(self.q)
144 | self.mean_mu = np.random.randn(self.d, 1)
145 | self.cov_mu = np.eye(self.d)
146 | self.mean_w = np.random.randn(self.d, self.q)
147 | self.cov_w = np.eye(self.q)
148 | self.a_alpha_tilde = self.a_alpha + self.d/2
149 | self.b_alpha_tilde = np.abs(np.random.randn(self.q))
150 | self.a_tau_tilde = self.a_tau + self.b * self.d / 2
151 | self.b_tau_tilde = np.abs(np.random.randn(1))
152 | 
153 | # update
154 | order = np.arange(self.N)
155 | elbos = np.zeros(iters)
156 | loglikelihoods = np.zeros(iters)
157 | for i in range(iters):
158 | idx = order[self.batch_idx(i)]
159 | self.Xb = self.X[:,idx]
160 | self.update()
161 | if trace_elbo:
162 | elbos[i] = self.calculate_ELBO()
163 | if trace_loglikelihood:
164 | loglikelihoods[i] = self.calculate_log_likelihood()
165 | if verbose and i % print_every == 0:
166 | print('Iter %d, LL: %f, alpha: %s' % (i, loglikelihoods[i], str(self.alpha)))
167 | self.captured_dims()
168 | self.elbos = elbos if trace_elbo else None
169 | self.loglikelihoods = loglikelihoods if trace_loglikelihood else None
170 | 
171 | 
172 | def captured_dims(self):
173 | """record the indices of the captured (effective) dimensions in self.ed"""
174 | sum_alpha = np.sum(1/self.alpha)
175 | self.ed = np.array([i for i, inv_alpha in enumerate(1/self.alpha) if inv_alpha < sum_alpha/self.q])
176 | 
177 | 
178 | def transform(self, X=None, full=True):
179 | """project the observations into the latent space using the posterior mean of W"""
180 | X = self.X if X is None else X.T
181 | if full:
182 | w = self.mean_w
183 | l = self.q
184 | else:
185 | w = self.mean_w[:, self.ed]
186 | l = len(self.ed)
187 | m = np.eye(l)*self.tau + np.dot(w.T, w)
188 | inv_m = np.linalg.inv(m)
189 | z = np.dot(np.dot(inv_m, w.T), X - self.mean_mu)
190 | return z.T
191 | # return np.array([np.random.multivariate_normal(z[:,i], inv_m*self.tau) for i in range(X.shape[1])])
192 | 
193 | 
194 | def inverse_transform(self, z, full=True):
195 | """transform the latent variable into observations"""
196 | z = z.T
197 | if full:
198 | w = self.mean_w
199 | else:
200 | w = self.mean_w[:, self.ed]
201 | x = np.dot(w, z) + self.mean_mu
202 | return x.T
203 | # return np.array([np.random.multivariate_normal(x[:,i], np.eye(self.d)*self.tau) for i in range(z.shape[1])])
204 | 
205 | 
206 | def fit_transform(self, X=None, batch_size=128, iters=500, print_every=100, verbose=False, trace_elbo=False, trace_loglikelihood=False):
207 | self.fit(X, batch_size, iters, print_every, verbose, trace_elbo, trace_loglikelihood)
208 | return self.transform()
209 | 
210 | 
211 | def generate(self, size=1):
212 | """generate samples from the fitted model"""
213 | w = self.mean_w[:, self.ed]
214 | c = np.eye(self.d)*self.tau + np.dot(w, w.T)
215 | return np.array([np.random.multivariate_normal(self.mean_mu.flatten(), c) for i in range(size)])
216 | 
217 | 
218 | def get_weight_matrix(self):
219 | return self.mean_w
220 | 
221 | 
222 | def get_inv_variance(self):
223 | return self.alpha
224 | 
225 | 
226 | def get_effective_dims(self):
227 | return len(self.ed)
228 | 
229 | 
230 | def get_cov_mat(self):
231 | w = self.mean_w[:, self.ed]
232 | c = np.eye(self.d)*self.tau + np.dot(w, w.T)
233 | return c
234 | 
235 | 
236 | def get_elbo(self):
237 | return self.elbos
238 | 
239 | 
240 | def get_loglikelihood(self):
241 
| return self.loglikelihoods 242 | 243 | --------------------------------------------------------------------------------
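Usage sketch (added note, not a file in the repository): a minimal end-to-end example of the BPCA class above, run on the same synthetic data used in ppca_demo.py; it uses only the interfaces shown in bpca.py.

    import numpy as np
    from numpy.random import multivariate_normal
    from bpca import BPCA

    # synthetic data: 8 dominant directions out of 64, as in the demos
    cov = np.diag([10, 9, 8, 7] + [1]*28 + [6, 5, 4, 3] + [1]*28)**2
    data = multivariate_normal(np.zeros(64), cov, 256)

    bpca = BPCA()
    bpca.fit(data, batch_size=128, iters=500, trace_loglikelihood=True)
    print('effective dimensions kept:', bpca.get_effective_dims())

    Z = bpca.transform()                 # posterior-mean latent coordinates, shape (256, 63)
    X_hat = bpca.inverse_transform(Z)    # reconstruction, shape (256, 64)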