├── LICENSE
├── README.md
├── datagenp_30ktr_5rep_1.mat
├── gendat.m
└── spfactcovest_mgploadings.m


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 David B. Dunson
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Sparse Bayesian Infinite Factor Models
 2 | 
 3 | We focus on sparse modelling of high-dimensional covariance matrices using Bayesian latent
 4 | factor models. We propose a multiplicative gamma process shrinkage prior on the factor loadings
 5 | which allows introduction of infinitely many factors, with the loadings increasingly shrunk
 6 | towards zero as the column index increases. We use our prior on a parameter-expanded loading
 7 | matrix to avoid the order dependence typical in factor analysis models and develop an efficient
 8 | Gibbs sampler that scales well as data dimensionality increases. The gain in efficiency is achieved
 9 | by the joint conjugacy property of the proposed prior, which allows block updating of the loadings
10 | matrix. We propose an adaptive Gibbs sampler for automatically truncating the infinite loading
11 | matrix through selection of the number of important factors. Theoretical results are provided
12 | on the support of the prior and truncation approximation bounds. A fast algorithm is proposed
13 | to produce approximate Bayes estimates. Latent factor regression methods are developed for
14 | prediction and variable selection in applications with high-dimensional correlated predictors.
15 | Operating characteristics are assessed through simulation studies, and the approach is applied to
16 | predict survival times from gene expression data.
17 | 
18 | A. Bhattacharya and D. B. Dunson (2011). Sparse Bayesian Infinite Factor Models. *Biometrika* 98(2), pp. 291–306
19 | 
20 | `gendat.m`  generates simulated data
21 | 
22 | `spfactcovest_mgploadings.m` implements the Gibbs sampler in bka paper
23 | 


--------------------------------------------------------------------------------
/datagenp_30ktr_5rep_1.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/david-dunson/sparse_bayesian_infinite_factor_models/3aaef5bc69eff799877fcd1098eea94404470f1f/datagenp_30ktr_5rep_1.mat


--------------------------------------------------------------------------------
/gendat.m:
--------------------------------------------------------------------------------
 1 | % Anirban Bhattacharya
 2 | 
 3 | % generate covariance matrix from a factor structure 
 4 | % with sparse loadings
 5 | 
 6 | 
 7 | clear; clc; 
 8 | 
 9 | % n = # samples, p = # vars.
10 | % k = # factors, rep = # simulation replictes
11 | rep = 1;n = 200; N = n*rep;
12 | p = 30; k = 5;   
13 | 
14 | % Lambda = loadings, numeff = # non-zero entries in each column of Lambda
15 | % numeff varies between (k+1) to 2*k
16 | 
17 | Lambda = zeros(p,k);
18 | numeff = k + randperm(k); 
19 | 
20 | % generate loadings
21 | for h = 1:k
22 |     temp = randsample(p,numeff(h));
23 |     Lambda(temp,h) = normrnd(0,1,[numeff(h),1]);
24 | end
25 | 
26 | % generative model: N(0, Lambda Lambda' + sigma^2 I)
27 | mu = zeros(1,p);
28 | Ot = Lambda*Lambda' + 0.01* eye(p);
29 | 
30 | % dat: N x p data matrix
31 | % rows of dat: random vectors drawn from covariance Ot
32 | dat =  mvnrnd(mu,Ot,N); 
33 | 
34 | ktr = k; rktr = rank(Lambda); Lamtr = Lambda;
35 | 
36 | save(strcat('datagenp_',num2str(p),'ktr_',num2str(k),'rep_',num2str(rep)),...
37 |     'dat','Ot','rep','n','p','ktr','rktr','Lamtr');
38 | 
39 |  


--------------------------------------------------------------------------------
/spfactcovest_mgploadings.m:
--------------------------------------------------------------------------------
  1 | % -- Anirban Bhattacharya -- %
  2 | 
  3 | % Gibbs sampler for covariance estimation
  4 | % using mgps prior on factor loadings
  5 | 
  6 | % lambda_{jh} \sim N(0, psi_{jh}^{-1} tau_h^{-1})
  7 | 
  8 | 
  9 | clear;clc;close all; tic;
 10 | 
 11 | load datagenp_30ktr_5rep_1;
 12 | 
 13 | % --- define global constants --- %
 14 | nrun=20000; burn=5000; thin=1; sp =(nrun - burn)/thin; % number of posterior samples
 15 | 
 16 | kinit = repmat(floor(log(p)*3),rep,1);                 % number of factors to start with
 17 | b0 = 1; b1 = 0.0005;
 18 | epsilon = 1e-3;                                        % threshold limit
 19 | prop = 1.00;                                           % proportion of redundant elements within columns
 20 | 
 21 | 
 22 | %---- define output files across replicates-----%
 23 | 
 24 | mserep = zeros(rep,3);                          % mse,absolute bias(avg and maximum) in estimating cov matrix
 25 | mse1rep = zeros(rep,3);                         % same as above in original scale in estimating cov matrix
 26 | nofrep = zeros(rep,sp);                         % evolution of factors across replicates
 27 | adrep = zeros(rep,1);                           % number of adaptations across replicates
 28 | 
 29 | for g = 1:rep
 30 | 
 31 |     disp(['start replicate','',num2str(g)]);
 32 |     disp('--------------------');
 33 | 
 34 |     % ------read data--------%
 35 |     Y = dat((g-1)*n + 1:g*n,:);                 % n x p data matrix
 36 |     M=mean(Y);VY=var(Y);
 37 |     Y = bsxfun(@minus,Y,M);                     % center the training data
 38 |     Y = bsxfun(@times,Y,1./sqrt(VY));           % scale the training data
 39 | 
 40 |     Ot1 = Ot.*(1./sqrt(VY'*VY));                % true dispersion matrix of the transformed data
 41 |     num = 0;
 42 |     k=kinit(g);                                 % no. of factors to start with
 43 | 
 44 |     % ------end read data--------%
 45 | 
 46 |     % --- Define hyperparameter values --- %
 47 |     
 48 |     as = 1;bs = 0.3;                           % gamma hyperparameters for residual precision
 49 |     df = 3;                                    % gamma hyperparameters for t_{ij}
 50 |     ad1 = 2.1;bd1 = 1;                         % gamma hyperparameters for delta_1
 51 |     ad2 = 3.1;bd2 = 1;                         % gamma hyperparameters delta_h, h >= 2
 52 |     adf = 1; bdf = 1;                          % gamma hyperparameters for ad1 and ad2 or df
 53 | 
 54 |     % --- Initial values --- %
 55 |     ps=gamrnd(as,1/bs,p,1); Sigma=diag(1./ps);                  % Sigma = diagonal residual covariance
 56 |     Lambda = zeros(p,k);  ta =  normrnd(0,1,[n,k]);             % factor loadings & latent factors
 57 |     meta = zeros(n,k); veta = eye(k);                           % latent factor distribution = standrad normal
 58 |                                 
 59 |     psijh = gamrnd(df/2,2/df,[p,k]);                            % local shrinkage coefficients
 60 |     delta = ...
 61 |         [gamrnd(ad1,bd1);gamrnd(ad2,bd2,[k-1,1])];              % gobal shrinkage coefficients multilpliers
 62 |     tauh = cumprod(delta);                                      % global shrinkage coefficients
 63 |     Plam = bsxfun(@times,psijh,tauh');                          % precision of loadings rows
 64 | 
 65 |     % --- Define output files specific to replicate --- %
 66 |     nofout = zeros(nrun+1,1);                  % number of factors across iteartions
 67 |     nofout(1) = k;
 68 |     nof1out = zeros(sp,1);
 69 |     mseout = zeros(sp,3);                      % within a replicate, stores mse across mcmc iterations
 70 |     mse1out = zeros(sp,3);                     % mse in original scale
 71 |     Omegaout = zeros(p^2,1);
 72 |     Omega1out = zeros(p^2,1);
 73 | 
 74 | 
 75 | 
 76 | 
 77 |     %------start gibbs sampling-----%
 78 | 
 79 |     for i = 1:nrun
 80 | 
 81 |         % -- Update eta -- %
 82 |         Lmsg = bsxfun(@times,Lambda,ps);
 83 |         Veta1 = eye(k) + Lmsg'*Lambda;
 84 |         T = cholcov(Veta1); [Q,R] = qr(T);
 85 |         S = inv(R); Veta = S*S';                   % Veta = inv(Veta1)
 86 |         Meta = Y*Lmsg*Veta;                        % n x k 
 87 |         eta = Meta + normrnd(0,1,[n,k])*S';        % update eta in a block
 88 | 
 89 |         % -- update Lambda (rue & held) -- %
 90 |         eta2 = eta'*eta;
 91 |         for j = 1:p
 92 |             Qlam = diag(Plam(j,:)) + ps(j)*eta2; blam = ps(j)*(eta'*Y(:,j));
 93 |             Llam = chol(Qlam,'lower'); zlam = normrnd(0,1,k,1);
 94 |             vlam = Llam\blam; mlam = Llam'\vlam; ylam = Llam'\zlam;
 95 |             Lambda(j,:) = (ylam + mlam)';
 96 |         end
 97 | 
 98 |         %------Update psi_{jh}'s------%
 99 |         psijh = gamrnd(df/2 + 0.5,1./(df/2 + bsxfun(@times,Lambda.^2,tauh')));
100 | 
101 |         %------Update delta & tauh------%
102 |         mat = bsxfun(@times,psijh,Lambda.^2);
103 |         ad = ad1 + 0.5*p*k; bd = bd1 + 0.5*(1/delta(1))*sum(tauh.*sum(mat)');
104 |         delta(1) = gamrnd(ad,1/bd);
105 |         tauh = cumprod(delta);
106 | 
107 |         for h = 2:k
108 |             ad = ad2 + 0.5*p*(k-h+1); bd = bd2 + 0.5*(1/delta(h))*sum(tauh(h:end).*sum(mat(:,h:end))');
109 |             delta(h) = gamrnd(ad,1/bd); tauh = cumprod(delta);
110 |         end
111 | 
112 |         % -- Update Sigma -- %
113 |         Ytil = Y - eta*Lambda'; 
114 |         ps=gamrnd(as + 0.5*n,1./(bs+0.5*sum(Ytil.^2)))';
115 |         Sigma=diag(1./ps);
116 | 
117 |         %---update precision parameters----%
118 |         Plam = bsxfun(@times,psijh,tauh');
119 | 
120 |         % ----- make adaptations ----%
121 |         prob = 1/exp(b0 + b1*i);                % probability of adapting
122 |         uu = rand;
123 |         lind = sum(abs(Lambda) < epsilon)/p;    % proportion of elements in each column less than eps in magnitude
124 |         vec = lind >=prop;num = sum(vec);       % number of redundant columns
125 | 
126 |         if uu < prob
127 |             if  i > 20 && num == 0 && all(lind < 0.995)
128 |                 k = k + 1;
129 |                 Lambda(:,k) = zeros(p,1);
130 |                 eta(:,k) = normrnd(0,1,[n,1]);
131 |                 psijh(:,k) = gamrnd(df/2,2/df,[p,1]);
132 |                 delta(k) = gamrnd(ad2,1/bd2);
133 |                 tauh = cumprod(delta);
134 |                 Plam = bsxfun(@times,psijh,tauh');
135 |             elseif num > 0
136 |                 nonred = setdiff(1:k,find(vec)); % non-redundant loadings columns
137 |                 k = max(k - num,1);
138 |                 Lambda = Lambda(:,nonred);
139 |                 psijh = psijh(:,nonred);
140 |                 eta = eta(:,nonred);
141 |                 delta = delta(nonred);
142 |                 tauh = cumprod(delta);
143 |                 Plam = bsxfun(@times,psijh,tauh');
144 |             end
145 |         end
146 |         nofout(i+1) = k;
147 | 
148 |         % -- save sampled values (after thinning) -- %
149 |         if mod(i,thin)==0 && i > burn
150 |             Omega = Lambda*Lambda' + Sigma;
151 |             Omega1 = Omega .* sqrt(VY'*VY);
152 |             Omegaout = Omegaout + Omega(:)/sp;
153 |             Omega1out = Omega1out + Omega1(:)/sp;
154 |             nof1out((i-burn)/thin) = (nofout((i-burn)/thin) - num)*(num > 0);
155 |         end
156 | 
157 |         if mod(i,1000) == 0
158 |             disp(i);
159 |         end
160 |     end
161 | 
162 | 
163 |     %---- summary measures specifi to replicate ---%
164 |     %1. covariance matrix estimation
165 |     errcov = Omegaout - Ot1(:); err1cov = Omega1out - Ot(:);
166 |     mserep(g,:) = [mean(errcov.^2) mean(abs(errcov)) max(abs(errcov))];
167 |     mse1rep(g,:) = [mean(err1cov.^2) mean(abs(err1cov)) max(abs(err1cov))];
168 | 
169 |     %w. evolution of factors
170 |     nofrep(g,:) = nof1out';
171 | 
172 |     disp(['end replicate','',num2str(g)]);
173 |     disp('--------------------');
174 | 
175 | 
176 | 
177 | end
178 | 
179 | toc;
180 | 
181 | 


--------------------------------------------------------------------------------