├── README.md
├── lme
    ├── code
    │   ├── analyze_result.R
    │   ├── calc_wasp_cov_2d_k10.m
    │   ├── calc_wasp_cov_2d_k20.m
    │   ├── callLpSolver.m
    │   ├── comp_lme.stan
    │   ├── comp_sampler.R
    │   ├── create_samples.R
    │   ├── full_lme.stan
    │   ├── mcmc_sampler.R
    │   ├── recoverSolution.m
    │   ├── simulate_data.R
    │   ├── submit.R
    │   ├── variational_bayes.R
    │   ├── wasp_lme.stan
    │   └── wasp_sampler.R
    └── qsub
    │   ├── bbvb.q
    │   ├── comp10.q
    │   ├── comp20.q
    │   ├── comp_cov10.q
    │   ├── comp_cov20.q
    │   ├── comp_marg10.q
    │   ├── comp_marg20.q
    │   ├── mcmc.q
    │   ├── vb.q
    │   ├── wasp10.q
    │   ├── wasp20.q
    │   ├── wasp_cov10.q
    │   └── wasp_cov20.q
├── mixtures
    ├── code
    │   ├── analyze_result.R
    │   ├── calc_wasp10.m
    │   ├── calc_wasp5.m
    │   ├── callLpSolver.m
    │   ├── comp_sampler.R
    │   ├── create_samples.R
    │   ├── full_sampler.R
    │   ├── recoverSolution.m
    │   ├── simulate_data.R
    │   ├── submit.R
    │   ├── vb_sampler.R
    │   └── wasp_sampler.R
    ├── data
    │   ├── mix_100k.rds
    │   ├── wasp_mix_100k_k10.rds
    │   └── wasp_mix_100k_k5.rds
    ├── qsub
    │   ├── calcWasp10_1.q
    │   ├── calcWasp10_2.q
    │   ├── calcWasp5_1.q
    │   ├── calcWasp5_2.q
    │   ├── comp10.q
    │   ├── comp5.q
    │   ├── compDens.q
    │   ├── compRho.q
    │   ├── full.q
    │   ├── sub10.q
    │   ├── sub5.q
    │   └── vb.q
    └── result
    │   ├── accMu.rds
    │   ├── accRho.rds
    │   ├── corrList.rds
    │   ├── densList.rds
    │   ├── muList.rds
    │   ├── subMuList.rds
    │   ├── subMuList10.rds
    │   ├── subMuList5.rds
    │   ├── subRhoList.rds
    │   ├── subRhoList10.rds
    │   └── subRhoList5.rds
├── ml
    ├── code
    │   ├── analyze_result.R
    │   ├── calc_wasp_cov_2d_k10.m
    │   ├── callLpSolver.m
    │   ├── comp_lme.stan
    │   ├── comp_sampler.R
    │   ├── create_samples.R
    │   ├── full_lme.stan
    │   ├── import_data.R
    │   ├── mcmc_sampler.R
    │   ├── recoverSolution.m
    │   ├── submit.R
    │   ├── variational_bayes.R
    │   ├── wasp_lme.stan
    │   └── wasp_sampler.R
    ├── data
    │   ├── dataSel.rds
    │   ├── ml_test.rds
    │   ├── ml_train.rds
    │   ├── test_idx.rds
    │   ├── train_idx.rds
    │   └── wasp_ml_train.rds
    └── qsub
    │   ├── bbvb.q
    │   ├── comp10.q
    │   ├── comp_joint_cov.q
    │   ├── comp_marg.q
    │   ├── mcmc.q
    │   ├── vb.q
    │   ├── wasp10.q
    │   └── wasp_joint_cov.q
└── parafac
    ├── code
        ├── analyze_result.R
        ├── callLpSolver.m
        ├── obtain_samples.m
        ├── parafac_dx_com.m
        ├── parafac_dx_sub.m
        ├── recoverSolution.m
        ├── simulate_data.m
        ├── submit.R
        ├── submit_parafac_full.m
        ├── submit_parafac_sub10.m
        ├── submit_parafac_sub10_comp.m
        ├── submit_parafac_sub5.m
        └── submit_parafac_sub5_comp.m
    ├── qsub
        ├── comp10.q
        ├── comp5.q
        ├── comp_marg10.q
        ├── comp_marg5.q
        ├── full.q
        ├── wasp10.q
        └── wasp5.q
    └── result
        └── img
            ├── para_time.pdf
            └── supp_marg_para.pdf


/README.md:
--------------------------------------------------------------------------------
 1 | * Organization
 2 |   - There are four directories, each correspond to a subsection in the experiments section of the paper: lme (linear mixed effects modeling), mixtures (mixture modeling), ml (MovieLens data), and parafac (probabilistic parafac). 
 3 |   - Each directory has four sub directories: code, data, qsub, and result.
 4 |   - Directory 'code' has files of all the code (Matlab and R source code) that was used in the analysis. 
 5 |   - Directory 'data' has (if any) simulated data that was used in the analysis. This directory may be empty or absent.
 6 |   - Directory 'qsub' has SGE files (.q) that were used to submit jobs on a SGE cluster. 
 7 |   - Directory 'result' has a sub directory 'img' and stores the result (if any) produced in the analysis. This directory may be empty or absent.
 8 | 
 9 | * Files
10 |   - 'simulate_data.R' contains the code to simulate and partition the data. 
11 |   - 'analyze_result.R' contains the code for analyzing the results of MCMC, WASP, and competing methods and making plots/tables.
12 |   - 'mcmc_sampler.R' contains the  code for the known/standard MCMC/Gibbs sampler for the model.
13 |   - 'wasp_sampler.R' contains the  code for the MCMC/Gibbs sampler of a subset posterior distribution. This is a modified version of the code in 'mcmc_sampler.R' using stochastic approximation.
14 |   - 'comp_sampler.R' contains the  code for the MCMC/Gibbs sampler of a subset posterior distribution in Consensus Monte Carlo (CMC) or Semiparametric Density Product (SDP). This is a modified version of the code in 'mcmc_sampler.R' by raising the prior to a power of '1/k', where 'k' is the number of subsets.
15 |   - 'variational_bayes.R' contains the  code for the variational Bayes approach.
16 |   - 'submit.R' contains the  code for the R code for submitting a job on the cluster. The files in 'qsub' directory use this file for running simulations.  
17 | 
18 | * Citation:
19 |   If you use the code, then please cite the following three papers:
20 |   - Srivastava, S., Li, C. and Dunson, D. B. (2017+). Scalable Bayes via barycenter in Wasserstein space. [<https://arxiv.org/abs/1508.05880>]
21 |   - Li, C., Srivastava, S. and Dunson, D. B. (2017). Simple, scalable and accurate posterior interval estimation.  Biometrika 104: 665-680. [<https://arxiv.org/abs/1605.04029>]
22 |   - Srivastava, S., Cevher, V., Tran-Dinh, Q. and Dunson, D. B. (2015). WASP: Scalable Bayes via barycenters of subset posteriors. Artificial Intelligence and Statistics: 912-920.
23 |    
24 | * Contact:
25 |   Please email Cheng Li (<stalic@nus.edu.sg>) or Sanvesh Srivastava (<sanvesh-srivastava@uiowa.edu>) if you have any questions related to the code.
26 | 
27 | * Acknowledgment
28 |   - The files 'callLpSolver.m', 'recoverSolution.m', and the *.m files for computing the WASP are based on an algorithm due to Volkan Cevher (<https://lions.epfl.ch/>) and Quoc Tran-Dinh (<http://trandinhquoc.com/>). The algorithm can be found in Srivastava et al. (2015).
29 |   - Some code for MovieLens data analysis and linear mixed effects modeling has been borrowed from Patrick O. Perry (<http://ptrckprry.com/code/>).
30 |   - Please email us if you think that we have missed citations to your paper/work. 
31 | 
32 | 


--------------------------------------------------------------------------------
/lme/code/calc_wasp_cov_2d_k10.m:
--------------------------------------------------------------------------------
  1 | % dirp = '/Shared/ssrivastva/wasp/lme/result/wasp/'
  2 | function calc_wasp_cov_2d_k10(dd, nsub, ndim, dirp)
  3 | 
  4 | addpath('/opt/gurobi/6.5.1/linux64/matlab/');
  5 | 
  6 | grdsize = 60;
  7 | rtime = zeros(ndim, 2);
  8 | 
  9 | % calculate the pair-wise sq. euclidean distance between the atoms of subset
 10 | % posteriors and WASP atoms
 11 | for pp = 1:2 
 12 |     for dims = 1:ndim
 13 |         covs = {};
 14 |         for jj = 1:nsub
 15 |             covs{jj} = csvread(strcat(dirp, 'samp/joint/cov_cv_', num2str(dd), '_p_', ...
 16 |                                  num2str(pp),'_nsub_', num2str(jj), '_d_', ...
 17 |                                  num2str(dims), '_k10.csv'));         
 18 |         end
 19 |                 
 20 |         subsetPost = {};    
 21 |         for jj = 1:nsub
 22 |             subsetPost{jj} = covs{jj}(randi([1 1000], 200, 1), :);
 23 |         end         
 24 | 
 25 |         lbd1 = min(cellfun(@(x) x(1), cellfun(@(x) min(x), subsetPost, ...
 26 |                                               'UniformOutput', false)));
 27 |         lbd2 = min(cellfun(@(x) x(2), cellfun(@(x) min(x), subsetPost, ...
 28 |                                               'UniformOutput', false)));   
 29 |         ubd1 = max(cellfun(@(x) x(1), cellfun(@(x) max(x), subsetPost, ...
 30 |                                               'UniformOutput', false)));
 31 |         ubd2 = max(cellfun(@(x) x(2), cellfun(@(x) max(x), subsetPost, ...
 32 |                                               'UniformOutput', false)));   
 33 | 
 34 |         [opostx, oposty] = meshgrid(linspace(lbd1, ubd1, grdsize), linspace(lbd2, ubd2, grdsize));
 35 |         overallPost = [opostx(:) oposty(:)]; % 
 36 |         
 37 |         distMatCell = {};
 38 |         
 39 |         m00 = diag(overallPost * overallPost');
 40 |         for ii = 1:nsub
 41 |             mm = diag(subsetPost{ii} * subsetPost{ii}');    
 42 |             mm1 = overallPost * subsetPost{ii}'; 
 43 |             distMatCell{ii} = bsxfun(@plus, bsxfun(@plus, -2 * mm1, mm'), m00);    
 44 |         end
 45 |         
 46 |         % constants
 47 |         K  = nsub;
 48 |         Ni = cell2mat(cellfun(@(x) size(x, 2), distMatCell, 'UniformOutput', false));
 49 |         N  = size(overallPost, 1);
 50 |         nx = N * (N+1);
 51 |         mx = K * N + N + 1;
 52 |         In = eye(N);
 53 |         En = ones(1, N);
 54 | 
 55 |         % Generate matrix A0.
 56 |         A0  = sparse([]);
 57 |         for p = 1:K
 58 |             cc = (1:N)';                  % terribly fast version of 
 59 |             idx = cc(:, ones(Ni(p), 1));  % repmat(In, 1, Ni(p)) / Ni(p)
 60 |             Rp  = In(:, idx(:)) / Ni(p);  % in 3 steps
 61 |             A0  = blkdiag(A0, Rp); 
 62 |         end
 63 |         cc = (1:N)';                  % terribly fast version of 
 64 |         idx = cc(:, ones(K, 1));      % repmat(-In, K, 1) 
 65 |         A00  = -In(idx(:), :);        % in 3 steps
 66 |         
 67 |         A0 = sparse([A00, A0]);
 68 |         b0 = zeros(size(A0, 1), 1);
 69 |         disp('done generating A ...');        
 70 |         
 71 |         % Generate matrix B from simplex constraints.
 72 |         B = sparse([]);
 73 |         for p = 0:(sum(Ni))
 74 |             B = blkdiag(B, En);
 75 |         end
 76 |         disp('done generating B ...');        
 77 |         
 78 |         % The hold matrix C.
 79 |         A = sparse([A0; B]);
 80 | 
 81 |         % Generate the right hand size vector b.
 82 |         b = sparse([zeros(K * N, 1); ones(sum(Ni) + 1, 1)]);
 83 |         
 84 |         % Generate the cost vector
 85 |         costCell = cellfun(@(x) x(:) / size(x, 2), distMatCell, 'UniformOutput', false);
 86 |         costVec = [zeros(size(overallPost, 1), 1); cell2mat(costCell(:))];
 87 |         
 88 |         c = sparse(costVec);
 89 |         
 90 |         tic;
 91 |         lpsol = callLpSolver('gurobi', A, b, c, 10000, 1e-10);
 92 |         rtime(dims, pp) = toc;
 93 |         
 94 |         [tmats, avec] = recoverSolution(lpsol, K, N, Ni);
 95 | 
 96 |         summ = [overallPost avec];
 97 |         csvwrite(strcat(dirp, 'joint/wasp_cov_cv_',  num2str(dd), '_p_', num2str(pp), '_d_', num2str(dims), '_k10.csv'), summ);
 98 |         
 99 |         disp(['done with sim ' num2str(dd) '...' ' p ' num2str(pp) '...' ' dim ' num2str(dims) '... ']);    
100 |     end    
101 | end
102 | 
103 | csvwrite(strcat(dirp, 'joint/cov_2d_times_cv_', num2str(dd),  '_k10.csv'), rtime);    
104 | 
105 | quit
106 | 
107 | 


--------------------------------------------------------------------------------
/lme/code/calc_wasp_cov_2d_k20.m:
--------------------------------------------------------------------------------
  1 | % dirp = '/Shared/ssrivastva/wasp/lme/result/wasp/'
  2 | function calc_wasp_cov_2d_k20(dd, nsub, ndim, dirp)
  3 | 
  4 | addpath('/opt/gurobi/6.5.1/linux64/matlab/');
  5 | 
  6 | rtime = zeros(ndim, 2);
  7 | grdsize = 60;
  8 | % calculate the pair-wise sq. euclidean distance between the atoms of subset
  9 | % posteriors and WASP atoms
 10 | for pp = 1:2 
 11 |     for dims = 1:ndim
 12 |         covs = {};
 13 |         for jj = 1:nsub
 14 |             covs{jj} = csvread(strcat(dirp, 'samp/joint/cov_cv_', num2str(dd), '_p_', ...
 15 |                                  num2str(pp),'_nsub_', num2str(jj), '_d_', ...
 16 |                                  num2str(dims), '_k20.csv'));         
 17 |         end
 18 |                 
 19 |         subsetPost = {};    
 20 |         for jj = 1:nsub
 21 |             subsetPost{jj} = covs{jj}(randi([1 1000], 200, 1), :);
 22 |         end         
 23 | 
 24 |         lbd1 = min(cellfun(@(x) x(1), cellfun(@(x) min(x), subsetPost, ...
 25 |                                               'UniformOutput', false)));
 26 |         lbd2 = min(cellfun(@(x) x(2), cellfun(@(x) min(x), subsetPost, ...
 27 |                                               'UniformOutput', false)));   
 28 |         ubd1 = max(cellfun(@(x) x(1), cellfun(@(x) max(x), subsetPost, ...
 29 |                                               'UniformOutput', false)));
 30 |         ubd2 = max(cellfun(@(x) x(2), cellfun(@(x) max(x), subsetPost, ...
 31 |                                               'UniformOutput', false)));   
 32 | 
 33 |         [opostx, oposty] = meshgrid(linspace(lbd1, ubd1, grdsize), linspace(lbd2, ubd2, grdsize));
 34 |         overallPost = [opostx(:) oposty(:)]; % 
 35 |         
 36 |         distMatCell = {};
 37 |         
 38 |         m00 = diag(overallPost * overallPost');
 39 |         for ii = 1:nsub
 40 |             mm = diag(subsetPost{ii} * subsetPost{ii}');    
 41 |             mm1 = overallPost * subsetPost{ii}'; 
 42 |             distMatCell{ii} = bsxfun(@plus, bsxfun(@plus, -2 * mm1, mm'), m00);    
 43 |         end
 44 |         
 45 |         % constants
 46 |         K  = nsub;
 47 |         Ni = cell2mat(cellfun(@(x) size(x, 2), distMatCell, 'UniformOutput', false));
 48 |         N  = size(overallPost, 1);
 49 |         nx = N * (N+1);
 50 |         mx = K * N + N + 1;
 51 |         In = eye(N);
 52 |         En = ones(1, N);
 53 | 
 54 |         % Generate matrix A0.
 55 |         A0  = sparse([]);
 56 |         for p = 1:K
 57 |             cc = (1:N)';                  % terribly fast version of 
 58 |             idx = cc(:, ones(Ni(p), 1));  % repmat(In, 1, Ni(p)) / Ni(p)
 59 |             Rp  = In(:, idx(:)) / Ni(p);  % in 3 steps
 60 |             A0  = blkdiag(A0, Rp); 
 61 |         end
 62 |         cc = (1:N)';                  % terribly fast version of 
 63 |         idx = cc(:, ones(K, 1));      % repmat(-In, K, 1) 
 64 |         A00  = -In(idx(:), :);        % in 3 steps
 65 |         
 66 |         A0 = sparse([A00, A0]);
 67 |         b0 = zeros(size(A0, 1), 1);
 68 |         disp('done generating A ...');        
 69 |         
 70 |         % Generate matrix B from simplex constraints.
 71 |         B = sparse([]);
 72 |         for p = 0:(sum(Ni))
 73 |             B = blkdiag(B, En);
 74 |         end
 75 |         disp('done generating B ...');        
 76 |         
 77 |         % The hold matrix C.
 78 |         A = sparse([A0; B]);
 79 | 
 80 |         % Generate the right hand size vector b.
 81 |         b = sparse([zeros(K * N, 1); ones(sum(Ni) + 1, 1)]);
 82 |         
 83 |         % Generate the cost vector
 84 |         costCell = cellfun(@(x) x(:) / size(x, 2), distMatCell, 'UniformOutput', false);
 85 |         costVec = [zeros(size(overallPost, 1), 1); cell2mat(costCell(:))];
 86 |         
 87 |         c = sparse(costVec);
 88 |         
 89 |         tic;
 90 |         lpsol = callLpSolver('gurobi', A, b, c, 10000, 1e-10);
 91 |         rtime(dims, pp) = toc;
 92 |         
 93 |         [tmats, avec] = recoverSolution(lpsol, K, N, Ni);
 94 | 
 95 |         summ = [overallPost avec];
 96 |         csvwrite(strcat(dirp, 'joint/wasp_cov_cv_',  num2str(dd), '_p_', num2str(pp), '_d_', num2str(dims), '_k20.csv'), summ);
 97 |         
 98 |         disp(['done with sim ' num2str(dd) '...' ' p ' num2str(pp) '...' ' dim ' num2str(dims) '... ']);    
 99 |     end    
100 | end
101 | 
102 | csvwrite(strcat(dirp, 'joint/cov_2d_times_cv_', num2str(dd),  '_k20.csv'), rtime);    
103 | 
104 | quit
105 | 
106 | 


--------------------------------------------------------------------------------
/lme/code/callLpSolver.m:
--------------------------------------------------------------------------------
  1 | % FUNCTION:  [xsol, output] = callLpSolver(solver, Amat, bvec, ...
  2 | %                                          cvec, maxiters, tolx)
  3 | % PURPOSE: Call the Lp solver to solve the LP problem.
  4 | % 
  5 | %
  6 | function [xsol, output] = callLpSolver(solver, Amat, bvec, cvec, maxiters, tolx)
  7 | 
  8 | % Check the inputs.
  9 | if nargin < 3, error('At least three inputs are required!'); end
 10 | if nargin < 5, tolx = 1e-4; end
 11 | if nargin < 4, maxiters = 1000; end
 12 | if isempty(tolx), tolx = 1e-4; end
 13 | if isempty(maxiters), maxiters = 1000; end
 14 | if size(Amat, 1) ~= length(bvec), error('Inputs are inconsistent!'); end
 15 | if size(Amat, 2) ~= length(cvec), error('Inputs are inconsistent!'); end
 16 | xsol = []; output = [];
 17 | nx   = length(cvec);
 18 | 
 19 | %% Call the SeDuMi solver.
 20 | if strcmpi(solver, 'sedumi')
 21 |     pars.maxiter = maxiters;
 22 |     pars.eps     = tolx;
 23 |     Cones.l      = length(cvec);
 24 |     time3        = tic;
 25 |     [x_sedumi, y_sedumi, info] = sedumi(Amat, bvec, cvec, Cones, pars);
 26 |     x_sedumi = full(x_sedumi);
 27 |     xsol         = x_sedumi;
 28 |     output.time3        = toc(time3);
 29 |     output.info  = info;
 30 |     output.dual_sol = y_sedumi;
 31 | end
 32 | 
 33 | %% Call the Matlab LINPROG solver.
 34 | if strcmpi(solver, 'linprog')
 35 |     time5       = tic;
 36 |     opts        = optimset('Algorithm', 'interior-point', ...
 37 |                            'Display', 'iter', ...
 38 |                            'MaxIter', maxiters, 'TolX', tolx, 'TolFun', tolx);
 39 |     [xsol, fx2] = linprog(cvec, [], [], Amat, bvec, zeros(nx, 1), [], [], opts);
 40 |     output.time = toc(time5);
 41 |     output.fx   = fx2;
 42 | end
 43 | 
 44 | %% Call the SDPT3 solver.
 45 | if strcmpi(solver, 'sdpt3')
 46 |     Asdpt3 = spconvert(Amat);
 47 |     blk{1, 1} = 'l';
 48 |     blk{1, 2} = ones(1, nx);
 49 |     sdpt3opt  = struct('gaptol', tolx, 'maxit', maxiters);
 50 |     time4     = tic;
 51 |     %[X0, y0, Z0] = infeaspt(blk, Asdpt3, cvec, bvec); 
 52 |     %[fx_sdpt3, x_sdpt3, y_sdpt3, Z_sdpt3] = sqlp(blk, Asdpt3, cvec, bvec, sdpt3opt, X0, y0, Z0);
 53 |     [fx_sdpt3, x_sdpt3, y_sdpt3, Z_sdpt3] = sqlp(blk, Asdpt3, cvec, bvec, sdpt3opt);
 54 |     output.time4     = toc(time4);
 55 |     x_sdpt3   = x_sdpt3{:};
 56 |     xsol      = x_sdpt3;
 57 |     output.dual_sol = y_sdpt3;
 58 |     output.slacks   = Z_sdpt3;
 59 |     output.fx       = fx_sdpt3;
 60 | end    
 61 | 
 62 | %% Call Our Decopt solver.
 63 | if strcmpi(solver, 'decopt')
 64 |     
 65 |     % Set the parameters.
 66 |     param.MaxIters      = maxiters;
 67 |     param.Verbosity     = 2;
 68 |     param.RelTolX       = tolx;
 69 |     param.saveHistMode  = 0;
 70 |     param.Algorithm     = 3;
 71 |     param.InnerMaxIters = 20;
 72 |     param.adaptStepSize = 0;
 73 | 
 74 |     % Call the solver.
 75 |     proxLpPos    = @(x, gamma)( min( max(0, x - gamma*cvec), 1.0) );
 76 | 
 77 |     % User-define proximal-functions.
 78 |     proxOpers{1} = @(x, gamma, varargin)(proxLpPos(x, gamma));
 79 |     proxOpers{2} = @(x, gamma, varargin)(projL2norm(x, 1e-12));
 80 | 
 81 |     proxOpers{3} = @(x, varargin)( cvec'*x );
 82 |     proxOpers{4} = @(x, varargin)(0);
 83 |     
 84 |     % Generate an initial point.
 85 |     x0       = zeros(nx, 1);
 86 |     
 87 |     %% Call the solver with user-define prox-functions.
 88 |     time1 = tic;
 89 |     [xsol, out] = decoptSolver('UserDef', Amat, bvec, param, 'x0', x0, 'Prox', proxOpers, 'GammaFactor', 1.1);
 90 |     output.time = toc(time1);
 91 |     output.info = out;
 92 | 
 93 | end
 94 | 
 95 | %% Call the Gurobi solver.
 96 | if strcmpi(solver, 'gurobi')
 97 |     
 98 |     % Generate the LP model.
 99 |     time_g            = tic;
100 |     model.A           = Amat;
101 |     model.obj         = full(cvec);
102 |     model.rhs         = full(bvec);
103 |     model.modelsense  = 'min';
104 |     model.sense       = '=';
105 |     
106 |     % Define the parameters.
107 |     param.method      = 2;
108 |     param.Presolve    = 2;
109 |     param.Crossover   = 0;
110 |     param.outputflag  = 1;
111 |     
112 |     % Call the solver.
113 |     result            = gurobi(model, param);
114 |     
115 |     % Obtain the final results.
116 |     output.result     = result;
117 |     output.time       = toc(time_g);
118 |     xsol              = result.x;
119 |     
120 | end
121 | 
122 | 


--------------------------------------------------------------------------------
/lme/code/comp_lme.stan:
--------------------------------------------------------------------------------
 1 | functions {
 2 |   // prior^{1/k}; k = #subsets
 3 |   real approx_normal_prior_log(real beta1, real mn1, real sigma1, real nsub) {
 4 |     return normal_log(beta1, mn1, sigma1) / nsub;
 5 |   }
 6 |   // prior^{1/k}; k = #subsets
 7 |   real approx_cauchy_log(real beta1, real mn1, real sigma1, real nsub) {
 8 |     return cauchy_log(beta1, mn1, sigma1) / nsub;
 9 |   }  
10 |   // prior^{1/k}; k = #subsets
11 |   real approx_lkj_corr_log(matrix corrMat, real eta, real nsub) {
12 |     return lkj_corr_log(corrMat, eta) / nsub;
13 |   }
14 |   // takes care of the likelihood
15 |   real approx_lik_log (vector y, vector mu, real stdErr, matrix covMat, matrix zmat, real nsub) {
16 |     matrix[num_elements(y), num_elements(y)] covY; 
17 |     matrix[num_elements(y), num_elements(y)] L;
18 |     
19 |     covY = quad_form(covMat, zmat');
20 |     for (kk in 1:(rows(zmat)))
21 |       covY[kk, kk] = covY[kk, kk] + stdErr;
22 | 
23 |     L = cholesky_decompose(covY);
24 |     
25 |     return (nsub * multi_normal_cholesky_log(y, mu, L));
26 |   }   
27 | }
28 | 
29 | data {
30 |   int<lower=0> nobs;                          // total no. of obs
31 |   int<lower=0> nfixef;                        // total no. of fixed effects
32 |   int<lower=0> nranef;                        // total no. of random effects
33 |   int<lower=0> ngroup;                        // total no. of clusters
34 |   real<lower=0> nsub;                         // total no. of resamples
35 |   matrix[nobs, nfixef] xmat;                  // fixed effects design matrix
36 |   matrix[nobs, nranef] zmat;                  // random effects design matrix
37 |   int group[nobs];                            // cluster ids
38 |   vector[nobs] yvec;                          // observations
39 |   int pos1[ngroup];                           // database indices ...
40 |   int pos2[ngroup];                           // to handle ragged arrays  
41 | }
42 | 
43 | transformed data {
44 |   // both fix eff. and rand. eff. are apriori centered at 0
45 |   vector[nranef] meanRanef;
46 |   vector[nfixef] meanFixef;
47 |   
48 |   meanRanef = rep_vector(0.0, nranef);
49 |   meanFixef = rep_vector(0.0, nfixef);
50 | }
51 | 
52 | parameters {
53 |   corr_matrix[nranef] corrRanef;       // correlation matrix of rand. eff.
54 |   vector<lower=0>[nranef] sclRanef;    // scale matrix of rand. eff.
55 |   
56 |   vector[nfixef] fixef;                // population level fix. eff.
57 |   real<lower=0> stdErrFixef;           // std err. in pop. level. fix. eff.
58 |   
59 |   real<lower=0> stdErr;                // population level std. err.
60 | }
61 | 
62 | transformed parameters {
63 |   matrix[nranef, nranef] covRanef;
64 |   vector[nobs] mu;
65 | 
66 |   mu = xmat * fixef;
67 | 
68 |   covRanef = quad_form_diag(corrRanef, sclRanef);  
69 | }
70 | 
71 | model {  
72 | 
73 |   stdErr ~ approx_cauchy(0, 2.5, nsub);
74 | 
75 |   // prior for fix. eff.
76 |   stdErrFixef ~ cauchy(0, 2.5);  
77 |   // beta ~ prior^{1/k}
78 |   for (pp in 1:nfixef) {
79 |     fixef[pp] ~ approx_normal_prior(0.0, stdErrFixef, nsub);
80 |   }
81 |   
82 |   // sample rand. eff.
83 |   for (ii in 1:nranef) {
84 |     sclRanef[ii] ~ approx_cauchy(0.0, 2.5, nsub);
85 |   }  
86 |   corrRanef ~ approx_lkj_corr(2.0, nsub);
87 |   
88 |   
89 |   // sample data
90 |   for (ii in 1:ngroup) {
91 |     segment(yvec, pos1[ii], pos2[ii] - pos1[ii] + 1) ~ approx_lik(segment(mu, pos1[ii], pos2[ii] - pos1[ii] + 1), 
92 |                                                                    stdErr, covRanef,
93 |                                                                    block(zmat, pos1[ii], 1, pos2[ii] - pos1[ii] + 1, nranef),
94 |                                                                    1);
95 |   }  
96 | }
97 | 
98 | 


--------------------------------------------------------------------------------
/lme/code/comp_sampler.R:
--------------------------------------------------------------------------------
 1 | sampleFromCompMixMdl <- function (yvec, xmat, zmat, group, nrep, niter, nburn, nthin, id) {
 2 |     library(inline)
 3 |     library(Rcpp)
 4 |     library(rstan)
 5 | 
 6 |     grpLbl <- sort(unique(group))
 7 |     ngroup <- length(grpLbl)
 8 | 
 9 |     ranefList <- list()
10 |     grpIdx <- list()
11 |     for (ii in 1:ngroup) {
12 |       grpIdx[[ii]] <- which(group == grpLbl[ii])
13 |       ranefList[[ii]] <- zmat[grpIdx[[ii]], , drop = FALSE]
14 |     }
15 |     ranefMat <- do.call(rbind, ranefList)
16 |     fixefMat <- xmat[unlist(grpIdx), ]
17 | 
18 |     pos2 <- cumsum(sapply(grpIdx, length))
19 |     pos1 <- c(1, pos2[-ngroup] + 1)
20 | 
21 |     ordY <- yvec[unlist(grpIdx)]
22 |     ordGrp <- group[unlist(grpIdx)]
23 | 
24 |     idx <- seq_along(unlist(grpIdx))
25 |     simList = list(nobs = length(yvec[idx]),
26 |       nfixef = ncol(xmat),
27 |       nranef = ncol(zmat),
28 |       ngroup = length(unique(ordGrp[idx])),
29 |       nsub = nrep,
30 |       xmat = fixefMat[idx, ],
31 |       zmat = ranefMat[idx, ],
32 |       group = ordGrp[idx],
33 |       yvec = ordY[idx],
34 |       pos1 = pos1,
35 |       pos2 = pos2)
36 | 
37 |     seeds <- (1:2000) * as.numeric(gsub(":", "", substr(Sys.time(), 12, 19)))
38 | 
39 |     startTime <- proc.time()
40 |     mdl <- stan(file = "comp_lme.stan", data = simList, iter = niter, warmup = nburn, chains = 1, thin = nthin,
41 |                 seed = seeds[id],
42 |                 init = list(list(betas = rep(0, ncol(xmat)),
43 |                                  corrRanef = diag(ncol(zmat)),
44 |                                  sclRanef = rep(2, ncol(zmat))
45 |                                  )))
46 |     endTime <- proc.time()
47 | 
48 |     lst <- mdl@sim$samples[[1]]
49 |     bs <- grep("fixef|covRanef", names(lst))
50 |     sampdf <- do.call(cbind, lst[bs])
51 | 
52 |     list(samples = sampdf[(nrow(sampdf) - (niter - nburn) / nthin + 1):nrow(sampdf), ], time = endTime - startTime)
53 | }
54 | 


--------------------------------------------------------------------------------
/lme/code/create_samples.R:
--------------------------------------------------------------------------------
 1 | rm(list=ls())
 2 | 
 3 | setwd("/Shared/ssrivastva/wasp/lme/result/")
 4 | 
 5 | library(matrixStats)
 6 | 
 7 | waspCovSamp <- list()
 8 | 
 9 | npart <- 10
10 | 
11 | for (cc in 1:10) {
12 |     waspCovSamp[[cc]] <- list()
13 |     for (pp in 1:2) {
14 |         waspCovSamp[[cc]][[pp]] <- list()
15 |         for (kk in 1:npart) {
16 |             cat("loaded: ", paste0("wasp/samp/wasp_mixed_cv_", cc, "_p_", pp, "_k_", kk, "_nsub10.rds"), "\n")
17 |             dat <- readRDS(paste0("wasp/samp/wasp_mixed_cv_", cc, "_p_", pp, "_k_", kk, "_nsub10.rds"))
18 |             cnames <- colnames(dat$samples)
19 |             if (pp == 2) {
20 |                 waspCovSamp[[cc]][[pp]][[kk]] <- dat$samples[ , c(81:86, 88:92, 95:98, 102:104, 109:110, 116)]
21 |             } else {
22 |                 waspCovSamp[[cc]][[pp]][[kk]] <- dat$samples[ , c(5:7, 9:10, 13)]
23 |             }
24 |         }
25 |     }
26 | }
27 | 
28 | for (cc in 1:10) {
29 |     for (pp in 1:2) {
30 |         for (kk in 1:npart) {
31 |             for (dd in 1:3) {
32 |                 if (pp == 1) {
33 |                     cov2d <- cbind(c(2, 2, 3), c(3, 5, 5))
34 |                     mat <- waspCovSamp[[cc]][[pp]][[kk]][ , c(cov2d[dd, 1], cov2d[dd, 2])]
35 |                 } else {
36 |                     cov2d <- cbind(c(2, 2, 3), c(3, 8, 8))
37 |                     mat <- waspCovSamp[[cc]][[pp]][[kk]][ , c(cov2d[dd, 1], cov2d[dd, 2])]
38 |                 }
39 |                 cat("wrote: ", paste0("/Shared/ssrivastva/wasp/lme/result/wasp/samp/joint/cov_cv_", cc, "_p_", pp, "_nsub_", kk, "_d_", dd, "_k10.csv"), "\n")
40 |                 write.table(mat, file = paste0("/Shared/ssrivastva/wasp/lme/result/wasp/samp/joint/cov_cv_", cc, "_p_", pp, "_nsub_", kk, "_d_", dd, "_k10.csv"), sep = ",", row.names = FALSE, col.names = FALSE)
41 |             }
42 |         }
43 |     }
44 | }
45 | 
46 | npart <- 20
47 | 
48 | for (cc in 1:10) {
49 |     waspCovSamp[[cc]] <- list()
50 |     for (pp in 1:2) {
51 |         waspCovSamp[[cc]][[pp]] <- list()
52 |         for (kk in 1:npart) {
53 |             cat("loaded: ", paste0("wasp/samp/wasp_mixed_cv_", cc, "_p_", pp, "_k_", kk, "_nsub20.rds"), "\n")
54 |             dat <- readRDS(paste0("wasp/samp/wasp_mixed_cv_", cc, "_p_", pp, "_k_", kk, "_nsub20.rds"))
55 |             cnames <- colnames(dat$samples)
56 |             if (pp == 2) {
57 |                 waspCovSamp[[cc]][[pp]][[kk]] <- dat$samples[ , c(81:86, 88:92, 95:98, 102:104, 109:110, 116)]
58 |             } else {
59 |                 waspCovSamp[[cc]][[pp]][[kk]] <- dat$samples[ , c(5:7, 9:10, 13)]
60 |             }
61 |         }
62 |     }
63 | }
64 | 
65 | for (cc in 1:10) {
66 |     for (pp in 1:2) {
67 |         for (kk in 1:npart) {
68 |             for (dd in 1:3) {
69 |                 if (pp == 1) {
70 |                     cov2d <- cbind(c(2, 2, 3), c(3, 5, 5))
71 |                     mat <- waspCovSamp[[cc]][[pp]][[kk]][ , c(cov2d[dd, 1], cov2d[dd, 2])]
72 |                 } else {
73 |                     cov2d <- cbind(c(2, 2, 3), c(3, 8, 8))
74 |                     mat <- waspCovSamp[[cc]][[pp]][[kk]][ , c(cov2d[dd, 1], cov2d[dd, 2])]
75 |                 }
76 |                 cat("wrote: ", paste0("/Shared/ssrivastva/wasp/lme/result/wasp/samp/joint/cov_cv_", cc, "_p_", pp, "_nsub_", kk, "_d_", dd, "_k20.csv"), "\n")
77 |                 write.table(mat, file = paste0("/Shared/ssrivastva/wasp/lme/result/wasp/samp/joint/cov_cv_", cc, "_p_", pp, "_nsub_", kk, "_d_", dd, "_k20.csv"), sep = ",", row.names = FALSE, col.names = FALSE)
78 |             }
79 |         }
80 |     }
81 | }
82 | 


--------------------------------------------------------------------------------
/lme/code/full_lme.stan:
--------------------------------------------------------------------------------
 1 | data {
 2 |   // data for model fitting
 3 |   int<lower=0> nobs;                          // total no. of individuals
 4 |   int<lower=0> nfixef;                        // total no. of fixed effects
 5 |   int<lower=0> nranef;                        // total no. of random effects
 6 |   int<lower=0> ngroup;                        // total no. of clusters
 7 |   matrix[nobs, nfixef] xmat;                  // fixed effects design matrix
 8 |   matrix[nobs, nranef] zmat;                  // random effects design matrix
 9 |   int group[nobs];                            // cluster ids
10 |   real yvec[nobs];                            // observations
11 | }
12 | 
13 | transformed data {
14 |   // both fix eff. and rand. eff. are apriori centered at 0
15 |   vector[nranef] meanRanef;
16 |   vector[nfixef] meanFixef;
17 |   
18 |   meanRanef <- rep_vector(0.0, nranef);
19 |   meanFixef <- rep_vector(0.0, nfixef);
20 | }
21 | 
22 | parameters {
23 |   corr_matrix[nranef] corrRanef;       // correlation matrix of rand. eff.
24 |   vector<lower=0>[nranef] sclRanef;    // scale matrix of rand. eff.
25 |   vector[nranef] ranef[ngroup];        // population level rand. eff.
26 | 
27 |   vector[nfixef] fixef;                // population level fix. eff.
28 |   real<lower=0> stdErrFixef;           // std err. in pop. level. fix. eff.
29 | 
30 |   real<lower=0> stdErr;                // population level std. err.
31 | }
32 | 
33 | transformed parameters {
34 |   real yHat[nobs];
35 |   matrix[nranef, nranef] covRanef;
36 | 
37 |   for (ii in 1:nobs) {
38 |     yHat[ii] <- xmat[ii] * fixef + zmat[ii] * ranef[group[ii]]; // individual level mean 
39 |   }
40 | 
41 |   covRanef <- quad_form_diag(corrRanef, sclRanef);
42 | }
43 | 
44 | model {
45 |   stdErr ~ cauchy(0, 2.5);
46 |   
47 |   // sample rand. eff.
48 |   sclRanef ~ cauchy(0, 2.5);
49 |   corrRanef ~ lkj_corr(2);
50 |   for (ii in 1:ngroup) {
51 |     ranef[ii] ~ multi_normal(meanRanef, covRanef); 
52 |   }
53 |   
54 |   // sample fix. eff.
55 |   stdErrFixef ~ cauchy(0, 2.5);  
56 |   fixef ~ normal(meanFixef, stdErrFixef);
57 | 
58 |   // sample data
59 |   yvec ~ normal(yHat, stdErr); 
60 | }
61 | 


--------------------------------------------------------------------------------
/lme/code/mcmc_sampler.R:
--------------------------------------------------------------------------------
 1 | sampleFromMixMdl <- function (yvec, xmat, zmat, group, niter, nburn, nthin, id) {
 2 |     library(inline)
 3 |     library(Rcpp)
 4 |     library(rstan)
 5 | 
 6 |     gg <- ordered(as.character(group), levels = sort(unique(group)))
 7 |     group <- as.integer(gg)
 8 | 
 9 |     simList = list(
10 |         nobs = length(yvec),
11 |         nfixef = ncol(xmat),
12 |         nranef = ncol(zmat),
13 |         ngroup = length(unique(group)),
14 |         xmat = xmat,
15 |         zmat = zmat,
16 |         group = group,
17 |         yvec = yvec)
18 | 
19 |     seeds <- (1:5000) * as.numeric(gsub(":", "", substr(Sys.time(), 12, 19)))
20 | 
21 |     stanCode <- readChar("full_lme.stan", file.info("full_lme.stan")$size)
22 |     startTime <- proc.time()
23 |     mdl <- stan(model_code = stanCode, data = simList, iter = niter, warmup = nburn, chains = 1, thin = nthin,
24 |                 seed = seeds[id],
25 |                 init = list(list(betas = rep(0, ncol(xmat)),
26 |                                  corrRanef = diag(ncol(zmat)),
27 |                                  sclRanef = rep(2, ncol(zmat))
28 |                                  )))
29 |     endTime <- proc.time()
30 | 
31 |     lst <- mdl@sim$samples[[1]]
32 |     bs <- grep("fixef|covRanef", names(lst))
33 |     sampdf <- do.call(cbind, lst[bs])
34 | 
35 |     list(samples = sampdf[(nrow(sampdf) - (niter - nburn) / nthin + 1):nrow(sampdf), ], time = endTime - startTime)
36 | }
37 | 


--------------------------------------------------------------------------------
/lme/code/recoverSolution.m:
--------------------------------------------------------------------------------
 1 | % FUNCTION: [tOptSol, aOptSol] = recoverSolution(xOptSol, nsubs, N, Ni)
 2 | % PURPOSE:  Recover the original solution.
 3 | %
 4 | function [tOptSol, aOptSol] = recoverSolution(xOptSol, nsubs, N, Ni)
 5 | 
 6 |     % Define this soft-thresholding operator to remove small elements.
 7 |     softThresOper = @(x, t)(sign(x).*max(abs(x) - t, 0));
 8 | 
 9 |     % Recover the solution.
10 |     aOptSol = xOptSol(1:N, 1);
11 |     xRest   = xOptSol(N+1:end);
12 |     for p = 1:nsubs
13 |         tOptSol{p} = reshape( xRest(1:N*Ni(p)), N, Ni(p))/Ni(p);
14 |         xRest      = xRest(N*Ni(p)+1:end);
15 |         tOptSol{p} = softThresOper(tOptSol{p}, 1e-10);
16 |     end
17 | 
18 | end
19 | % @END ...
20 | 


--------------------------------------------------------------------------------
/lme/code/simulate_data.R:
--------------------------------------------------------------------------------
  1 | rm(list=ls())
  2 | 
  3 | set.seed(12345)
  4 | 
  5 | setwd("~/wasp/lme/code")
  6 | library(matrixStats)
  7 | library(Matrix)
  8 | 
  9 | ## See mbest package at http://ptrckprry.com/code/ Perry (2017) in JRSS-B.
 10 | genData <- function (ngroup, nobs, nfixef, nranef) {
 11 |     ## fixed effects coefficients
 12 |     fixef <- rep(c(-2, 2), length = nfixef)
 13 |     if (nranef == 3) {
 14 |         ranefCorr <- matrix(c(1, -0.4, 0.3,
 15 |                               -0.4, 1, 0.001,
 16 |                               0.3, 0.001, 1),
 17 |                             nranef, nranef)
 18 |     } else {
 19 |         ranefCorr <- as.matrix(bdiag(rep(list(matrix(c(1, -0.4, 0.3,
 20 |                                                        -0.4, 1, 0.001,
 21 |                                                        0.3, 0.001, 1),
 22 |                                                      3, 3)), 2)))
 23 |     }
 24 |     ranefCov <- outer(sqrt(1:nranef), sqrt(1:nranef)) * ranefCorr
 25 |     ranefCovSqrt <- chol(ranefCov)
 26 | 
 27 |     # generate coefficients
 28 |     u <- matrix(rnorm(ngroup * nranef), ngroup, nranef)
 29 |     ranef <- u %*% ranefCovSqrt
 30 | 
 31 |     ## generate group
 32 |     suppressWarnings({ # ignore warning about using Walker's alias method
 33 |         group <- sample.int(ngroup, nobs, replace=TRUE)
 34 |     })
 35 | 
 36 |     ## generate feature  matrices with Pr(x[i,j] = +1) = P(x[i,j] = -1) = 1/2,
 37 |     x <- matrix(sample(c(-1, +1), nobs * nfixef, replace=TRUE), nobs, nfixef)
 38 |     z <- matrix(sample(c(-1, +1), nobs * nranef, replace=TRUE), nobs, nranef)
 39 | 
 40 |     ## compute linear predictors and generate observations
 41 |     mu <- drop(x %*% fixef) + rowSums(z * ranef[group,])
 42 |     y <- rnorm(nobs, mean=mu, sd=1)
 43 | 
 44 |     list(ngroup = ngroup, nobs = nobs,
 45 |          #fixef = fixef, #ranef = ranef,
 46 |          ranefCov = ranefCov,
 47 |          ranefCovSqrt = ranefCovSqrt,
 48 |          group = group, x = x, z = z, y.mean = mu, y = y)
 49 | }
 50 | 
 51 | ngroup <- 6000
 52 | nobs <- 1e5
 53 | nfixef <- c(4, 80)
 54 | nranef <- c(3, 6)
 55 | 
 56 | repData <- list()
 57 | for (cc in 1:10) {
 58 |   repData[[cc]] <- vector("list", 2)
 59 |   names(repData[[cc]]) <- paste0("p", nfixef, "q", nranef)
 60 | }
 61 | 
 62 | for (cc in 1:10) {
 63 |   cat("cc ", cc, "\n")
 64 |   for (pp in 1:2) {
 65 |       repData[[cc]][[pp]] <- genData(ngroup, nobs, nfixef[pp], nranef[pp])
 66 |   }
 67 | }
 68 | 
 69 | saveRDS(repData, "/Shared/ssrivastva/wasp/lme/data/mixed.rds")
 70 | 
 71 | rm(list=ls())
 72 | 
 73 | repData <- readRDS("/Shared/ssrivastva/wasp/lme/data/mixed.rds")
 74 | 
 75 | set.seed(12345)
 76 | 
 77 | ngroup <- 6000
 78 | nobs <- 1e5
 79 | nfixef <- c(4, 80)
 80 | nranef <- c(3, 6)
 81 | 
 82 | npart <- 10
 83 | partData <- list()
 84 | 
 85 | for (cc in 1:10) {
 86 |     partData <- vector("list", 2)
 87 |     names(partData) <- paste0("p", nfixef, "q", nranef)
 88 |     for (pp in 1:2) {
 89 |         partData[[pp]] <- vector("list", npart)
 90 |         names(partData[[pp]]) <- paste0("k", 1:npart)
 91 |         lst <- repData[[cc]][[pp]]
 92 |         grpSplit <- split(1:nrow(lst$x), lst$group)
 93 |         partsIdx <- sample(1:npart, length(grpSplit), replace = TRUE)
 94 |         for (ll in 1:npart) {
 95 |             grpIdx <- which(partsIdx == ll)
 96 |             idx <- unlist(grpSplit[grpIdx])
 97 |             partData[[pp]][[ll]]$nobs <- length(idx)
 98 |             partData[[pp]][[ll]]$x <- lst$x[idx, ]
 99 |             partData[[pp]][[ll]]$y <- lst$y[idx]
100 |             partData[[pp]][[ll]]$z <- lst$z[idx, ]
101 |             partData[[pp]][[ll]]$group <- lst$group[idx]
102 |             partData[[pp]][[ll]]$idx <- idx
103 |             partData[[pp]][[ll]]$nrep <- nobs / length(idx)
104 |         }
105 |     }
106 |     saveRDS(partData, paste0("/Shared/ssrivastva/wasp/lme/data/wasp_mixed_cv_", cc, "_k10", ".rds"))
107 | }
108 | 
109 | rm(list=ls())
110 | 
111 | repData <- readRDS("/Shared/ssrivastva/wasp/lme/data/mixed.rds")
112 | 
113 | set.seed(12345)
114 | 
115 | ngroup <- 6000
116 | nobs <- 1e5
117 | nfixef <- c(4, 80)
118 | nranef <- c(3, 6)
119 | 
120 | npart <- 20
121 | partData <- list()
122 | 
123 | for (cc in 1:10) {
124 |     partData <- vector("list", 2)
125 |     names(partData) <- paste0("p", nfixef, "q", nranef)
126 |     for (pp in 1:2) {
127 |         partData[[pp]] <- vector("list", npart)
128 |         names(partData[[pp]]) <- paste0("k", 1:npart)
129 |         lst <- repData[[cc]][[pp]]
130 |         grpSplit <- split(1:nrow(lst$x), lst$group)
131 |         partsIdx <- sample(1:npart, length(grpSplit), replace = TRUE)
132 |         for (ll in 1:npart) {
133 |             grpIdx <- which(partsIdx == ll)
134 |             idx <- unlist(grpSplit[grpIdx])
135 |             partData[[pp]][[ll]]$nobs <- length(idx)
136 |             partData[[pp]][[ll]]$x <- lst$x[idx, ]
137 |             partData[[pp]][[ll]]$y <- lst$y[idx]
138 |             partData[[pp]][[ll]]$z <- lst$z[idx, ]
139 |             partData[[pp]][[ll]]$group <- lst$group[idx]
140 |             partData[[pp]][[ll]]$idx <- idx
141 |             partData[[pp]][[ll]]$nrep <- nobs / length(idx)
142 |         }
143 |     }
144 |     saveRDS(partData, paste0("/Shared/ssrivastva/wasp/lme/data/wasp_mixed_cv_", cc, "_k20", ".rds"))
145 |     cat("cc: ", cc, "\n")
146 | }
147 | 


--------------------------------------------------------------------------------
/lme/code/submit.R:
--------------------------------------------------------------------------------
  1 | cmdArgs <- commandArgs(trailingOnly = TRUE)
  2 | 
  3 | mtd <- as.numeric(cmdArgs[1])
  4 | id <- as.numeric(cmdArgs[2])
  5 | 
  6 | if (mtd == 1) {
  7 |     source("mcmc_sampler.R")
  8 |     cvs <- rep(1:10, each = 2)
  9 |     ndims <- rep(1:2, times = 10)
 10 | 
 11 |     cid <- cvs[id]
 12 |     did <- ndims[id]
 13 | 
 14 |     cvtrain <- readRDS("/Shared/ssrivastva/wasp/lme/data/mixed.rds")
 15 |     train <- cvtrain[[cid]][[did]]
 16 | 
 17 |     res <- sampleFromMixMdl(train$y, train$x, train$z, train$group, 10000, 5000, 5, id)
 18 |     fname <- paste0("/Shared/ssrivastva/wasp/lme/result/mcmc/mcmc_lme_cv_", cid, "_p_", did, ".rds")
 19 |     saveRDS(res, fname)
 20 | } else if (mtd == 2) {
 21 |     source("comp_sampler.R")
 22 |     cvs <- rep(1:10, each = 2)
 23 |     ndims <- rep(1:2, times = 10)
 24 | 
 25 |     tmp <- cbind(cvs, ndims)
 26 |     wids <- cbind(tmp[rep(1:nrow(tmp), each = 10), ], rep(1:10, times = 20))
 27 | 
 28 |     cid <- wids[id, 1]
 29 |     did <- wids[id, 2]
 30 |     sid <- wids[id, 3]
 31 | 
 32 |     cvtrain <- readRDS(paste0("/Shared/ssrivastva/wasp/lme/data/wasp_mixed_cv_", cid,"_k10", ".rds"))
 33 |     train <- cvtrain[[did]][[sid]]
 34 |     rm(cvtrain)
 35 | 
 36 |     res <- sampleFromCompMixMdl(train$y, train$x, train$z, train$group, train$nrep, 10000, 5000, 5, id)
 37 |     fname <- paste0("/Shared/ssrivastva/wasp/lme/result/comp/comp_mixed_cv_", cid, "_p_", did, "_k_", sid, "_nsub10.rds")
 38 |     saveRDS(res, fname)
 39 | } else if (mtd == 3) {
 40 |     source("comp_sampler.R")
 41 |     cvs <- rep(1:10, each = 2)
 42 |     ndims <- rep(1:2, times = 10)
 43 | 
 44 |     tmp <- cbind(cvs, ndims)
 45 |     wids <- cbind(tmp[rep(1:nrow(tmp), each = 20), ], rep(1:20, times = 20))
 46 |     cid <- wids[id, 1]
 47 |     did <- wids[id, 2]
 48 |     sid <- wids[id, 3]
 49 | 
 50 |     cvtrain <- readRDS(paste0("/Shared/ssrivastva/wasp/lme/data/wasp_mixed_cv_", cid,"_k20", ".rds"))
 51 |     train <- cvtrain[[did]][[sid]]
 52 |     rm(cvtrain)
 53 | 
 54 |     res <- sampleFromCompMixMdl(train$y, train$x, train$z, train$group, train$nrep, 10000, 5000, 5, id)
 55 |     fname <- paste0("/Shared/ssrivastva/wasp/lme/result/comp/comp_mixed_cv_", cid, "_p_", did, "_k_", sid, "_nsub20.rds")
 56 |     saveRDS(res, fname)
 57 | } else if (mtd == 4) {
 58 |     source("wasp_sampler.R")
 59 |     cvs <- rep(1:10, each = 2)
 60 |     ndims <- rep(1:2, times = 10)
 61 | 
 62 |     tmp <- cbind(cvs, ndims)
 63 |     wids <- cbind(tmp[rep(1:nrow(tmp), each = 10), ], rep(1:10, times = 20))
 64 | 
 65 |     cid <- wids[id, 1]
 66 |     did <- wids[id, 2]
 67 |     sid <- wids[id, 3]
 68 | 
 69 |     cvtrain <- readRDS(paste0("/Shared/ssrivastva/wasp/lme/data/wasp_mixed_cv_", cid,"_k10", ".rds"))
 70 |     train <- cvtrain[[did]][[sid]]
 71 |     rm(cvtrain)
 72 | 
 73 |     res <- sampleFromWaspMixMdl(train$y, train$x, train$z, train$group, train$nrep, 10000, 5000, 5, id)
 74 |     fname <- paste0("/Shared/ssrivastva/wasp/lme/result/wasp/samp/wasp_mixed_cv_", cid, "_p_", did, "_k_", sid, "_nsub10.rds")
 75 |     saveRDS(res, fname)
 76 | } else if (mtd == 5) {
 77 |     source("wasp_sampler.R")
 78 |     cvs <- rep(1:10, each = 2)
 79 |     ndims <- rep(1:2, times = 10)
 80 | 
 81 |     tmp <- cbind(cvs, ndims)
 82 |     wids <- cbind(tmp[rep(1:nrow(tmp), each = 20), ], rep(1:20, times = 20))
 83 |     cid <- wids[id, 1]
 84 |     did <- wids[id, 2]
 85 |     sid <- wids[id, 3]
 86 | 
 87 |     cvtrain <- readRDS(paste0("/Shared/ssrivastva/wasp/lme/data/wasp_mixed_cv_", cid,"_k20", ".rds"))
 88 |     train <- cvtrain[[did]][[sid]]
 89 |     rm(cvtrain)
 90 | 
 91 |     res <- sampleFromWaspMixMdl(train$y, train$x, train$z, train$group, train$nrep, 10000, 5000, 5, id)
 92 |     fname <- paste0("/Shared/ssrivastva/wasp/lme/result/wasp/samp/wasp_mixed_cv_", cid, "_p_", did, "_k_", sid, "_nsub20.rds")
 93 |     saveRDS(res, fname)
 94 | } else if (mtd == 6) {
 95 |     library(parallelMCMCcombine)
 96 |     cvs <- rep(1:10, each = 2)
 97 |     ndims <- rep(1:2, times = 10)
 98 |     cid <- cvs[id]
 99 |     did <- ndims[id]
100 | 
101 |     subfix <- array(0.0, dim = c(c(4, 80)[did], 1000, 10))
102 |     subran <- array(0.0, dim = c(c(6, 21)[did], 1000, 10))
103 |     tmp <- numeric(10)
104 |     for (kk in 1:10) {
105 |         fname <- paste0("/Shared/ssrivastva/wasp/lme/result/comp/comp_mixed_cv_", cid, "_p_", did, "_k_", kk, "_nsub10.rds")
106 |         samp <- readRDS(fname)
107 |         cnames <- colnames(samp$samples)
108 |         if (did == 1) {
109 |             subfix[ , , kk] <- t(samp$samples[ , 1:4])
110 |             subran[ , , kk] <- t(samp$samples[ , c(5:7, 9:10, 13)])
111 |         } else {
112 |             subfix[ , , kk] <- t(samp$samples[ , 1:80])
113 |             subran[ , , kk] <- t(samp$samples[ , c(81:86, 88:92, 95:98, 102:104, 109:110, 116)])
114 |         }
115 |         tmp[kk] <- samp$time[3]
116 |     }
117 | 
118 |     stime <- rep(0, 2)
119 |     strt1 <- proc.time()
120 |     scottFix <- consensusMCindep(subchain = subfix)
121 |     end1 <- proc.time()
122 |     stime[1] <- mean(tmp) + end1[3] - strt1[3]
123 |     strt1 <- proc.time()
124 |     scottRan <- consensusMCindep(subchain = subran)
125 |     end1 <- proc.time()
126 |     stime[2] <- mean(tmp) + end1[3] - strt1[3]
127 | 
128 |     xtime <- rep(0, 2)
129 |     strt2 <- proc.time()
130 |     xingFix <- semiparamDPE(subchain = subfix)
131 |     end2 <- proc.time()
132 |     xtime[1] <- mean(tmp) + end2[3] - strt2[3]
133 |     strt2 <- proc.time()
134 |     xingRan <- semiparamDPE(subchain = subran)
135 |     end2 <- proc.time()
136 |     xtime[2] <- mean(tmp) + end2[3] - strt2[3]
137 | 
138 |     fname1 <- paste0("/Shared/ssrivastva/wasp/lme/result/cons/marg/cons_fix_ran_cv_", cid, "_p_", did, "_k10.rds")
139 |     fname2 <- paste0("/Shared/ssrivastva/wasp/lme/result/xing/marg/xing_fix_ran_cv_", cid, "_p_", did, "_k10.rds")
140 | 
141 |     saveRDS(list(fix = t(scottFix), ran = t(scottRan), time = stime), fname1)
142 |     saveRDS(list(fix = t(xingFix), ran = t(xingRan), time = xtime), fname2)
143 | } else if (mtd == 7) {
144 |     library(parallelMCMCcombine)
145 |     cvs <- rep(1:10, each = 2)
146 |     ndims <- rep(1:2, times = 10)
147 |     cid <- cvs[id]
148 |     did <- ndims[id]
149 | 
150 |     subfix <- array(0.0, dim = c(c(4, 80)[did], 1000, 20))
151 |     subran <- array(0.0, dim = c(c(6, 21)[did], 1000, 20))
152 |     tmp <- numeric(20)
153 |     for (kk in 1:20) {
154 |         fname <- paste0("/Shared/ssrivastva/wasp/lme/result/comp/comp_mixed_cv_", cid, "_p_", did, "_k_", kk, "_nsub20.rds")
155 |         samp <- readRDS(fname)
156 |         cnames <- colnames(samp$samples)
157 |         if (did == 1) {
158 |             subfix[ , , kk] <- t(samp$samples[ , 1:4])
159 |             subran[ , , kk] <- t(samp$samples[ , c(5:7, 9:10, 13)])
160 |         } else {
161 |             subfix[ , , kk] <- t(samp$samples[ , 1:80])
162 |             subran[ , , kk] <- t(samp$samples[ , c(81:86, 88:92, 95:98, 102:104, 109:110, 116)])
163 |         }
164 |         tmp[kk] <- samp$time[3]
165 |     }
166 | 
167 |     stime <- rep(0, 2)
168 |     strt1 <- proc.time()
169 |     scottFix <- consensusMCindep(subchain = subfix)
170 |     end1 <- proc.time()
171 |     stime[1] <- mean(tmp) + end1[3] - strt1[3]
172 |     strt1 <- proc.time()
173 |     scottRan <- consensusMCindep(subchain = subran)
174 |     end1 <- proc.time()
175 |     stime[2] <- mean(tmp) + end1[3] - strt1[3]
176 | 
177 |     xtime <- rep(0, 2)
178 |     strt2 <- proc.time()
179 |     xingFix <- semiparamDPE(subchain = subfix)
180 |     end2 <- proc.time()
181 |     xtime[1] <- mean(tmp) + end2[3] - strt2[3]
182 |     strt2 <- proc.time()
183 |     xingRan <- semiparamDPE(subchain = subran)
184 |     end2 <- proc.time()
185 |     xtime[2] <- mean(tmp) + end2[3] - strt2[3]
186 | 
187 |     fname1 <- paste0("/Shared/ssrivastva/wasp/lme/result/cons/marg/cons_fix_ran_cv_", cid, "_p_", did, "_k20.rds")
188 |     fname2 <- paste0("/Shared/ssrivastva/wasp/lme/result/xing/marg/xing_fix_ran_cv_", cid, "_p_", did, "_k20.rds")
189 | 
190 |     saveRDS(list(fix = t(scottFix), ran = t(scottRan), time = stime), fname1)
191 |     saveRDS(list(fix = t(xingFix), ran = t(xingRan), time = xtime), fname2)
192 | } else if (mtd == 8) {
193 |     library(parallelMCMCcombine)
194 |     cvs <- rep(1:10, each = 2)
195 |     ndims <- rep(1:2, times = 10)
196 |     cid <- cvs[id]
197 |     did <- ndims[id]
198 | 
199 |     subJtCov <- rep(list(array(0.0, dim = c(2, 1000, 10))), 3)
200 | 
201 |     tmp <- numeric(10)
202 |     for (kk in 1:10) {
203 |         fname <- paste0("/Shared/ssrivastva/wasp/lme/result/comp/comp_mixed_cv_", cid, "_p_", did, "_k_", kk, "_nsub10.rds")
204 |         samp <- readRDS(fname)
205 |         cnames <- colnames(samp$samples)
206 |         if (did == 1) {
207 |             cov2d <- cbind(c(2, 2, 3), c(3, 5, 5))
208 |             subcov <- samp$samples[ , c(5:7, 9:10, 13)]
209 |             for (ddd in 1:3) {
210 |                 subJtCov[[ddd]][ , , kk] <- t(subcov[ , c(cov2d[ddd, 1], cov2d[ddd, 2])])
211 |             }
212 |         } else {
213 |             cov2d <- cbind(c(2, 2, 3), c(3, 8, 8))
214 |             subcov <- samp$samples[ , c(81:86, 88:92, 95:98, 102:104, 109:110, 116)]
215 |             for (ddd in 1:3) {
216 |                 subJtCov[[ddd]][ , , kk] <- t(subcov[ , c(cov2d[ddd, 1], cov2d[ddd, 2])])
217 |             }
218 |         }
219 |         tmp[kk] <- samp$time[3]
220 |     }
221 | 
222 |     scottCov <- list()
223 |     scottTime <- numeric(3)
224 |     for (ddd in 1:3) {
225 |         strt1 <- proc.time()
226 |         scottCov[[ddd]] <- t(consensusMCcov(subchain = subJtCov[[ddd]]))
227 |         end1 <- proc.time()
228 |         scottTime[ddd] <- mean(tmp) + end1[3] - strt1[3]
229 |     }
230 | 
231 |     xingCov <- list()
232 |     xingTime <- numeric(3)
233 |     for (ddd in 1:3) {
234 |         strt1 <- proc.time()
235 |         xingCov[[ddd]] <- t(semiparamDPE(subchain = subJtCov[[ddd]]))
236 |         end1 <- proc.time()
237 |         xingTime[ddd] <- mean(tmp) + end1[3] - strt1[3]
238 |     }
239 | 
240 |     fname1 <- paste0("/Shared/ssrivastva/wasp/lme/result/cons/joint/cons_cov_cv_", cid, "_p_", did, "_k10.rds")
241 |     fname2 <- paste0("/Shared/ssrivastva/wasp/lme/result/xing/joint/xing_cov_cv_", cid, "_p_", did, "_k10.rds")
242 | 
243 |     saveRDS(list(cov = scottCov, time = scottTime), fname1)
244 |     saveRDS(list(cov = xingCov, time = xingTime), fname2)
245 | } else if (mtd == 9) {
246 |     library(parallelMCMCcombine)
247 |     cvs <- rep(1:10, each = 2)
248 |     ndims <- rep(1:2, times = 10)
249 |     cid <- cvs[id]
250 |     did <- ndims[id]
251 | 
252 |     subJtCov <- rep(list(array(0.0, dim = c(2, 1000, 20))), 3)
253 | 
254 |     tmp <- numeric(20)
255 |     for (kk in 1:20) {
256 |         fname <- paste0("/Shared/ssrivastva/wasp/lme/result/comp/comp_mixed_cv_", cid, "_p_", did, "_k_", kk, "_nsub20.rds")
257 |         samp <- readRDS(fname)
258 |         cnames <- colnames(samp$samples)
259 |         if (did == 1) {
260 |             cov2d <- cbind(c(2, 2, 3), c(3, 5, 5))
261 |             subcov <- samp$samples[ , c(5:7, 9:10, 13)]
262 |             for (ddd in 1:3) {
263 |                 subJtCov[[ddd]][ , , kk] <- t(subcov[ , c(cov2d[ddd, 1], cov2d[ddd, 2])])
264 |             }
265 |         } else {
266 |             cov2d <- cbind(c(2, 2, 3), c(3, 8, 8))
267 |             subcov <- samp$samples[ , c(81:86, 88:92, 95:98, 102:104, 109:110, 116)]
268 |             for (ddd in 1:3) {
269 |                 subJtCov[[ddd]][ , , kk] <- t(subcov[ , c(cov2d[ddd, 1], cov2d[ddd, 2])])
270 |             }
271 |         }
272 |         tmp[kk] <- samp$time[3]
273 |     }
274 | 
275 |     scottCov <- list()
276 |     scottTime <- numeric(3)
277 |     for (ddd in 1:3) {
278 |         strt1 <- proc.time()
279 |         scottCov[[ddd]] <- t(consensusMCcov(subchain = subJtCov[[ddd]]))
280 |         end1 <- proc.time()
281 |         scottTime[ddd] <- mean(tmp) + end1[3] - strt1[3]
282 |     }
283 | 
284 |     xingCov <- list()
285 |     xingTime <- numeric(3)
286 |     for (ddd in 1:3) {
287 |         strt1 <- proc.time()
288 |         xingCov[[ddd]] <- t(semiparamDPE(subchain = subJtCov[[ddd]]))
289 |         end1 <- proc.time()
290 |         xingTime[ddd] <- mean(tmp) + end1[3] - strt1[3]
291 |     }
292 | 
293 |     fname1 <- paste0("/Shared/ssrivastva/wasp/lme/result/cons/joint/cons_cov_cv_", cid, "_p_", did, "_k20.rds")
294 |     fname2 <- paste0("/Shared/ssrivastva/wasp/lme/result/xing/joint/xing_cov_cv_", cid, "_p_", did, "_k20.rds")
295 | 
296 |     saveRDS(list(cov = scottCov, time = scottTime), fname1)
297 |     saveRDS(list(cov = xingCov, time = xingTime), fname2)
298 | } else if (mtd == 10) {
299 |     source("variational_bayes.R")
300 |     cvs <- rep(1:10, each = 2)
301 |     ndims <- rep(1:2, times = 10)
302 | 
303 |     cid <- cvs[id]
304 |     did <- ndims[id]
305 | 
306 |     cvtrain <- readRDS("/Shared/ssrivastva/wasp/lme/data/mixed.rds")
307 |     train <- cvtrain[[cid]][[did]]
308 | 
309 |     res <- fitLinearMixefEffectsVB(train$y, train$x, train$z, train$group, 1000)
310 |     fname <- paste0("/Shared/ssrivastva/wasp/lme/result/vb/vb_lme_cv_", cid, "_p_", did, ".rds")
311 |     saveRDS(res, fname)
312 | } else (mtd == 11) {
313 |     cvs <- rep(1:10, each = 2)
314 |     ndims <- rep(1:2, times = 10)
315 | 
316 |     cid <- cvs[id]
317 |     did <- ndims[id]
318 | 
319 |     cvtrain <- readRDS("/Shared/ssrivastva/wasp/lme/data/mixed.rds")
320 |     train <- cvtrain[[cid]][[did]]
321 | 
322 |     library(inline)
323 |     library(Rcpp)
324 |     library(rstan)
325 | 
326 |     yvec <- train$y; xmat <- train$x; zmat <- train$z; group <- train$group
327 |     gg <- ordered(as.character(group), levels = sort(unique(group)))
328 |     group <- as.integer(gg)
329 | 
330 |     simList = list(
331 |         nobs = length(yvec),
332 |         nfixef = ncol(xmat),
333 |         nranef = ncol(zmat),
334 |         ngroup = length(unique(group)),
335 |         xmat = xmat,
336 |         zmat = zmat,
337 |         group = group,
338 |         yvec = yvec)
339 | 
340 |     seeds <- (1:5000) * as.numeric(gsub(":", "", substr(Sys.time(), 12, 19)))
341 |     strt1 <- proc.time()
342 |     mdl <- stan_model("full_lme.stan")
343 |     res <- vb(mdl, data = simList, output_samples = 2000, seed = seeds[id])
344 |     end1 <- proc.time()
345 | 
346 |     fname <- paste0("/Shared/ssrivastva/wasp/lme/result/bbvb/bbvb_lme_cv_", cid, "_p_", did, ".rds")
347 |     saveRDS(list(res = res, time = end1 - strt1), fname)
348 | }
349 | 


--------------------------------------------------------------------------------
/lme/code/variational_bayes.R:
--------------------------------------------------------------------------------
  1 | fitLinearMixefEffectsVB <- function (yvec, xmat, zmat, group, niter) {
  2 |     library(Matrix)
  3 |     library(MCMCpack)
  4 | 
  5 |     nfixef <- ncol(xmat)
  6 |     nranef <- ncol(zmat)
  7 |     grpLbl <- sort(unique(group))
  8 |     ngroup <- length(grpLbl)
  9 |     ndim <- nrow(xmat)
 10 | 
 11 |     elbo <- numeric(niter)
 12 | 
 13 |     ranefList <- list()
 14 |     grpIdx <- list()
 15 |     for (ii in 1:ngroup) {
 16 |         grpIdx[[ii]] <- which(group == grpLbl[ii])
 17 |         ranefList[[ii]] <- zmat[grpIdx[[ii]], , drop = FALSE]
 18 |     }
 19 |     ranefMat <- bdiag(ranefList)
 20 | 
 21 |     fixefMat <- xmat[unlist(grpIdx), ]
 22 | 
 23 |     designMat <- cBind(fixefMat, ranefMat)
 24 |     designTransDesign <- crossprod(designMat, designMat)
 25 |     ordY <- yvec[unlist(grpIdx)]
 26 |     ordGrp <- group[unlist(grpIdx)]
 27 |     designTransY <- crossprod(designMat, ordY)
 28 | 
 29 |     muErrInv <- 1; muErrAInv <- 1; muVarErrAinv <- 1; errAScl <- 1; nu = 2; muRanCovInv <- solve(rWishart(1, 2 * ncol(zmat), diag(ncol(zmat)))[ , , 1]); varBeta <- 100; ranCovAsScl <- rep(1, ncol(zmat));
 30 | 
 31 |     prev <- list(coefMu = runif(nfixef), coefCov = diag(nfixef), ranCov = diag(nranef)); conv <- 1e5
 32 | 
 33 |     startTime <- proc.time()
 34 |     for (its in 0:niter) {
 35 | 
 36 |         smat <- diag(0, ncol(xmat))
 37 |         svec <- numeric(ncol(xmat))
 38 | 
 39 |         gmat <- vector("list", ngroup)
 40 |         hmat <- vector("list", ngroup)
 41 |         for(ii in 1:ngroup) {
 42 |             gmat[[ii]] <- crossprod(xmat[grpIdx[[ii]], ], zmat[grpIdx[[ii]], ]) * muErrInv
 43 |             hmat[[ii]] <- solve(crossprod(zmat[grpIdx[[ii]], ], zmat[grpIdx[[ii]], ]) * muErrInv + muRanCovInv)
 44 |             tmp <- gmat[[ii]] %*% hmat[[ii]]
 45 |             smat <- smat + tcrossprod(tmp, gmat[[ii]])
 46 |             svec <- svec + drop(tmp %*% crossprod(zmat[grpIdx[[ii]], ], yvec[grpIdx[[ii]]]))
 47 |         }
 48 | 
 49 |         fixCov <- solve(crossprod(fixefMat, fixefMat) * muErrInv + diag(1, nfixef) / varBeta - smat)
 50 |         fixMu <- drop(muErrInv * fixCov %*% (crossprod(fixefMat, ordY) - svec))
 51 | 
 52 |         ranMu <- vector("list", ngroup)
 53 |         ranCov <- vector("list", ngroup)
 54 |         tmp1 <- numeric(ngroup)
 55 |         tmp2 <- numeric(ngroup)
 56 |         for(ii in 1:ngroup) {
 57 |             ranCov[[ii]] <- hmat[[ii]] + tcrossprod(hmat[[ii]], gmat[[ii]]) %*% fixCov %*% gmat[[ii]] %*% hmat[[ii]]
 58 |             ranMu[[ii]] <- drop(hmat[[ii]] %*% (muErrInv * crossprod(zmat[grpIdx[[ii]], ], yvec[grpIdx[[ii]]]) - crossprod(gmat[[ii]], fixMu)))
 59 |             tmp1[ii] <- sum(crossprod(zmat[grpIdx[[ii]], ], zmat[grpIdx[[ii]], ]) * ranCov[[ii]])
 60 |             tmp2[ii] <- sum(tcrossprod(gmat[[ii]] %*% hmat[[ii]], gmat[[ii]]) * fixCov)
 61 |         }
 62 |         ranMuVec <- do.call(c, ranMu)
 63 |         resids <- ordY - fixefMat %*% fixMu - ranefMat %*% ranMuVec
 64 |         scaleErr <- muErrAInv + 0.5 * (sum(resids^2) + sum(crossprod(fixefMat, fixefMat) * fixCov) + sum(tmp1) - 2 * sum(tmp2) / muErrInv)
 65 |         shapeErr <- 0.5 * (ndim + 1)
 66 |         muErrInv <- shapeErr / scaleErr
 67 | 
 68 |         ## update post. for parameter in the px-ed form of half-cauchy prior for err
 69 |         shapeErrA <- 1
 70 |         scaleErrA <- muErrInv + errAScl
 71 |         muErrAInv <- shapeErrA / scaleErrA
 72 | 
 73 |         ## update post. for parameter in the px-ed form of half-cauchy
 74 |         ## prior for the random effects covariance matrix
 75 |         scaleRanCovAs <- nu * diag(muRanCovInv) + ranCovAsScl
 76 |         shapeRanCovAs <- 0.5 * (nu + nranef)
 77 |         muRanCovAsInv <- pmax(shapeRanCovAs / scaleRanCovAs, 1e-5)
 78 | 
 79 |         ranefCoefMuMat <- matrix(ranMuVec, nrow = nranef, ncol = ngroup)
 80 |         ranefCoefCovMat <- diag(0, nranef)
 81 |         for (jj in 1:nranef) {
 82 |             ranefCoefCovMat <- ranefCoefCovMat + ranCov[[jj]]
 83 |         }
 84 |         scaleRanCovMat <- tcrossprod(ranefCoefMuMat, ranefCoefMuMat) + ranefCoefCovMat + 2 * nu * diag(muRanCovAsInv)
 85 |         rateRanCovMat <- solve(scaleRanCovMat)
 86 |         muRanCovInv <- (nu + ngroup + nranef - 1) * rateRanCovMat
 87 | 
 88 |         if ((its > 10) && (conv < 1e-10))
 89 |             break
 90 | 
 91 |         if (its %% 10 == 0) {
 92 |             cat("iteration: ", its, "\n")
 93 | 
 94 |             diff1 <- matrix(prev$coefMu[1:nfixef] - fixMu)
 95 |             diff2 <- prev$coefCov - fixCov
 96 |             diff3 <- matrix(prev$ranCov - as.matrix(scaleRanCovMat))
 97 |             conv <- norm(diff1, "O") + norm(diff2, "O") + norm(diff3, "O")
 98 | 
 99 |             prev$coefMu <- fixMu; prev$coefCov <- fixCov; prev$ranCov <- scaleRanCovMat
100 |         }
101 | 
102 |     }
103 |     endTime <- proc.time()
104 | 
105 |     list(
106 |         coefs = list(
107 |             cov = fixCov,
108 |             mu = fixMu
109 |             )
110 |         ,
111 |         err = list(
112 |             aa = scaleErr,
113 |             bb = shapeErr
114 |             )
115 |         ,
116 |         cov = list(
117 |             scale = scaleRanCovMat,
118 |             df = (nu + ngroup + nranef - 1)
119 |             )
120 |         ,
121 |         niter = its
122 |         ,
123 |         time = endTime - startTime
124 |         )
125 | }
126 | 


--------------------------------------------------------------------------------
/lme/code/wasp_lme.stan:
--------------------------------------------------------------------------------
 1 | functions {
 2 |   // takes care of the modified likelihood
 3 |   real stoc_approx_log (vector y, vector mu, real stdErr, matrix covMat, matrix zmat, real nrep) {
 4 |     matrix[num_elements(y), num_elements(y)] covY; 
 5 |     matrix[num_elements(y), num_elements(y)] L;
 6 |     
 7 |     covY <- quad_form(covMat, zmat');
 8 |     for (kk in 1:(rows(zmat)))
 9 |       covY[kk, kk] <- covY[kk, kk] + stdErr;
10 | 
11 |     L <- cholesky_decompose(covY);
12 |     
13 |     return (nrep * multi_normal_cholesky_log(y, mu, L));
14 |   } 
15 | }
16 | 
17 | data {
18 |   int<lower=0> nobs;                          // total no. of obs
19 |   int<lower=0> nfixef;                        // total no. of fixed effects
20 |   int<lower=0> nranef;                        // total no. of random effects
21 |   int<lower=0> ngroup;                        // total no. of clusters
22 |   real<lower=0> nrep;                         // total no. of resamples
23 |   matrix[nobs, nfixef] xmat;                  // fixed effects design matrix
24 |   matrix[nobs, nranef] zmat;                  // random effects design matrix
25 |   int group[nobs];                            // cluster ids
26 |   vector[nobs] yvec;                          // observations
27 |   int pos1[ngroup];                           // database indices ...
28 |   int pos2[ngroup];                           // to handle ragged arrays
29 | }
30 | 
31 | transformed data {
32 |   // both fix eff. and rand. eff. are apriori centered at 0
33 |   vector[nranef] meanRanef;
34 |   vector[nfixef] meanFixef;
35 |   
36 |   meanRanef <- rep_vector(0.0, nranef);
37 |   meanFixef <- rep_vector(0.0, nfixef);
38 | }
39 | 
40 | parameters {
41 |   corr_matrix[nranef] corrRanef;       // correlation matrix of rand. eff.
42 |   vector<lower=0>[nranef] sclRanef;    // scale matrix of rand. eff.
43 | 
44 |   vector[nfixef] fixef;                // population level fix. eff.
45 |   real<lower=0> stdErrFixef;           // std err. in pop. level. fix. eff.
46 |   
47 |   real<lower=0> stdErr;                // population level std. err.
48 | }
49 | 
50 | transformed parameters {
51 |   matrix[nranef, nranef] covRanef;
52 |   vector[nobs] mu;
53 | 
54 |   mu <- xmat * fixef;
55 | 
56 |   covRanef <- quad_form_diag(corrRanef, sclRanef);
57 | }
58 | 
59 | model {  
60 | 
61 |   stdErr ~ cauchy(0, 2.5);
62 | 
63 |   // prior for fix. eff.
64 |   stdErrFixef ~ cauchy(0, 2.5);  
65 |   fixef ~ normal(meanFixef, stdErrFixef);
66 |   
67 |   // prior for rand. eff.
68 |   sclRanef ~ cauchy(0, 2.5);
69 |   corrRanef ~ lkj_corr(2);
70 |   
71 |   for (ii in 1:ngroup) {
72 |     segment(yvec, pos1[ii], pos2[ii] - pos1[ii] + 1) ~ stoc_approx(segment(mu, pos1[ii], pos2[ii] - pos1[ii] + 1), 
73 |                                                                    stdErr, covRanef,
74 |                                                                    block(zmat, pos1[ii], 1, pos2[ii] - pos1[ii] + 1, nranef),
75 |                                                                    nrep);
76 |   }
77 | }
78 | 


--------------------------------------------------------------------------------
/lme/code/wasp_sampler.R:
--------------------------------------------------------------------------------
 1 | sampleFromWaspMixMdl <- function (yvec, xmat, zmat, group, nrep, niter, nburn, nthin, id) {
 2 |     library(inline)
 3 |     library(Rcpp)
 4 |     library(rstan)
 5 | 
 6 |     grpLbl <- sort(unique(group))
 7 |     ngroup <- length(grpLbl)
 8 | 
 9 |     ranefList <- list()
10 |     grpIdx <- list()
11 |     for (ii in 1:ngroup) {
12 |       grpIdx[[ii]] <- which(group == grpLbl[ii])
13 |       ranefList[[ii]] <- zmat[grpIdx[[ii]], , drop = FALSE]
14 |     }
15 |     ranefMat <- do.call(rbind, ranefList)
16 |     fixefMat <- xmat[unlist(grpIdx), ]
17 | 
18 |     pos2 <- cumsum(sapply(grpIdx, length))
19 |     pos1 <- c(1, pos2[-ngroup] + 1)
20 | 
21 |     ordY <- yvec[unlist(grpIdx)]
22 |     ordGrp <- group[unlist(grpIdx)]
23 | 
24 |     idx <- seq_along(unlist(grpIdx))
25 |     simList = list(nobs = length(yvec[idx]),
26 |       nfixef = ncol(xmat),
27 |       nranef = ncol(zmat),
28 |       ngroup = length(unique(ordGrp[idx])),
29 |       nrep = nrep,
30 |       xmat = fixefMat[idx, ],
31 |       zmat = ranefMat[idx, ],
32 |       group = ordGrp[idx],
33 |       yvec = ordY[idx],
34 |       pos1 = pos1,
35 |       pos2 = pos2)
36 | 
37 |     seeds <- (1:2000) * as.numeric(gsub(":", "", substr(Sys.time(), 12, 19)))
38 | 
39 |     stanCode <- readChar("wasp_lme.stan", file.info("wasp_lme.stan")$size)
40 |     startTime <- proc.time()
41 |     mdl <- stan(model_code = stanCode, data = simList, iter = niter, warmup = nburn, chains = 1, thin = nthin,
42 |                 seed = seeds[id],
43 |                 init = list(list(betas = rep(0, ncol(xmat)),
44 |                                  corrRanef = diag(ncol(zmat)),
45 |                                  sclRanef = rep(2, ncol(zmat))
46 |                                  )))
47 |     endTime <- proc.time()
48 | 
49 |     lst <- mdl@sim$samples[[1]]
50 |     bs <- grep("fixef|covRanef", names(lst))
51 |     sampdf <- do.call(cbind, lst[bs])
52 | 
53 |     list(samples = sampdf[(nrow(sampdf) - (niter - nburn) / nthin + 1):nrow(sampdf), ], time = endTime - startTime)
54 | }
55 | 


--------------------------------------------------------------------------------
/lme/qsub/bbvb.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N bbvb_lme
 3 | #$ -l mf=16G
 4 | #$ -pe smp 2
 5 | #$ -l h_rt=320:00:00
 6 | #$ -l s_rt=320:00:00
 7 | #$ -wd /Users/ssrivastva/wasp/lme/code/
 8 | #$ -m a
 9 | #$ -M sanvesh-srivastava@uiowa.edu
10 | #$ -t 1-20
11 | #$ -V
12 | #$ -e /Users/ssrivastva/err/
13 | #$ -o /Users/ssrivastva/out/
14 | 
15 | module load R/3.3.0
16 | 
17 | R CMD BATCH --no-save --no-restore "--args 11 $SGE_TASK_ID" submit.R vb/bbvb_$SGE_TASK_ID.rout
18 | 


--------------------------------------------------------------------------------
/lme/qsub/comp10.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N comp10_lme
 3 | #$ -l mf=16G
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/lme/code/
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-200
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load R/3.3.0
15 | 
16 | R CMD BATCH --no-save --no-restore "--args 3 $SGE_TASK_ID" submit.R comp/comp10_$SGE_TASK_ID.rout
17 | 
18 | 
19 | 
20 | 
21 | 


--------------------------------------------------------------------------------
/lme/qsub/comp20.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N comp20_lme
 3 | #$ -l mf=16G
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/lme/code/
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-400
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load R/3.3.0
15 | 
16 | R CMD BATCH --no-save --no-restore "--args 3 $SGE_TASK_ID" submit.R comp/comp20_$SGE_TASK_ID.rout
17 | 


--------------------------------------------------------------------------------
/lme/qsub/comp_cov10.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N comp_cov10
 3 | #$ -l mf=16G
 4 | #$ -l h_rt=310:00:00
 5 | #$ -l s_rt=310:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/lme/code/
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-20
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load R/3.3.0
15 | 
16 | R CMD BATCH --no-save --no-restore "--args 8 $SGE_TASK_ID" submit.R comp/cov10_$SGE_TASK_ID.rout
17 | 


--------------------------------------------------------------------------------
/lme/qsub/comp_cov20.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N comp_cov20
 3 | #$ -l mf=16G
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/lme/code/
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-20
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load R/3.3.0
15 | 
16 | R CMD BATCH --no-save --no-restore "--args 9 $SGE_TASK_ID" submit.R comp/cov20_$SGE_TASK_ID.rout
17 | 


--------------------------------------------------------------------------------
/lme/qsub/comp_marg10.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N comp_marg10
 3 | #$ -l mf=16G
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/lme/code/
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-20
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load R/3.3.0
15 | 
16 | R CMD BATCH --no-save --no-restore "--args 6 $SGE_TASK_ID" submit.R comp/marg10_$SGE_TASK_ID.rout
17 | 


--------------------------------------------------------------------------------
/lme/qsub/comp_marg20.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N comp_marg20
 3 | #$ -l mf=16G
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/lme/code/
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-20
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load R/3.3.0
15 | 
16 | R CMD BATCH --no-save --no-restore "--args 7 $SGE_TASK_ID" submit.R comp/marg20_$SGE_TASK_ID.rout
17 | 


--------------------------------------------------------------------------------
/lme/qsub/mcmc.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N full_lme
 3 | #$ -l mf=64G
 4 | #$ -pe smp 2
 5 | #$ -l h_rt=320:00:00
 6 | #$ -l s_rt=320:00:00
 7 | #$ -wd /Users/ssrivastva/wasp/lme/code/
 8 | #$ -m a
 9 | #$ -M sanvesh-srivastava@uiowa.edu
10 | #$ -t 1-20
11 | #$ -V
12 | #$ -e /Users/ssrivastva/err/
13 | #$ -o /Users/ssrivastva/out/
14 | 
15 | module load R/3.3.0
16 | 
17 | R CMD BATCH --no-save --no-restore "--args 1 $SGE_TASK_ID" submit.R mcmc/mcmc_$SGE_TASK_ID.rout
18 | 


--------------------------------------------------------------------------------
/lme/qsub/vb.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N vb_lme
 3 | #$ -l mf=16G
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/lme/code/
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-20
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load R/3.3.0
15 | 
16 | R CMD BATCH --no-save --no-restore "--args 10 $SGE_TASK_ID" submit.R vb/vb_$SGE_TASK_ID.rout
17 | 


--------------------------------------------------------------------------------
/lme/qsub/wasp10.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N wasp10_lme
 3 | #$ -l mf=16G
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/lme/code/
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-200
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load R/3.3.0
15 | 
16 | R CMD BATCH --no-save --no-restore "--args 4 $SGE_TASK_ID" submit.R wasp/wasp10_$SGE_TASK_ID.rout
17 | 


--------------------------------------------------------------------------------
/lme/qsub/wasp20.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N wasp20_lme
 3 | #$ -l mf=16G
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/lme/code/
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-400
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load R/3.3.0
15 | 
16 | R CMD BATCH --no-save --no-restore "--args 5 $SGE_TASK_ID" submit.R wasp/wasp20_$SGE_TASK_ID.rout
17 | 


--------------------------------------------------------------------------------
/lme/qsub/wasp_cov10.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N wasp_cov10
 3 | #$ -pe 16cpn 32
 4 | #$ -l h_rt=310:00:00
 5 | #$ -l s_rt=310:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/lme/code
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-10
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load gurobi/6.5.1
15 | 
16 | module load matlab/R2015b
17 | 
18 | matlab -nojvm -nodisplay -r "calc_wasp_cov_2d_k10($SGE_TASK_ID, 10, 3, '/Shared/ssrivastva/wasp/lme/result/wasp/')"
19 | 
20 | 


--------------------------------------------------------------------------------
/lme/qsub/wasp_cov20.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N wasp_cov20
 3 | #$ -pe 16cpn 32
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/lme/code
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-10
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load gurobi/6.5.1
15 | 
16 | module load matlab/R2015b
17 | 
18 | matlab -nojvm -nodisplay -r "calc_wasp_cov_2d_k20($SGE_TASK_ID, 20, 3, '/Shared/ssrivastva/wasp/lme/result/wasp/')"
19 | 
20 | 


--------------------------------------------------------------------------------
/mixtures/code/analyze_result.R:
--------------------------------------------------------------------------------
  1 | rm(list = ls())
  2 | setwd("/Shared/ssrivastva/wasp/mixtures/result/")
  3 | 
  4 | library(KernSmooth)
  5 | library(matrixStats)
  6 | library(xtable)
  7 | 
  8 | res <- list()
  9 | for (cc in 1:10) {
 10 |     res[[cc]] <- readRDS(paste0("/Shared/ssrivastva/wasp/mixtures/result/full/res_", cc, "_100k.rds"))
 11 | }
 12 | 
 13 | ordMat <- matrix(NA, 10, 2)
 14 | for (cc in 1:10) {
 15 |     if (all(rowMeans(res[[cc]]$mu[1, , ]) < rowMeans(res[[cc]]$mu[2, , ]))) {
 16 |         ordMat[cc, ] <- c(1, 2)
 17 |     } else {
 18 |         ordMat[cc, ] <- c(2, 1)
 19 |     }
 20 | }
 21 | 
 22 | fullCorr <- list()
 23 | for (cc in 1:10) {
 24 |     fullCorr[[cc]] <- list(numeric(1000), numeric(1000))
 25 |     for (ss in 1:1000) {
 26 |         if (all(ordMat[cc, ] == 1:2)) {
 27 |             fullCorr[[cc]][[1]][ss] <- cov2cor(res[[cc]]$cov[1, , , ss])[1, 2]
 28 |             fullCorr[[cc]][[2]][ss] <- cov2cor(res[[cc]]$cov[2, , , ss])[1, 2]
 29 |         } else {
 30 |             fullCorr[[cc]][[2]][ss] <- cov2cor(res[[cc]]$cov[1, , , ss])[1, 2]
 31 |             fullCorr[[cc]][[1]][ss] <- cov2cor(res[[cc]]$cov[2, , , ss])[1, 2]
 32 |         }
 33 |     }
 34 | }
 35 | 
 36 | muList <- list()
 37 | for (cc in 1:10) {
 38 |     muList[[cc]] <- list()
 39 |     if (all(ordMat[cc, ] == 1:2)) {
 40 |         muList[[cc]][[1]] <- t(res[[cc]]$mu[1, , ])
 41 |         muList[[cc]][[2]] <- t(res[[cc]]$mu[2, , ])
 42 |     } else {
 43 |         muList[[cc]][[2]] <- t(res[[cc]]$mu[1, , ])
 44 |         muList[[cc]][[1]] <- t(res[[cc]]$mu[2, , ])
 45 |     }
 46 | }
 47 | 
 48 | wasp10Mu1 <- list()
 49 | for (cc in 1:10) {
 50 |     dat <- read.csv(paste0("sub10/wasp_cv_", cc, "_mu1_k10.csv"), header = FALSE)
 51 |     colnames(dat) <- c("dim1", "dim2", "prob")
 52 |     dat$prob[dat$prob < 1e-10] <- 0
 53 |     wasp10Mu1[[cc]] <- dat[sample(1:nrow(dat), size = 10000, prob = dat$prob, replace = TRUE), 1:2]
 54 | }
 55 | 
 56 | wasp10Mu2 <- list()
 57 | for (cc in 1:10) {
 58 |     dat <- read.csv(paste0("sub10/wasp_cv_", cc, "_mu2_k10.csv"), header = FALSE)
 59 |     colnames(dat) <- c("dim1", "dim2", "prob")
 60 |     dat$prob[dat$prob < 1e-10] <- 0
 61 |     wasp10Mu2[[cc]] <- dat[sample(1:nrow(dat), size = 10000, prob = dat$prob, replace = TRUE), 1:2]
 62 | }
 63 | 
 64 | wasp5Mu1 <- list()
 65 | for (cc in 1:10) {
 66 |     dat <- read.csv(paste0("sub5/wasp_cv_", cc, "_mu1_k5.csv"), header = FALSE)
 67 |     colnames(dat) <- c("dim1", "dim2", "prob")
 68 |     dat$prob[dat$prob < 1e-10] <- 0
 69 |     wasp5Mu1[[cc]] <- dat[sample(1:nrow(dat), size = 10000, prob = dat$prob, replace = TRUE), 1:2]
 70 | }
 71 | 
 72 | wasp5Mu2 <- list()
 73 | for (cc in 1:10) {
 74 |     dat <- read.csv(paste0("sub5/wasp_cv_", cc, "_mu2_k5.csv"), header = FALSE)
 75 |     colnames(dat) <- c("dim1", "dim2", "prob")
 76 |     dat$prob[dat$prob < 1e-10] <- 0
 77 |     wasp5Mu2[[cc]] <- dat[sample(1:nrow(dat), size = 10000, prob = dat$prob, replace = TRUE), 1:2]
 78 | }
 79 | 
 80 | accMu <- array(0.0, dim = c(10, 2, 2))
 81 | dimnames(accMu) <- list(c(paste0("cv", 1:10)),
 82 |                          c("k5", "k10"),
 83 |                          c("mu1", "mu2")
 84 |                         )
 85 | 
 86 | for (cc in 1:10) {
 87 |         waspDens51 <- bkde2D(wasp5Mu1[[cc]], bandwidth = c(0.01, 0.01), gridsize = c(500, 500),
 88 |                             range.x =  list(c(0.95, 1.05), c(1.95, 2.05)))
 89 |         waspDens52 <- bkde2D(wasp5Mu2[[cc]], bandwidth = c(0.01, 0.01), gridsize = c(500, 500),
 90 |                              range.x =  list(c(6.95, 7.05), c(7.95, 8.05)))
 91 |         waspDens101 <- bkde2D(wasp10Mu1[[cc]], bandwidth = c(0.01, 0.01), gridsize = c(500, 500),
 92 |                             range.x =  list(c(0.95, 1.05), c(1.95, 2.05)))
 93 |         waspDens102 <- bkde2D(wasp10Mu2[[cc]], bandwidth = c(0.01, 0.01), gridsize = c(500, 500),
 94 |                             range.x =  list(c(6.95, 7.05), c(7.95, 8.05)))
 95 |         fullDens1 <- bkde2D(muList[[cc]][[1]], bandwidth = c(0.01, 0.01), gridsize = c(500, 500),
 96 |                             range.x =  list(c(0.95, 1.05), c(1.95, 2.05)))
 97 |         fullDens2 <- bkde2D(muList[[cc]][[2]], bandwidth = c(0.01, 0.01), gridsize = c(500, 500),
 98 |                             range.x =  list(c(6.95, 7.05), c(7.95, 8.05)))
 99 |         accMu[cc, 1, 1] <- 1- sum(abs(fullDens1$fhat - waspDens51$fhat) * diff(fullDens1$x1)[1] * diff(fullDens1$x2)[1]) / 2
100 |         accMu[cc, 1, 2] <- 1- sum(abs(fullDens2$fhat - waspDens52$fhat) * diff(fullDens2$x1)[1] * diff(fullDens2$x2)[1]) / 2
101 |         accMu[cc, 2, 1] <- 1- sum(abs(fullDens1$fhat - waspDens101$fhat) * diff(fullDens1$x1)[1] * diff(fullDens1$x2)[1]) / 2
102 |         accMu[cc, 2, 2] <- 1- sum(abs(fullDens2$fhat - waspDens102$fhat) * diff(fullDens2$x1)[1] * diff(fullDens2$x2)[1]) / 2
103 | }
104 | 
105 | saveRDS(accMu, "~/wasp/mixtures/result/accMu.rds")
106 | 
107 | ## see create_samples.R file for their description and structure.
108 | fullRho <- readRDS("~/wasp/mixtures/result/corrList.rds")
109 | rho5 <- readRDS("~/wasp/mixtures/result/subRhoList5.rds")
110 | rho10 <- readRDS( "~/wasp/mixtures/result/subRhoList10.rds")
111 | vbRho1 <- readRDS( "/Shared/ssrivastva/wasp/mixtures/result/vbRho1.rds")
112 | vbRho2 <- readRDS( "/Shared/ssrivastva/wasp/mixtures/result/vbRho2.rds")
113 | 
114 | scotRho <- list()
115 | for (cc in 1:10) {
116 |     scotRho[[cc]] <- list()
117 |     dat1 <- readRDS(paste0("cons/cons_rho_cv_", cc, "_k5.rds"))
118 |     dat2 <- readRDS(paste0("cons/cons_rho_cv_", cc, "_k10.rds"))
119 |     scotRho[[cc]] <- list("k5" = dat1$dens, "k10" = dat2$dens)
120 | }
121 | 
122 | xingRho <- list()
123 | for (cc in 1:10) {
124 |     dat1 <- readRDS(paste0("xing/xing_rho_cv_", cc, "_k5.rds"))
125 |     dat2 <- readRDS(paste0("xing/xing_rho_cv_", cc, "_k10.rds"))
126 |     xingRho[[cc]] <- list("k5" = dat1$dens, "k10" = dat2$dens)
127 | }
128 | 
129 | waspRho5 <- list(lapply(rho5,
130 |                         function (x) {
131 |                             rowMeans(do.call(cbind, lapply(x, function(y) quantile(y[[1]], probs = seq(0, 1, by = 0.0001)))))
132 |                         }),
133 |                  lapply(rho5,
134 |                         function (x) {
135 |                             rowMeans(do.call(cbind, lapply(x, function(y) quantile(y[[2]], probs = seq(0, 1, by = 0.0001)))))
136 |                         }))
137 | 
138 | waspRho10 <- list(lapply(rho10,
139 |                         function (x) {
140 |                             rowMeans(do.call(cbind, lapply(x, function(y) quantile(y[[1]], probs = seq(0, 1, by = 0.0001)))))
141 |                         }),
142 |                  lapply(rho10,
143 |                         function (x) {
144 |                             rowMeans(do.call(cbind, lapply(x, function(y) quantile(y[[2]], probs = seq(0, 1, by = 0.0001)))))
145 |                         }))
146 | 
147 | accRho <- array(NA, dim = c(10, 4, 2, 2))
148 | dimnames(accRho) <- list(c(paste0("cv", 1:10)),
149 |                          c("cons", "xing", "vb", "wasp"),
150 |                          c("rho1", "rho2"),
151 |                          c("k5", "k10")
152 |                         )
153 | 
154 | for (cc in 1:10) {
155 |     for (rrr in 1:2) {
156 |         rr <- range(c(fullRho[[cc]][[rrr]], waspRho5[[rrr]][[cc]], waspRho10[[rrr]][[cc]],
157 |                       as.numeric(xingRho[[cc]][[1]][ , rrr]), as.numeric(xingRho[[cc]][[2]][ , rrr]),
158 |                       as.numeric(scotRho[[cc]][[1]][ , rrr]), as.numeric(scotRho[[cc]][[2]][ , rrr]),
159 |                       as.numeric(vbRho1[[cc]]), as.numeric(vbRho2[[cc]])
160 |                       ))
161 |         fdens <- bkde(fullRho[[cc]][[rrr]], bandwidth = dpik(fullRho[[cc]][[rrr]]), range.x = rr)
162 |         wdens1 <- bkde(waspRho5[[rrr]][[cc]], bandwidth = dpik(waspRho5[[rrr]][[cc]]), range.x = rr)
163 |         wdens2 <- bkde(waspRho10[[rrr]][[cc]], bandwidth = dpik(waspRho10[[rrr]][[cc]]), range.x = rr)
164 |         xdens1 <- bkde(xingRho[[cc]][[1]][ , rrr], bandwidth = dpik(xingRho[[cc]][[1]][ , rrr]), range.x = rr)
165 |         xdens2 <- bkde(xingRho[[cc]][[2]][ , rrr], bandwidth = dpik(xingRho[[cc]][[2]][ , rrr]), range.x = rr)
166 |         cdens1 <- bkde(scotRho[[cc]][[1]][ , rrr], bandwidth = dpik(scotRho[[cc]][[1]][ , rrr]), range.x = rr)
167 |         cdens2 <- bkde(scotRho[[cc]][[2]][ , rrr], bandwidth = dpik(scotRho[[cc]][[2]][ , rrr]), range.x = rr)
168 |         if (rrr == 1) {
169 |             vdens <- bkde(vbRho1[[cc]], bandwidth = dpik(vbRho1[[cc]]), range.x = rr)
170 |         } else {
171 |             vdens <- bkde(vbRho2[[cc]], bandwidth = dpik(vbRho2[[cc]]), range.x = rr)
172 |         }
173 |         accRho[cc, "wasp", rrr, 1] <- 1- sum(abs(fdens$y - wdens1$y) * diff(fdens$x)[1]) / 2
174 |         accRho[cc, "wasp", rrr, 2] <- 1- sum(abs(fdens$y - wdens2$y) * diff(fdens$x)[1]) / 2
175 |         accRho[cc, "xing", rrr, 1] <- 1- sum(abs(fdens$y - xdens1$y) * diff(fdens$x)[1]) / 2
176 |         accRho[cc, "xing", rrr, 2] <- 1- sum(abs(fdens$y - xdens2$y) * diff(fdens$x)[1]) / 2
177 |         accRho[cc, "cons", rrr, 1] <- 1- sum(abs(fdens$y - cdens1$y) * diff(fdens$x)[1]) / 2
178 |         accRho[cc, "cons", rrr, 2] <- 1- sum(abs(fdens$y - cdens2$y) * diff(fdens$x)[1]) / 2
179 |         accRho[cc, "vb", rrr, 1] <- 1- sum(abs(fdens$y - vdens$y) * diff(fdens$x)[1]) / 2
180 |         accRho[cc, "vb", rrr, 2] <- 1- sum(abs(fdens$y - vdens$y) * diff(fdens$x)[1]) / 2
181 |     }
182 | }
183 | 
184 | saveRDS(accRho, "~/wasp/mixtures/result/accRho.rds")
185 | 
186 | rtbl <- rbind(
187 |     c(paste0(format(round(colMeans(accRho[ , "cons", , 1]), 2), nsmall = 2),
188 |              " (", format(round(colSds(accRho[ , "cons", , 1]), 2), nsmall = 2), ")"),
189 |       paste0(format(round(colMeans(accRho[ , "cons", , 2]), 2), nsmall = 2),
190 |              " (", format(round(colSds(accRho[ , "cons", , 2]), 2), nsmall = 2), ")")
191 |       ),
192 |     c(paste0(format(round(colMeans(accRho[ , "xing", , 1]), 2), nsmall = 2),
193 |              " (", format(round(colSds(accRho[ , "xing", , 1]), 2), nsmall = 2), ")"),
194 |       paste0(format(round(colMeans(accRho[ , "xing", , 2]), 2), nsmall = 2),
195 |              " (", format(round(colSds(accRho[ , "xing", , 2]), 2), nsmall = 2), ")")
196 |       ),
197 |     c(paste0(format(round(colMeans(accRho[ , "vb", , 1]), 2), nsmall = 2),
198 |              " (", format(round(colSds(accRho[ , "vb", , 1]), 2), nsmall = 2), ")"),
199 |       paste0(format(round(colMeans(accRho[ , "vb", , 2]), 2), nsmall = 2),
200 |              " (", format(round(colSds(accRho[ , "vb", , 2]), 2), nsmall = 2), ")")
201 |       ),
202 |     c(paste0(format(round(colMeans(accRho[ , "wasp", , 1]), 2), nsmall = 2),
203 |              " (", format(round(colSds(accRho[ , "wasp", , 1]), 2), nsmall = 2), ")"),
204 |       paste0(format(round(colMeans(accRho[ , "wasp", , 2]), 2), nsmall = 2),
205 |              " (", format(round(colSds(accRho[ , "wasp", , 2]), 2), nsmall = 2), ")")
206 |       )
207 | )
208 | 
209 | rownames(rtbl) <- c("cons", "xing", "vb", "wasp")
210 | colnames(rtbl) <- c("rho1-k5", "rho2-k5", "rho1-k10", "rho2-k10")
211 | 
212 | xtable(rtbl[, c(1, 3, 2, 4)])
213 | xtable(resTbl)
214 | 
215 | ### f-estimation ###
216 | 
217 | rm(list = ls())
218 | 
219 | setwd("/Shared/ssrivastva/wasp/mixtures/result/")
220 | 
221 | mcmcDens <- readRDS("mcmcDensList.rds")
222 | vbDens <- readRDS("vbDensList.rds")
223 | waspDens10 <- readRDS("waspDensList_k10.rds")
224 | waspDens5 <- readRDS("waspDensList_k5.rds")
225 | 
226 | waspDens <- list()
227 | for (cc in 1:10) {
228 |     waspDens[[cc]] <- list()
229 |     densMat <- matrix(NA, 2000, 500)
230 |     for (ss in 1:500) {
231 |         densMat[, ss] <- rowMeans(do.call(cbind, lapply(lapply(waspDens5[[cc]], function(x) x[ , ss]),
232 |                                                          function(y) quantile(y, seq(0, 1, length = 2000)))))
233 |     }
234 |     waspDens[[cc]][[1]] <- densMat
235 |     densMat <- matrix(NA, 2000, 500)
236 |     for (ss in 1:500) {
237 |         densMat[, ss] <- rowMeans(do.call(cbind, lapply(lapply(waspDens10[[cc]], function(x) x[ , ss]),
238 |                                                          function(y) quantile(y, seq(0, 1, length = 2000)))))
239 |     }
240 |     waspDens[[cc]][[2]] <- densMat
241 | }
242 | 
243 | scotDens <- list()
244 | for (cc in 1:10) {
245 |     scotDens[[cc]] <- list()
246 |     dat1 <- readRDS(paste0("cons/cons_dens_cv_", cc, "_k5.rds"))
247 |     dat2 <- readRDS(paste0("cons/cons_dens_cv_", cc, "_k10.rds"))
248 |     scotDens[[cc]] <- list("k5" = dat1$dens, "k10" = dat2$dens)
249 | }
250 | 
251 | xingDens <- list()
252 | for (cc in 1:10) {
253 |     dat1 <- readRDS(paste0("xing/xing_dens_cv_", cc, "_k5.rds"))
254 |     dat2 <- readRDS(paste0("xing/xing_dens_cv_", cc, "_k10.rds"))
255 |     xingDens[[cc]] <- list("k5" = dat1$dens, "k10" = dat2$dens)
256 | }
257 | 
258 | accMix <- array(NA, dim = c(10, 3, 4, 2))
259 | dimnames(accMix) <- list(paste0("cv", 1:10),
260 |                          c("cons", "vb", "wasp"),
261 |                          c("2.5", "5", "90", "95"),
262 |                          c("k=5", "k=10")
263 |                          )
264 | 
265 | xx <- seq(0, 10, length = 500)
266 | for (cc in 1:10) {
267 |     mf <- colQuantiles(mcmcDens[[cc]], probs = c(0.025, 0.05, 0.95, 0.975))
268 |     wf5 <- colQuantiles(waspDens[[cc]][[1]], probs = c(0.025, 0.05, 0.95, 0.975))
269 |     wf10 <- colQuantiles(waspDens[[cc]][[2]], probs = c(0.025, 0.05, 0.95, 0.975))
270 |     cf5 <- colQuantiles(scotDens[[cc]][[1]], probs = c(0.025, 0.05, 0.95, 0.975))
271 |     cf10 <- colQuantiles(scotDens[[cc]][[2]], probs = c(0.025, 0.05, 0.95, 0.975))
272 |     vf5 <- colQuantiles(vbDens[[cc]], probs = c(0.025, 0.05, 0.95, 0.975))
273 |     vf10 <- colQuantiles(vbDens[[cc]], probs = c(0.025, 0.05, 0.95, 0.975))
274 |     for (dd in 1:4) {
275 |         accMix[cc, "cons", dd, 1] <- 1 - sum(diff(xx)[1] * abs(mf - cf5[ , dd])) / 2
276 |         accMix[cc, "cons", dd, 2] <- 1 - sum(diff(xx)[1] * abs(mf - cf10[ , dd])) / 2
277 |         accMix[cc, "vb", dd, 1] <- 1 - sum(diff(xx)[1] * abs(mf - vf5[ , dd])) / 2
278 |         accMix[cc, "vb", dd, 2] <- 1 - sum(diff(xx)[1] * abs(mf - vf10[ , dd])) / 2
279 |         accMix[cc, "wasp", dd, 1] <- 1 - sum(diff(xx)[1] * abs(mf - wf5[ , dd])) / 2
280 |         accMix[cc, "wasp", dd, 2] <- 1 - sum(diff(xx)[1] * abs(mf - wf10[ , dd])) / 2
281 |     }
282 | }
283 | 
284 | rbind(
285 |     c(paste0(format(round(colMeans(accMix[ , "cons", , 1]), 2), nsmall = 2),
286 |              " (", format(round(colSds(accMix[ , "cons", , 1]), 2), nsmall = 2), ")"),
287 |       paste0(format(round(colMeans(accMix[ , "cons", , 2]), 2), nsmall = 2),
288 |              " (", format(round(colSds(accMix[ , "cons", , 2]), 2), nsmall = 2), ")")
289 |       ),
290 |     c(paste0(format(round(colMeans(accMix[ , "vb", , 1]), 2), nsmall = 2),
291 |              " (", format(round(colSds(accMix[ , "vb", , 1]), 2), nsmall = 2), ")"),
292 |       paste0(format(round(colMeans(accMix[ , "vb", , 2]), 2), nsmall = 2),
293 |              " (", format(round(colSds(accMix[ , "vb", , 2]), 2), nsmall = 2), ")")
294 |       ),
295 |     c(paste0(format(round(colMeans(accMix[ , "wasp", , 1]), 2), nsmall = 2),
296 |              " (", format(round(colSds(accMix[ , "wasp", , 1]), 2), nsmall = 2), ")"),
297 |       paste0(format(round(colMeans(accMix[ , "wasp", , 2]), 2), nsmall = 2),
298 |              " (", format(round(colSds(accMix[ , "wasp", , 2]), 2), nsmall = 2), ")")
299 |       )
300 | )
301 | 


--------------------------------------------------------------------------------
/mixtures/code/calc_wasp10.m:
--------------------------------------------------------------------------------
  1 | function calc_wasp10(dd, nsub, ndim)
  2 | 
  3 | if ndim == 1
  4 |     chr = 'mu1';
  5 | else 
  6 |     chr = 'mu2';
  7 | end
  8 | 
  9 | addpath('/opt/gurobi/6.5.1/linux64/matlab/');
 10 | 
 11 | margMat = {};
 12 | for kk = 1:nsub 
 13 |     margMat{kk} = csvread(strcat('/Shared/ssrivastva/wasp/mixtures/result/sub10/samp/csv/samp_cv_', ...
 14 |                                  num2str(dd), '_nsub_', num2str(kk), '_k10_', chr, '.csv'));
 15 | end
 16 | runtime = 0;
 17 | 
 18 | % calculate the pair-wise sq. euclidean distance between the atoms of subset
 19 | % posteriors and WASP atoms
 20 | subsetPost = {};    
 21 | for kk = 1:nsub
 22 |     subsetPost{kk} = margMat{kk}(randi([1 1000], 100, 1), :);
 23 | end         
 24 | 
 25 | lbd1 = min(cellfun(@(x) x(1), cellfun(@(x) min(x), subsetPost,'UniformOutput', false)));
 26 | lbd2 = min(cellfun(@(x) x(2), cellfun(@(x) min(x), subsetPost,'UniformOutput', false)));   
 27 | ubd1 = max(cellfun(@(x) x(1), cellfun(@(x) max(x), subsetPost,'UniformOutput', false)));
 28 | ubd2 = max(cellfun(@(x) x(2), cellfun(@(x) max(x), subsetPost,'UniformOutput', false)));   
 29 | 
 30 | [opostx, oposty] = meshgrid(linspace(lbd1, ubd1, 60), linspace(lbd2, ubd2, 60));
 31 | overallPost = [opostx(:) oposty(:)];
 32 | 
 33 | distMatCell = {};
 34 | m00 = diag(overallPost * overallPost');
 35 | for ii = 1:nsub
 36 |     mm = diag(subsetPost{ii} * subsetPost{ii}');    
 37 |     mm1 = overallPost * subsetPost{ii}'; 
 38 |     distMatCell{ii} = bsxfun(@plus, bsxfun(@plus, -2 * mm1, mm'), m00);    
 39 | end
 40 | 
 41 | % constants
 42 | K  = nsub;
 43 | Ni = cell2mat(cellfun(@(x) size(x, 2), distMatCell, 'UniformOutput', false));
 44 | N  = size(overallPost, 1);
 45 | nx = N * (N+1);
 46 | mx = K * N + N + 1;
 47 | In = eye(N);
 48 | En = ones(1, N);
 49 | 
 50 | % Generate matrix A0.
 51 | A0  = sparse([]);
 52 | for p = 1:K
 53 |     cc = (1:N)';                  % terribly fast version of 
 54 |     idx = cc(:, ones(Ni(p), 1));  % repmat(In, 1, Ni(p)) / Ni(p)
 55 |     Rp  = In(:, idx(:)) / Ni(p);  % in 3 steps
 56 |     A0  = blkdiag(A0, Rp); 
 57 | end
 58 | cc = (1:N)';                  % terribly fast version of 
 59 | idx = cc(:, ones(K, 1));      % repmat(-In, K, 1) 
 60 | A00  = -In(idx(:), :);        % in 3 steps
 61 | 
 62 | A0 = sparse([A00, A0]);
 63 | b0 = zeros(size(A0, 1), 1);
 64 | disp('done generating A ...');        
 65 | 
 66 | % Generate matrix B from simplex constraints.
 67 | B = sparse([]);
 68 | for p = 0:(sum(Ni))
 69 |     B = blkdiag(B, En);
 70 | end
 71 | disp('done generating B ...');        
 72 | 
 73 | % The hold matrix C.
 74 | A = sparse([A0; B]);
 75 | 
 76 | % Generate the right hand size vector b.
 77 | b = sparse([zeros(K * N, 1); ones(sum(Ni) + 1, 1)]);
 78 | 
 79 | % Generate the cost vector
 80 | costCell = cellfun(@(x) x(:) / size(x, 2), distMatCell, 'UniformOutput', false);
 81 | costVec = [zeros(size(overallPost, 1), 1); cell2mat(costCell(:))];
 82 | 
 83 | c = sparse(costVec);
 84 | tic;
 85 | lpsol = callLpSolver('gurobi', A, b, c, 10000, 1e-10);
 86 | runtime = toc;
 87 | 
 88 | [tmats, avec] = recoverSolution(lpsol, K, N, Ni);
 89 | 
 90 | save(strcat('/Shared/ssrivastva/wasp/mixtures/result/sub10/res_cv_', num2str(dd), ...
 91 |             '_', chr, '_k10.mat'), 'runtime', 'tmats','avec', 'subsetPost', 'overallPost');
 92 | summ = [overallPost avec];
 93 | csvwrite(strcat('/Shared/ssrivastva/wasp/mixtures/result/sub10/wasp_cv_', num2str(dd), ...
 94 |                 '_', chr, '_k10.csv'), summ);
 95 | csvwrite(strcat('/Shared/ssrivastva/wasp/mixtures/result/sub10/overall_cv_', num2str(dd), ...
 96 |                 '_', chr, '_k10.csv'), overallPost);
 97 | csvwrite(strcat('/Shared/ssrivastva/wasp/mixtures/result/sub10/time_cv_', num2str(dd), ...
 98 |                 '_', chr, '_k10.csv'), runtime);    
 99 | quit
100 | 


--------------------------------------------------------------------------------
/mixtures/code/calc_wasp5.m:
--------------------------------------------------------------------------------
  1 | function calc_wasp5(dd, nsub, ndim)
  2 | 
  3 | if ndim == 1
  4 |     chr = 'mu1';
  5 | else 
  6 |     chr = 'mu2';
  7 | end
  8 | 
  9 | addpath('/opt/gurobi/6.5.1/linux64/matlab/');
 10 | 
 11 | margMat = {};
 12 | for kk = 1:nsub 
 13 |     margMat{kk} = csvread(strcat('/Shared/ssrivastva/wasp/mixtures/result/sub5/samp/csv/samp_cv_', ...
 14 |                                  num2str(dd), '_nsub_', num2str(kk), '_k5_', chr, '.csv'));
 15 | end
 16 | runtime = 0;
 17 | 
 18 | % calculate the pair-wise sq. euclidean distance between the atoms of subset
 19 | % posteriors and WASP atoms
 20 | subsetPost = {};    
 21 | for kk = 1:nsub
 22 |     subsetPost{kk} = margMat{kk}(randi([1 1000], 100, 1), :);
 23 | end         
 24 | 
 25 | lbd1 = min(cellfun(@(x) x(1), cellfun(@(x) min(x), subsetPost,'UniformOutput', false)));
 26 | lbd2 = min(cellfun(@(x) x(2), cellfun(@(x) min(x), subsetPost,'UniformOutput', false)));   
 27 | ubd1 = max(cellfun(@(x) x(1), cellfun(@(x) max(x), subsetPost,'UniformOutput', false)));
 28 | ubd2 = max(cellfun(@(x) x(2), cellfun(@(x) max(x), subsetPost,'UniformOutput', false)));   
 29 | 
 30 | [opostx, oposty] = meshgrid(linspace(lbd1, ubd1, 60), linspace(lbd2, ubd2, 60));
 31 | overallPost = [opostx(:) oposty(:)];
 32 | 
 33 | distMatCell = {};
 34 | m00 = diag(overallPost * overallPost');
 35 | for ii = 1:nsub
 36 |     mm = diag(subsetPost{ii} * subsetPost{ii}');    
 37 |     mm1 = overallPost * subsetPost{ii}'; 
 38 |     distMatCell{ii} = bsxfun(@plus, bsxfun(@plus, -2 * mm1, mm'), m00);    
 39 | end
 40 | 
 41 | % constants
 42 | K  = nsub;
 43 | Ni = cell2mat(cellfun(@(x) size(x, 2), distMatCell, 'UniformOutput', false));
 44 | N  = size(overallPost, 1);
 45 | nx = N * (N+1);
 46 | mx = K * N + N + 1;
 47 | In = eye(N);
 48 | En = ones(1, N);
 49 | 
 50 | % Generate matrix A0.
 51 | A0  = sparse([]);
 52 | for p = 1:K
 53 |     cc = (1:N)';                  % terribly fast version of 
 54 |     idx = cc(:, ones(Ni(p), 1));  % repmat(In, 1, Ni(p)) / Ni(p)
 55 |     Rp  = In(:, idx(:)) / Ni(p);  % in 3 steps
 56 |     A0  = blkdiag(A0, Rp); 
 57 | end
 58 | cc = (1:N)';                  % terribly fast version of 
 59 | idx = cc(:, ones(K, 1));      % repmat(-In, K, 1) 
 60 | A00  = -In(idx(:), :);        % in 3 steps
 61 | 
 62 | A0 = sparse([A00, A0]);
 63 | b0 = zeros(size(A0, 1), 1);
 64 | disp('done generating A ...');        
 65 | 
 66 | % Generate matrix B from simplex constraints.
 67 | B = sparse([]);
 68 | for p = 0:(sum(Ni))
 69 |     B = blkdiag(B, En);
 70 | end
 71 | disp('done generating B ...');        
 72 | 
 73 | % The hold matrix C.
 74 | A = sparse([A0; B]);
 75 | 
 76 | % Generate the right hand size vector b.
 77 | b = sparse([zeros(K * N, 1); ones(sum(Ni) + 1, 1)]);
 78 | 
 79 | % Generate the cost vector
 80 | costCell = cellfun(@(x) x(:) / size(x, 2), distMatCell, 'UniformOutput', false);
 81 | costVec = [zeros(size(overallPost, 1), 1); cell2mat(costCell(:))];
 82 | 
 83 | c = sparse(costVec);
 84 | tic;
 85 | lpsol = callLpSolver('gurobi', A, b, c, 10000, 1e-10);
 86 | runtime = toc;
 87 | 
 88 | [tmats, avec] = recoverSolution(lpsol, K, N, Ni);
 89 | 
 90 | save(strcat('/Shared/ssrivastva/wasp/mixtures/result/sub5/res_cv_', num2str(dd), ...
 91 |             '_', chr, '_k5.mat'), 'runtime', 'tmats','avec', 'subsetPost', 'overallPost');
 92 | summ = [overallPost avec];
 93 | csvwrite(strcat('/Shared/ssrivastva/wasp/mixtures/result/sub5/wasp_cv_', num2str(dd), ...
 94 |                 '_', chr, '_k5.csv'), summ);
 95 | csvwrite(strcat('/Shared/ssrivastva/wasp/mixtures/result/sub5/overall_cv_', num2str(dd), ...
 96 |                 '_', chr, '_k5.csv'), overallPost);
 97 | csvwrite(strcat('/Shared/ssrivastva/wasp/mixtures/result/sub5/time_cv_', num2str(dd), ...
 98 |                 '_', chr, '_k5.csv'), runtime);    
 99 | quit
100 | 


--------------------------------------------------------------------------------
/mixtures/code/callLpSolver.m:
--------------------------------------------------------------------------------
  1 | % FUNCTION:  [xsol, output] = callLpSolver(solver, Amat, bvec, ...
  2 | %                                          cvec, maxiters, tolx)
  3 | % PURPOSE: Call the Lp solver to solve the LP problem.
  4 | % 
  5 | %
  6 | function [xsol, output] = callLpSolver(solver, Amat, bvec, cvec, maxiters, tolx)
  7 | 
  8 | % Check the inputs.
  9 | if nargin < 3, error('At least three inputs are required!'); end
 10 | if nargin < 5, tolx = 1e-4; end
 11 | if nargin < 4, maxiters = 1000; end
 12 | if isempty(tolx), tolx = 1e-4; end
 13 | if isempty(maxiters), maxiters = 1000; end
 14 | if size(Amat, 1) ~= length(bvec), error('Inputs are inconsistent!'); end
 15 | if size(Amat, 2) ~= length(cvec), error('Inputs are inconsistent!'); end
 16 | xsol = []; output = [];
 17 | nx   = length(cvec);
 18 | 
 19 | %% Call the SeDuMi solver.
 20 | if strcmpi(solver, 'sedumi')
 21 |     pars.maxiter = maxiters;
 22 |     pars.eps     = tolx;
 23 |     Cones.l      = length(cvec);
 24 |     time3        = tic;
 25 |     [x_sedumi, y_sedumi, info] = sedumi(Amat, bvec, cvec, Cones, pars);
 26 |     x_sedumi = full(x_sedumi);
 27 |     xsol         = x_sedumi;
 28 |     output.time3        = toc(time3);
 29 |     output.info  = info;
 30 |     output.dual_sol = y_sedumi;
 31 | end
 32 | 
 33 | %% Call the Matlab LINPROG solver.
 34 | if strcmpi(solver, 'linprog')
 35 |     time5       = tic;
 36 |     opts        = optimset('Algorithm', 'interior-point', ...
 37 |                            'Display', 'iter', ...
 38 |                            'MaxIter', maxiters, 'TolX', tolx, 'TolFun', tolx);
 39 |     [xsol, fx2] = linprog(cvec, [], [], Amat, bvec, zeros(nx, 1), [], [], opts);
 40 |     output.time = toc(time5);
 41 |     output.fx   = fx2;
 42 | end
 43 | 
 44 | %% Call the SDPT3 solver.
 45 | if strcmpi(solver, 'sdpt3')
 46 |     Asdpt3 = spconvert(Amat);
 47 |     blk{1, 1} = 'l';
 48 |     blk{1, 2} = ones(1, nx);
 49 |     sdpt3opt  = struct('gaptol', tolx, 'maxit', maxiters);
 50 |     time4     = tic;
 51 |     %[X0, y0, Z0] = infeaspt(blk, Asdpt3, cvec, bvec); 
 52 |     %[fx_sdpt3, x_sdpt3, y_sdpt3, Z_sdpt3] = sqlp(blk, Asdpt3, cvec, bvec, sdpt3opt, X0, y0, Z0);
 53 |     [fx_sdpt3, x_sdpt3, y_sdpt3, Z_sdpt3] = sqlp(blk, Asdpt3, cvec, bvec, sdpt3opt);
 54 |     output.time4     = toc(time4);
 55 |     x_sdpt3   = x_sdpt3{:};
 56 |     xsol      = x_sdpt3;
 57 |     output.dual_sol = y_sdpt3;
 58 |     output.slacks   = Z_sdpt3;
 59 |     output.fx       = fx_sdpt3;
 60 | end    
 61 | 
 62 | %% Call Our Decopt solver.
 63 | if strcmpi(solver, 'decopt')
 64 |     
 65 |     % Set the parameters.
 66 |     param.MaxIters      = maxiters;
 67 |     param.Verbosity     = 2;
 68 |     param.RelTolX       = tolx;
 69 |     param.saveHistMode  = 0;
 70 |     param.Algorithm     = 3;
 71 |     param.InnerMaxIters = 20;
 72 |     param.adaptStepSize = 0;
 73 | 
 74 |     % Call the solver.
 75 |     proxLpPos    = @(x, gamma)( min( max(0, x - gamma*cvec), 1.0) );
 76 | 
 77 |     % User-define proximal-functions.
 78 |     proxOpers{1} = @(x, gamma, varargin)(proxLpPos(x, gamma));
 79 |     proxOpers{2} = @(x, gamma, varargin)(projL2norm(x, 1e-12));
 80 | 
 81 |     proxOpers{3} = @(x, varargin)( cvec'*x );
 82 |     proxOpers{4} = @(x, varargin)(0);
 83 |     
 84 |     % Generate an initial point.
 85 |     x0       = zeros(nx, 1);
 86 |     
 87 |     %% Call the solver with user-define prox-functions.
 88 |     time1 = tic;
 89 |     [xsol, out] = decoptSolver('UserDef', Amat, bvec, param, 'x0', x0, 'Prox', proxOpers, 'GammaFactor', 1.1);
 90 |     output.time = toc(time1);
 91 |     output.info = out;
 92 | 
 93 | end
 94 | 
 95 | %% Call the Gurobi solver.
 96 | if strcmpi(solver, 'gurobi')
 97 |     
 98 |     % Generate the LP model.
 99 |     time_g            = tic;
100 |     model.A           = Amat;
101 |     model.obj         = full(cvec);
102 |     model.rhs         = full(bvec);
103 |     model.modelsense  = 'min';
104 |     model.sense       = '=';
105 |     
106 |     % Define the parameters.
107 |     param.method      = 2;
108 |     param.Presolve    = 2;
109 |     param.Crossover   = 0;
110 |     param.outputflag  = 1;
111 |     
112 |     % Call the solver.
113 |     result            = gurobi(model, param);
114 |     
115 |     % Obtain the final results.
116 |     output.result     = result;
117 |     output.time       = toc(time_g);
118 |     xsol              = result.x;
119 |     
120 | end
121 | 
122 | 


--------------------------------------------------------------------------------
/mixtures/code/comp_sampler.R:
--------------------------------------------------------------------------------
 1 | mvnCompMix <- function (dataMat, ncomp = 2, nrep, niter = 10000, nburn = 5000, nthin = 5) {
 2 |     library(matrixStats)
 3 |     library(mvtnorm)
 4 |     library(MCMCpack)
 5 | 
 6 |     nobs <- nrow(dataMat)
 7 |     ndim <- ncol(dataMat)
 8 |     alpha <- rep(1 / ncomp, ncomp)
 9 | 
10 |     probs <- as.numeric(rdirichlet(1, alpha))
11 |     zMat <- rmultinom(nobs, 1, probs)
12 |     densMat <- matrix(0.0, nrow = nobs, ncol = ncomp)
13 |     muMat <- matrix(0.0, nrow = ncomp, ncol = ndim)
14 |     sigArr <- aperm(array(diag(1.0, ndim), c(ndim, ndim, ncomp)), perm = c(3, 1, 2))
15 | 
16 |     kap0 <- 0.01; m0 <- rep(0.0, ndim) # mu hyper-pars
17 |     nu0 <- 2; s0 <- 2 * nu0 * diag(1.0, ndim) # sigma hyper-pars
18 | 
19 |     probsSamp <- matrix(0.0, nrow = (niter - nburn) / nthin, ncol = ncomp)
20 |     muMatSamp <- array(0.0, dim = c(ncomp, ndim, (niter - nburn) / nthin))
21 |     sigMatSamp <- array(0.0, dim = c(ncomp, ndim, ndim, (niter - nburn) / nthin))
22 | 
23 |     cts <- 0
24 |     startTime <- proc.time()
25 |     for (ii in 0:niter) {
26 |         idxList <- lapply(split(zMat, row(zMat)),
27 |                           function (x) {
28 |                               which(x == 1)
29 |                           })
30 |         ns <- sapply(idxList, length)
31 |         # sample probs
32 |         probs <- rdirichlet(1, ns + (alpha - 1 + nrep) / nrep)
33 | 
34 |         if (ii %% 100 == 0) {cat("gibbs: ", ii, "\n"); cat("ns: ", ns, "\n")}
35 | 
36 |         for (jj in seq_along(ns)) {
37 |             datMean <- colMeans(dataMat[idxList[[jj]], , drop = FALSE])
38 |             ## mean
39 |             muCov <- sigArr[jj, , ] / (kap0 / nrep + ns[jj])
40 |             muMean <- (kap0 * m0 / nrep + ns[jj] * datMean) / (kap0 / nrep + ns[jj])
41 |             muMat[jj, ] <- rmvnorm(1, mean = muMean, sigma = muCov, method = "chol")
42 |             ## cov
43 |             mat1 <- (kap0 / nrep * ns[jj] /  (kap0 / nrep + ns[jj])) * tcrossprod(datMean - m0, datMean - m0)
44 |             centMat <- dataMat[idxList[[jj]], , drop = FALSE] - matrix(datMean, nrow = length(idxList[[jj]]), ncol = ndim, byrow = TRUE)
45 |             mat2 <- crossprod(centMat, centMat)
46 |             covSclMat <- mat1 + mat2 + s0 / nrep
47 |             covDf <- ns[jj] + (nu0 + ndim + 1) / nrep - (ndim + 1)
48 |             sigArr[jj, , ]<- riwish(covDf, covSclMat)
49 |             densMat[ , jj] <- dmvnorm(dataMat, muMat[jj, ], sigArr[jj, , ], log = TRUE)
50 |         }
51 | 
52 |         lprobs <- densMat + log(matrix(probs, nrow = nobs, ncol = ncol(densMat), byrow = TRUE))
53 |         eprobs <- exp(lprobs - rowMaxs(lprobs)) / rowSums(exp(lprobs - rowMaxs(lprobs)))
54 | 
55 |         for (kk in seq_len(nobs)) {
56 |             ppp <- eprobs[kk, ]
57 |             zMat[ , kk] <- rmultinom(1, 1, ppp)
58 |         }
59 | 
60 |         if ((ii > nburn) && (ii %% nthin == 0)) {
61 |             cts <- cts + 1
62 |             probsSamp[cts, ] <- probs
63 |             muMatSamp[ , , cts] <- muMat
64 |             sigMatSamp[ , , , cts] <- sigArr
65 |         }
66 |     }
67 |     endTime <- proc.time()
68 | 
69 | 
70 |     list(
71 |         'mu' = muMatSamp,
72 |         'cov' = sigMatSamp,
73 |         'prob' = probsSamp,
74 |         'time' = endTime[3] - startTime[3]
75 |     )
76 | }
77 | 


--------------------------------------------------------------------------------
/mixtures/code/create_samples.R:
--------------------------------------------------------------------------------
  1 | ## full
  2 | rm(list = ls())
  3 | 
  4 | res <- list()
  5 | for (cc in 1:10) {
  6 |     res[[cc]] <- readRDS(paste0("/Shared/ssrivastva/wasp/mixtures/result/full/res_", cc, "_100k.rds"))
  7 | }
  8 | 
  9 | ordMat <- matrix(NA, 10, 2)
 10 | for (cc in 1:10) {
 11 |     if (all(rowMeans(res[[cc]]$mu[1, , ]) < rowMeans(res[[cc]]$mu[2, , ]))) {
 12 |         ordMat[cc, ] <- c(1, 2)
 13 |     } else {
 14 |         ordMat[cc, ] <- c(2, 1)
 15 |     }
 16 | }
 17 | 
 18 | fullCorr <- list()
 19 | for (cc in 1:10) {
 20 |     fullCorr[[cc]] <- list(numeric(1000), numeric(1000))
 21 |     for (ss in 1:1000) {
 22 |         if (all(ordMat[cc, ] == 1:2)) {
 23 |             fullCorr[[cc]][[1]][ss] <- cov2cor(res[[cc]]$cov[1, , , ss])[1, 2]
 24 |             fullCorr[[cc]][[2]][ss] <- cov2cor(res[[cc]]$cov[2, , , ss])[1, 2]
 25 |         } else {
 26 |             fullCorr[[cc]][[2]][ss] <- cov2cor(res[[cc]]$cov[1, , , ss])[1, 2]
 27 |             fullCorr[[cc]][[1]][ss] <- cov2cor(res[[cc]]$cov[2, , , ss])[1, 2]
 28 |         }
 29 |     }
 30 | }
 31 | 
 32 | muList <- list()
 33 | for (cc in 1:10) {
 34 |     muList[[cc]] <- list()
 35 |     if (all(ordMat[cc, ] == 1:2)) {
 36 |         muList[[cc]][[1]] <- t(res[[cc]]$mu[1, , ])
 37 |         muList[[cc]][[2]] <- t(res[[cc]]$mu[2, , ])
 38 |     } else {
 39 |         muList[[cc]][[2]] <- t(res[[cc]]$mu[1, , ])
 40 |         muList[[cc]][[1]] <- t(res[[cc]]$mu[2, , ])
 41 |     }
 42 | }
 43 | 
 44 | saveRDS(muList, "~/wasp/mixtures/result/muList.rds")
 45 | saveRDS(fullCorr, "~/wasp/mixtures/result/corrList.rds")
 46 | 
 47 | library(mvtnorm)
 48 | xx <- seq(0, 10, length = 500)
 49 | densSampMCMC <- rep(list(matrix(NA, 1000, 500)), 10)
 50 | for (cc in 1:10) {
 51 |     cat("cc: ", cc, "\n")
 52 |     for (ii in 1:1000) {
 53 |         cat("ii: ", ii, "\n")
 54 |         for (gg in seq_along(xx)) {
 55 |             yy <- c(xx[gg], xx[gg])
 56 |             dens1 <- dmvnorm(yy, mean = res[[cc]]$mu[1, , ii], sigma = res[[cc]]$cov[1, , , ii])
 57 |             dens2 <- dmvnorm(yy, mean = res[[cc]]$mu[2, , ii], sigma = res[[cc]]$cov[2, , , ii])
 58 |             densSampMCMC[[cc]][ii, gg] <- res[[cc]]$prob[ii, 1] * dens1 + res[[cc]]$prob[ii, 2] * dens2
 59 |         }
 60 |     }
 61 | }
 62 | 
 63 | saveRDS(densSampMCMC, "/Shared/ssrivastva/wasp/mixtures/result/mcmcDensList.rds")
 64 | 
 65 | ## wasp
 66 | 
 67 | rm(list=ls())
 68 | setwd("/Shared/ssrivastva/wasp/mixtures/result/sub5/samp")
 69 | 
 70 | nsub <- 5
 71 | res <- list()
 72 | for (cc in 1:10) {
 73 |     res[[cc]] <- list()
 74 |     for (kk in 1:nsub) {
 75 |         res[[cc]][[kk]] <- readRDS(paste0("res_cv_", cc,"_nsub_", kk, "_k5.rds"))
 76 |     }
 77 | }
 78 | 
 79 | muList <- list()
 80 | rhoList <- list()
 81 | for (cc in 1:10) {
 82 |     muList[[cc]] <- list()
 83 |     rhoList[[cc]] <- list()
 84 |     for (kk in 1:nsub) {
 85 |         muList[[cc]][[kk]] <- vector("list", 2)
 86 |         names(muList[[cc]][[kk]]) <- c("(1, 2)", "(7, 8)")
 87 |         rhoList[[cc]][[kk]] <- vector("list", 2)
 88 |         names(rhoList[[cc]][[kk]]) <- c("(1, 2)", "(7, 8)")
 89 |     }
 90 | }
 91 | 
 92 | for (cc in 1:10) {
 93 |     for (kk in 1:nsub) {
 94 |         if (all(rowMeans(res[[cc]][[kk]]$mu[1, , ]) < rowMeans(res[[cc]][[kk]]$mu[2, , ]))) {
 95 |             muList[[cc]][[kk]][[1]] <- t(res[[cc]][[kk]]$mu[1, , ])
 96 |             muList[[cc]][[kk]][[2]] <- t(res[[cc]][[kk]]$mu[2, , ])
 97 |             rhoList[[cc]][[kk]][[1]] <- numeric(1000)
 98 |             rhoList[[cc]][[kk]][[2]] <- numeric(1000)
 99 |             for (ss in 1:1000) {
100 |                 rhoList[[cc]][[kk]][[1]][ss] <- cov2cor(res[[cc]][[kk]]$cov[1, , , ss])[1, 2]
101 |                 rhoList[[cc]][[kk]][[2]][ss] <- cov2cor(res[[cc]][[kk]]$cov[2, , , ss])[1, 2]
102 |             }
103 |         } else {
104 |             muList[[cc]][[kk]][[1]] <- t(res[[cc]][[kk]]$mu[2, , ])
105 |             muList[[cc]][[kk]][[2]] <- t(res[[cc]][[kk]]$mu[1, , ])
106 |             rhoList[[cc]][[kk]][[1]] <- numeric(1000)
107 |             rhoList[[cc]][[kk]][[2]] <- numeric(1000)
108 |             for (ss in 1:1000) {
109 |                 rhoList[[cc]][[kk]][[1]][ss] <- cov2cor(res[[cc]][[kk]]$cov[2, , , ss])[1, 2]
110 |                 rhoList[[cc]][[kk]][[2]][ss] <- cov2cor(res[[cc]][[kk]]$cov[1, , , ss])[1, 2]
111 |             }
112 |         }
113 |     }
114 | }
115 | 
116 | saveRDS(rhoList, "~/wasp/mixtures/result/subRhoList5.rds")
117 | saveRDS(muList, "~/wasp/mixtures/result/subMuList5.rds")
118 | 
119 | for (cc in 1:10) {
120 |     for (kk in 1:nsub) {
121 |         write.table(muList[[cc]][[kk]][[1]], file = paste0("csv/samp_cv_", cc, "_nsub_", kk, "_k5_mu1.csv"),
122 |                     sep = ",", row.names = FALSE, col.names = FALSE)
123 |         write.table(muList[[cc]][[kk]][[2]], file = paste0("csv/samp_cv_", cc, "_nsub_", kk, "_k5_mu2.csv"),
124 |                     sep = ",", row.names = FALSE, col.names = FALSE)
125 |     }
126 | }
127 | 
128 | library(mvtnorm)
129 | xx <- seq(0, 10, length = 500)
130 | densSampK5 <- rep(list(rep(list(matrix(NA, 1000, 500)), 5)), 10)
131 | for (cc in 1:10) {
132 |     cat("cc: ", cc, "\n")
133 |     for (kk in 1:nsub) {
134 |         for (ii in 1:1000) {
135 |             cat("ii: ", ii, "\n")
136 |             for (gg in seq_along(xx)) {
137 |                 yy <- c(xx[gg], xx[gg])
138 |                 dens1 <- dmvnorm(yy, mean = res[[cc]][[kk]]$mu[1, , ii], sigma = res[[cc]][[kk]]$cov[1, , , ii])
139 |                 dens2 <- dmvnorm(yy, mean = res[[cc]][[kk]]$mu[2, , ii], sigma = res[[cc]][[kk]]$cov[2, , , ii])
140 |                 densSampK5[[cc]][[kk]][ii, gg] <- res[[cc]][[kk]]$prob[ii, 1] * dens1 + res[[cc]][[kk]]$prob[ii, 2] * dens2
141 |             }
142 |         }
143 |     }
144 | }
145 | 
146 | saveRDS(densSampK5, "/Shared/ssrivastva/wasp/mixtures/result/waspDensList_k5.rds")
147 | 
148 | rm(list=ls())
149 | 
150 | setwd("/Shared/ssrivastva/wasp/mixtures/result/sub10/samp")
151 | nsub <- 10
152 | 
153 | res <- list()
154 | for (cc in 1:10) {
155 |     res[[cc]] <- list()
156 |     for (kk in 1:nsub) {
157 |         res[[cc]][[kk]] <- readRDS(paste0("res_cv_", cc,"_nsub_", kk, "_k10.rds"))
158 |     }
159 | }
160 | 
161 | muList <- list()
162 | rhoList <- list()
163 | for (cc in 1:10) {
164 |     muList[[cc]] <- list()
165 |     rhoList[[cc]] <- list()
166 |     for (kk in 1:nsub) {
167 |         muList[[cc]][[kk]] <- vector("list", 2)
168 |         names(muList[[cc]][[kk]]) <- c("(1, 2)", "(7, 8)")
169 |         rhoList[[cc]][[kk]] <- vector("list", 2)
170 |         names(rhoList[[cc]][[kk]]) <- c("(1, 2)", "(7, 8)")
171 |     }
172 | }
173 | 
174 | for (cc in 1:10) {
175 |     for (kk in 1:nsub) {
176 |         if (all(rowMeans(res[[cc]][[kk]]$mu[1, , ]) < rowMeans(res[[cc]][[kk]]$mu[2, , ]))) {
177 |             muList[[cc]][[kk]][[1]] <- t(res[[cc]][[kk]]$mu[1, , ])
178 |             muList[[cc]][[kk]][[2]] <- t(res[[cc]][[kk]]$mu[2, , ])
179 |             rhoList[[cc]][[kk]][[1]] <- numeric(1000)
180 |             rhoList[[cc]][[kk]][[2]] <- numeric(1000)
181 |             for (ss in 1:1000) {
182 |                 rhoList[[cc]][[kk]][[1]][ss] <- cov2cor(res[[cc]][[kk]]$cov[1, , , ss])[1, 2]
183 |                 rhoList[[cc]][[kk]][[2]][ss] <- cov2cor(res[[cc]][[kk]]$cov[2, , , ss])[1, 2]
184 |             }
185 |         } else {
186 |             muList[[cc]][[kk]][[1]] <- t(res[[cc]][[kk]]$mu[2, , ])
187 |             muList[[cc]][[kk]][[2]] <- t(res[[cc]][[kk]]$mu[1, , ])
188 |             rhoList[[cc]][[kk]][[1]] <- numeric(1000)
189 |             rhoList[[cc]][[kk]][[2]] <- numeric(1000)
190 |             for (ss in 1:1000) {
191 |                 rhoList[[cc]][[kk]][[1]][ss] <- cov2cor(res[[cc]][[kk]]$cov[2, , , ss])[1, 2]
192 |                 rhoList[[cc]][[kk]][[2]][ss] <- cov2cor(res[[cc]][[kk]]$cov[1, , , ss])[1, 2]
193 |             }
194 |         }
195 |     }
196 | }
197 | 
198 | saveRDS(rhoList, "~/wasp/mixtures/result/subRhoList10.rds")
199 | saveRDS(muList, "~/wasp/mixtures/result/subMuList10.rds")
200 | 
201 | for (cc in 1:10) {
202 |     for (kk in 1:nsub) {
203 |         write.table(muList[[cc]][[kk]][[1]], file = paste0("csv/samp_cv_", cc, "_nsub_", kk, "_k10_mu1.csv"),
204 |                     sep = ",", row.names = FALSE, col.names = FALSE)
205 |         write.table(muList[[cc]][[kk]][[2]], file = paste0("csv/samp_cv_", cc, "_nsub_", kk, "_k10_mu2.csv"),
206 |                     sep = ",", row.names = FALSE, col.names = FALSE)
207 |     }
208 | }
209 | 
210 | library(mvtnorm)
211 | xx <- seq(0, 10, length = 500)
212 | densSampK10 <- rep(list(rep(list(matrix(NA, 1000, 500)), 10)), 10)
213 | for (cc in 1:10) {
214 |     cat("cc: ", cc, "\n")
215 |     for (kk in 1:nsub) {
216 |         for (ii in 1:1000) {
217 |             cat("ii: ", ii, "\n")
218 |             for (gg in seq_along(xx)) {
219 |                 yy <- c(xx[gg], xx[gg])
220 |                 dens1 <- dmvnorm(yy, mean = res[[cc]][[kk]]$mu[1, , ii], sigma = res[[cc]][[kk]]$cov[1, , , ii])
221 |                 dens2 <- dmvnorm(yy, mean = res[[cc]][[kk]]$mu[2, , ii], sigma = res[[cc]][[kk]]$cov[2, , , ii])
222 |                 densSampK10[[cc]][[kk]][ii, gg] <- res[[cc]][[kk]]$prob[ii, 1] * dens1 + res[[cc]][[kk]]$prob[ii, 2] * dens2
223 |             }
224 |         }
225 |     }
226 | }
227 | 
228 | saveRDS(densSampK10, "/Shared/ssrivastva/wasp/mixtures/result/waspDensList_k10.rds")
229 | 
230 | ## vb
231 | 
232 | library(mvtnorm)
233 | xx <- seq(0, 10, length = 500)
234 | vbDens <- list()
235 | vbRho1 <- list()
236 | vbRho2 <- list()
237 | for (cc in 1:10) {
238 |     cat("cc: ", cc, "\n")
239 |     dat <- readRDS(paste0("vb/vb_cv_", cc, ".rds"))
240 |     vbRho1[[cc]] <- numeric(1000)
241 |     vbRho2[[cc]] <- numeric(1000)
242 |     vbDens[[cc]] <- matrix(NA, 1000, 500)
243 |     for (ii in 1:1000) {
244 |         cat("ii: ", ii, "\n")
245 |         infMat1 <- rwish(v = dat$v[1], dat$W[[1]])
246 |         sig1 <- chol2inv(chol(infMat1))
247 |         mu1 <- as.numeric(crossprod(chol(sig1) / sqrt(dat$beta[1]), rnorm(2)) + dat$mu[[1]])
248 |         infMat2 <- rwish(v = dat$v[2], dat$W[[2]])
249 |         sig2 <- chol2inv(chol(infMat2))
250 |         mu2 <- as.numeric(crossprod(chol(sig2) / sqrt(dat$beta[2]), rnorm(2)) + dat$mu[[2]])
251 |         vbRho1[[cc]][ii] <- cov2cor(sig2)[1, 2] # labels are flipped in VB results (2 is 1)
252 |         vbRho2[[cc]][ii] <- cov2cor(sig1)[1, 2] # labels are flipped in VB results (1 is 2)
253 |         pp <- as.numeric(rdirichlet(1, dat$alpha))
254 |         for (gg in seq_along(xx)) {
255 |             yy <- c(xx[gg], xx[gg])
256 |             dens1 <- dmvnorm(yy, mean = mu1, sigma = sig1)
257 |             dens2 <- dmvnorm(yy, mean = mu2, sigma = sig2)
258 |             vbDens[[cc]][ii, gg] <- pp[1] * dens1 + pp[2] * dens2
259 |         }
260 |     }
261 | }
262 | 
263 | saveRDS(vbRho1, "/Shared/ssrivastva/wasp/mixtures/result/vbRho1.rds")
264 | saveRDS(vbRho2, "/Shared/ssrivastva/wasp/mixtures/result/vbRho2.rds")
265 | saveRDS(vbDens, "/Shared/ssrivastva/wasp/mixtures/result/vbDensList.rds")
266 | 


--------------------------------------------------------------------------------
/mixtures/code/full_sampler.R:
--------------------------------------------------------------------------------
 1 | mvnMix <- function (dataMat, ncomp = 2, niter = 10000, nburn = 5000, nthin = 5) {
 2 |     library(matrixStats)
 3 |     library(mvtnorm)
 4 |     library(MCMCpack)
 5 | 
 6 |     nobs <- nrow(dataMat)
 7 |     ndim <- ncol(dataMat)
 8 |     alpha <- rep(1 / ncomp, ncomp)
 9 | 
10 |     probs <- as.numeric(rdirichlet(1, alpha))
11 |     zMat <- rmultinom(nobs, 1, probs)
12 |     densMat <- matrix(0.0, nrow = nobs, ncol = ncomp)
13 |     muMat <- matrix(0.0, nrow = ncomp, ncol = ndim)
14 |     sigArr <- aperm(array(diag(1.0, ndim), c(ndim, ndim, ncomp)), perm = c(3, 1, 2))
15 | 
16 |     kap0 <- 0.01; m0 <- rep(0.0, ndim) # mu hyper-pars
17 |     nu0 <- 2; s0 <- 2 * nu0 * diag(1.0, ndim) # sigma hyper-pars
18 | 
19 |     probsSamp <- matrix(0.0, nrow = (niter - nburn) / nthin, ncol = ncomp)
20 |     muMatSamp <- array(0.0, dim = c(ncomp, ndim, (niter - nburn) / nthin))
21 |     sigMatSamp <- array(0.0, dim = c(ncomp, ndim, ndim, (niter - nburn) / nthin))
22 | 
23 |     cts <- 0
24 |     startTime <- proc.time()
25 |     for (ii in 0:niter) {
26 |         idxList <- lapply(split(zMat, row(zMat)),
27 |                           function (x) {
28 |                               which(x == 1)
29 |                           })
30 |         ns <- sapply(idxList, length)
31 |         # sample probs
32 |         probs <- (rdirichlet(1, ns + alpha))
33 | 
34 |         if (ii %% 100 == 0) {cat("gibbs: ", ii, "\n"); cat("ns: ", ns, "\n")}
35 | 
36 |         for (jj in seq_along(ns)) {
37 |             datMean <- colMeans(dataMat[idxList[[jj]], , drop = FALSE])
38 |             ## mean
39 |             muCov <- sigArr[jj, , ] / (kap0 + ns[jj])
40 |             muMean <- (kap0 * m0 + ns[jj] * datMean) / (kap0 + ns[jj])
41 |             muMat[jj, ] <- rmvnorm(1, mean = muMean, sigma = muCov, method = "chol")
42 |             ## cov
43 |             mat1 <- (kap0 * ns[jj] /  (kap0 + ns[jj])) * tcrossprod(datMean - m0, datMean - m0)
44 |             centMat <- dataMat[idxList[[jj]], , drop = FALSE] - matrix(datMean, nrow = length(idxList[[jj]]), ncol = ndim, byrow = TRUE)
45 |             mat2 <- crossprod(centMat, centMat)
46 |             covSclMat <- mat1 + mat2 + s0
47 |             covDf <- ns[jj] + nu0 + 1
48 |             sigArr[jj, , ]<- riwish(covDf, covSclMat)
49 |             densMat[ , jj] <- dmvnorm(dataMat, muMat[jj, ], sigArr[jj, , ], log = TRUE)
50 |         }
51 | 
52 |         lprobs <- densMat + log(matrix(probs, nrow = nobs, ncol = ncol(densMat), byrow = TRUE))
53 |         eprobs <- exp(lprobs - rowMaxs(lprobs)) / rowSums(exp(lprobs - rowMaxs(lprobs)))
54 | 
55 |         for (kk in seq_len(nobs)) {
56 |             ppp <- eprobs[kk, ]
57 |             zMat[ , kk] <- rmultinom(1, 1, ppp)
58 |         }
59 | 
60 |         if ((ii > nburn) && (ii %% nthin == 0)) {
61 |             cts <- cts + 1
62 |             probsSamp[cts, ] <- probs
63 |             muMatSamp[ , , cts] <- muMat
64 |             sigMatSamp[ , , , cts] <- sigArr
65 |         }
66 |     }
67 |     endTime <- proc.time()
68 | 
69 | 
70 |     list(
71 |         'mu' = muMatSamp,
72 |         'cov' = sigMatSamp,
73 |         'prob' = probsSamp,
74 |         'time' = endTime[3] - startTime[3]
75 |     )
76 | }
77 | 


--------------------------------------------------------------------------------
/mixtures/code/recoverSolution.m:
--------------------------------------------------------------------------------
 1 | % FUNCTION: [tOptSol, aOptSol] = recoverSolution(xOptSol, nsubs, N, Ni)
 2 | % PURPOSE:  Recover the original solution.
 3 | %
 4 | function [tOptSol, aOptSol] = recoverSolution(xOptSol, nsubs, N, Ni)
 5 | 
 6 |     % Define this soft-thresholding operator to remove small elements.
 7 |     softThresOper = @(x, t)(sign(x).*max(abs(x) - t, 0));
 8 | 
 9 |     % Recover the solution.
10 |     aOptSol = xOptSol(1:N, 1);
11 |     xRest   = xOptSol(N+1:end);
12 |     for p = 1:nsubs
13 |         tOptSol{p} = reshape( xRest(1:N*Ni(p)), N, Ni(p))/Ni(p);
14 |         xRest      = xRest(N*Ni(p)+1:end);
15 |         tOptSol{p} = softThresOper(tOptSol{p}, 1e-10);
16 |     end
17 | 
18 | end
19 | % @END ...
20 | 


--------------------------------------------------------------------------------
/mixtures/code/simulate_data.R:
--------------------------------------------------------------------------------
  1 | rm(list=ls())
  2 | setwd("/Users/ssrivastva/wasp/mixtures/code/")
  3 | 
  4 | genData <- function (muList = list('1' = c(1, 2), '2' = c(7, 8)),
  5 |                      sigMat = matrix(c(1, 0.5, 0.5, 2), 2, 2),
  6 |                      probs = c(0.3, 0.7),
  7 |                      nobs = 1000) {
  8 |     library(mvtnorm)
  9 |     zs <- rmultinom(nobs, 1, probs)
 10 | 
 11 |     idxList <- lapply(split(zs, row(zs)),
 12 |                       function (x) {
 13 |                           which(x == 1)
 14 |                       })
 15 | 
 16 | 
 17 | 
 18 |     dataMat <- matrix(0.0, nrow = nobs, ncol = 2)
 19 |     clusts <- numeric(nobs)
 20 |     for (ii in seq_along(muList)) {
 21 |         idx <- idxList[[ii]]
 22 |         clusts[idx] <- ii
 23 |         dataMat[idx, ] <- rmvnorm(length(idx), muList[[ii]], sigMat)
 24 |     }
 25 |     rownames(dataMat) <- paste("data", seq_len(nrow(dataMat)), clusts, sep = "_")
 26 | 
 27 |     list(data = dataMat,
 28 |          cluster = clusts,
 29 |          zs = zs
 30 |          )
 31 | }
 32 | 
 33 | #### full data
 34 | 
 35 | set.seed(12345)
 36 | nrep <- 10
 37 | reps <- vector("list", length = nrep)
 38 | nobs <- 1e5
 39 | for (r in seq_len(nrep)) {
 40 |     cat(r, "rep\n")
 41 |     reps[[r]] <- genData(nobs = nobs)
 42 | }
 43 | saveRDS(reps, "../data/mix_100k.rds")
 44 | 
 45 | #### k = 10
 46 | 
 47 | rm(list = ls())
 48 | reps <- readRDS("../data/mix_100k.rds")
 49 | 
 50 | npart <- 10
 51 | nrep <- 10
 52 | nclust <- 3
 53 | 
 54 | parts <- vector("list", length = nrep)
 55 | partsIdx <- vector("list", length = nrep)
 56 | 
 57 | for (ii in 1:nrep) {
 58 |     parts[[ii]] <- vector("list", length = npart)
 59 | }
 60 | 
 61 | set.seed(12345)
 62 | for (r in seq_len(nrep)) {
 63 |     kmns <- kmeans(reps[[r]]$data, nclust)
 64 |     partsIdx[[r]] <- numeric(nrow(reps[[r]]$data))
 65 | 
 66 |     for (cc in 1:nclust) {
 67 |         ccIdx <- which(kmns$cluster == cc)
 68 |         partsIdx[[r]][ccIdx] <- sample(1:npart, length(ccIdx), replace = TRUE)
 69 |     }
 70 | 
 71 |     for (ii in 1:npart) {
 72 |         parts[[r]][[ii]] <- reps[[r]]$data[partsIdx[[r]] == ii, ]
 73 |     }
 74 | }
 75 | 
 76 | saveRDS(parts, "../data/wasp_mix_100k_k10.rds")
 77 | 
 78 | #### k = 5
 79 | 
 80 | rm(list = ls())
 81 | reps <- readRDS("../data/mix_100k.rds")
 82 | 
 83 | npart <- 5
 84 | nrep <- 10
 85 | nclust <- 3
 86 | 
 87 | parts <- vector("list", length = nrep)
 88 | partsIdx <- vector("list", length = nrep)
 89 | 
 90 | for (ii in 1:nrep) {
 91 |     parts[[ii]] <- vector("list", length = npart)
 92 | }
 93 | 
 94 | set.seed(12345)
 95 | for (r in seq_len(nrep)) {
 96 |     kmns <- kmeans(reps[[r]]$data, nclust)
 97 |     partsIdx[[r]] <- numeric(nrow(reps[[r]]$data))
 98 | 
 99 |     for (cc in 1:nclust) {
100 |         ccIdx <- which(kmns$cluster == cc)
101 |         partsIdx[[r]][ccIdx] <- sample(1:npart, length(ccIdx), replace = TRUE)
102 |     }
103 | 
104 |     for (ii in 1:npart) {
105 |         parts[[r]][[ii]] <- reps[[r]]$data[partsIdx[[r]] == ii, ]
106 |     }
107 | }
108 | 
109 | saveRDS(parts, "../data/wasp_mix_100k_k5.rds")
110 | 


--------------------------------------------------------------------------------
/mixtures/code/submit.R:
--------------------------------------------------------------------------------
  1 | cmdArgs <- commandArgs(trailingOnly = TRUE)
  2 | 
  3 | mtd <- as.numeric(cmdArgs[1])
  4 | id <- as.numeric(cmdArgs[2])
  5 | 
  6 | if (mtd == 1) {
  7 |     source("full_sampler.R")
  8 |     cvtrain <- readRDS("../data/mix_100k.rds")
  9 |     train <- cvtrain[[id]]
 10 |     dataMat <- train$data
 11 |     res <- mvnMix(dataMat, ncomp = 2, niter = 10000, nburn = 5000, nthin = 5)
 12 |     fname <- paste0("/Shared/ssrivastva/wasp/mixtures/result/full/res_", id, "_100k.rds")
 13 |     saveRDS(res, fname)
 14 | } else if (mtd == 2) {
 15 |     source("wasp_sampler.R")
 16 |     cvtrain <- readRDS("../data/wasp_mix_100k_k10.rds")
 17 |     reps <- rep(1:10, each = 10)
 18 |     subs <- rep(1:10, times = 10)
 19 | 
 20 |     wids <- cbind(reps, subs)
 21 |     cid <- wids[id, 1]
 22 |     sid <- wids[id, 2]
 23 | 
 24 |     train <- cvtrain[[cid]][[sid]]
 25 |     res <- mvnWaspMix(train, ncomp = 2, nrep = 10, niter = 10000, nburn = 5000, nthin = 5)
 26 |     fname <- paste0("/Shared/ssrivastva/wasp/mixtures/result/sub10/samp/res_cv_", cid, "_nsub_", sid, "_k10.rds")
 27 |     saveRDS(res, fname)
 28 | } else if (mtd == 3) {
 29 |     source("wasp_sampler.R")
 30 |     cvtrain <- readRDS("../data/wasp_mix_100k_k5.rds")
 31 | 
 32 |     reps <- rep(1:10, each = 5)
 33 |     subs <- rep(1:5, times = 10)
 34 | 
 35 |     wids <- cbind(reps, subs)
 36 |     cid <- wids[id, 1]
 37 |     sid <- wids[id, 2]
 38 | 
 39 |     train <- cvtrain[[cid]][[sid]]
 40 | 
 41 |     res <- mvnWaspMix(train, ncomp = 2, nrep = 5, niter = 10000, nburn = 5000, nthin = 5)
 42 |     fname <- paste0("/Shared/ssrivastva/wasp/mixtures/result/sub5/samp/res_cv_", cid, "_nsub_", sid, "_k5.rds")
 43 |     saveRDS(res, fname)
 44 | } else if (mtd == 4) {
 45 |     source("comp_sampler.R")
 46 |     cvtrain <- readRDS("../data/wasp_mix_100k_k10.rds")
 47 |     reps <- rep(1:10, each = 10)
 48 |     subs <- rep(1:10, times = 10)
 49 | 
 50 |     wids <- cbind(reps, subs)
 51 |     cid <- wids[id, 1]
 52 |     sid <- wids[id, 2]
 53 | 
 54 |     train <- cvtrain[[cid]][[sid]]
 55 |     res <- mvnCompMix(train, ncomp = 2, nrep = 10, niter = 10000, nburn = 5000, nthin = 5)
 56 |     fname <- paste0("/Shared/ssrivastva/wasp/mixtures/result/comp/sub10/comp_cv_", cid, "_nsub_", sid, "_k10.rds")
 57 |     saveRDS(res, fname)
 58 | } else if (mtd == 5) {
 59 |     source("comp_sampler.R")
 60 |     cvtrain <- readRDS("../data/wasp_mix_100k_k5.rds")
 61 | 
 62 |     reps <- rep(1:10, each = 5)
 63 |     subs <- rep(1:5, times = 10)
 64 | 
 65 |     wids <- cbind(reps, subs)
 66 |     cid <- wids[id, 1]
 67 |     sid <- wids[id, 2]
 68 | 
 69 |     train <- cvtrain[[cid]][[sid]]
 70 | 
 71 |     res <- mvnCompMix(train, ncomp = 2, nrep = 5, niter = 10000, nburn = 5000, nthin = 5)
 72 |     fname <- paste0("/Shared/ssrivastva/wasp/mixtures/result/comp/sub5/comp_cv_", cid, "_nsub_", sid, "_k5.rds")
 73 |     saveRDS(res, fname)
 74 | } else if (mtd == 6) {
 75 |     source("vb_sampler.R")
 76 | 
 77 |     cvtrain <- readRDS("../data/mix_100k.rds")
 78 |     train <- cvtrain[[id]]
 79 |     dataMat <- train$data
 80 | 
 81 |     res <- mvnVbMix(dataMat, ncomp = 2, niter = 1000)
 82 |     fname <- paste0("/Shared/ssrivastva/wasp/mixtures/result/vb/vb_cv_", id, ".rds")
 83 |     saveRDS(res, fname)
 84 | } else if (mtd == 7) {
 85 |     library(parallelMCMCcombine)
 86 | 
 87 |     cvs <- rep(1:10, each = 2)
 88 |     subs <- rep(1:2, times = 10)
 89 |     cid <- cvs[id]
 90 |     sid <- subs[id]
 91 | 
 92 |     xx <- seq(0, 10, length = 500)
 93 |     if (sid == 1) {
 94 |         subdens <- array(0.0, dim = c(500, 1000, 5))
 95 |         tmp <- numeric(5)
 96 |         for (kk in 1:5) {
 97 |             fname <- paste0("/Shared/ssrivastva/wasp/mixtures/result/comp/sub5/comp_cv_", cid, "_nsub_", kk, "_k5.rds")
 98 |             samp <- readRDS(fname)
 99 |             cat("kk: ", kk, "\n")
100 |             for (ii in 1:1000) {
101 |                 for (gg in seq_along(xx)) {
102 |                     yy <- c(xx[gg], xx[gg])
103 |                     dens1 <- dmvnorm(yy, mean = samp$mu[1, , ii], sigma = samp$cov[1, , , ii])
104 |                     dens2 <- dmvnorm(yy, mean = samp$mu[2, , ii], sigma = samp$cov[2, , , ii])
105 |                     subdens[gg, ii, kk] <- samp$prob[ii, 1] * dens1 + samp$prob[ii, 2] * dens2
106 |                 }
107 |             }
108 |             tmp[kk] <- samp$time
109 |         }
110 |         fname1 <- paste0("/Shared/ssrivastva/wasp/mixtures/result/cons/cons_dens_cv_", cid, "_k5.rds")
111 |         fname2 <- paste0("/Shared/ssrivastva/wasp/mixtures/result/xing/xing_dens_cv_", cid, "_k5.rds")
112 |     } else {
113 |         subdens <- array(0.0, dim = c(500, 1000, 10))
114 |         tmp <- numeric(10)
115 |         for (kk in 1:10) {
116 |             fname <- paste0("/Shared/ssrivastva/wasp/mixtures/result/comp/sub10/comp_cv_", cid, "_nsub_", kk, "_k10.rds")
117 |             samp <- readRDS(fname)
118 |             for (ii in 1:1000) {
119 |                 for (gg in seq_along(xx)) {
120 |                     yy <- c(xx[gg], xx[gg])
121 |                     dens1 <- dmvnorm(yy, mean = samp$mu[1, , ii], sigma = samp$cov[1, , , ii])
122 |                     dens2 <- dmvnorm(yy, mean = samp$mu[2, , ii], sigma = samp$cov[2, , , ii])
123 |                     subdens[gg, ii, kk] <- samp$prob[ii, 1] * dens1 + samp$prob[ii, 2] * dens2
124 |                 }
125 |             }
126 |             tmp[kk] <- samp$time
127 |         }
128 |         fname1 <- paste0("/Shared/ssrivastva/wasp/mixtures/result/cons/cons_dens_cv_", cid, "_k10.rds")
129 |         fname2 <- paste0("/Shared/ssrivastva/wasp/mixtures/result/xing/xing_dens_cv_", cid, "_k10.rds")
130 |     }
131 | 
132 |     strt1 <- proc.time()
133 |     scottDens <- consensusMCindep(subchain = subdens)
134 |     end1 <- proc.time()
135 |     strt2 <- proc.time()
136 |     try1 <- tryCatch(xingDens <- semiparamDPE(subchain = subdens),
137 |                      error = function(e) e)
138 |     if (any(class(try1) == "simpleError")) {
139 |         xingDens <- try1
140 |     }
141 |     end2 <- proc.time()
142 | 
143 |     saveRDS(list("dens" = t(scottDens), time = mean(tmp) + end1[3] - strt1[3]), fname1)
144 |     saveRDS(list("dens" = t(xingDens), time = mean(tmp) + end2[3] - strt2[3]), fname2)
145 | } else (mtd == 8) {
146 |     library(parallelMCMCcombine)
147 | 
148 |     cvs <- rep(1:10, each = 2)
149 |     subs <- rep(1:2, times = 10)
150 |     cid <- cvs[id]
151 |     sid <- subs[id]
152 | 
153 |     if (sid == 1) {
154 |         subdensRho <- array(0.0, dim = c(2, 1000, 5))
155 |         tmp <- numeric(5)
156 |         for (kk in 1:5) {
157 |             fname <- paste0("/Shared/ssrivastva/wasp/mixtures/result/comp/sub5/comp_cv_", cid, "_nsub_", kk, "_k5.rds")
158 |             samp <- readRDS(fname)
159 |             ppp <- colMeans(samp$prob)
160 |             if (ppp[1] < ppp[2]) {
161 |                 for (ii in 1:1000) {
162 |                     subdensRho[1, ii, kk] <- cov2cor(samp$cov[1, , , ii])[1, 2]
163 |                     subdensRho[2, ii, kk] <- cov2cor(samp$cov[2, , , ii])[1, 2]
164 |                 }
165 |             } else {
166 |                 for (ii in 1:1000) {
167 |                     subdensRho[2, ii, kk] <- cov2cor(samp$cov[1, , , ii])[1, 2]
168 |                     subdensRho[1, ii, kk] <- cov2cor(samp$cov[2, , , ii])[1, 2]
169 |                 }
170 |             }
171 |             tmp[kk] <- samp$time
172 |         }
173 |         fname1 <- paste0("/Shared/ssrivastva/wasp/mixtures/result/cons/cons_rho_cv_", cid, "_k5.rds")
174 |         fname2 <- paste0("/Shared/ssrivastva/wasp/mixtures/result/xing/xing_rho_cv_", cid, "_k5.rds")
175 |     } else {
176 |         subdensRho <- array(0.0, dim = c(2, 1000, 10))
177 |         tmp <- numeric(10)
178 |         for (kk in 1:10) {
179 |             fname <- paste0("/Shared/ssrivastva/wasp/mixtures/result/comp/sub10/comp_cv_", cid, "_nsub_", kk, "_k10.rds")
180 |             samp <- readRDS(fname)
181 |             ppp <- colMeans(samp$prob)
182 |             if (ppp[1] < ppp[2]) {
183 |                 for (ii in 1:1000) {
184 |                     subdensRho[1, ii, kk] <- cov2cor(samp$cov[1, , , ii])[1, 2]
185 |                     subdensRho[2, ii, kk] <- cov2cor(samp$cov[2, , , ii])[1, 2]
186 |                 }
187 |             } else {
188 |                 for (ii in 1:1000) {
189 |                     subdensRho[2, ii, kk] <- cov2cor(samp$cov[1, , , ii])[1, 2]
190 |                     subdensRho[1, ii, kk] <- cov2cor(samp$cov[2, , , ii])[1, 2]
191 |                 }
192 |             }
193 |             tmp[kk] <- samp$time
194 |         }
195 |         fname1 <- paste0("/Shared/ssrivastva/wasp/mixtures/result/cons/cons_rho_cv_", cid, "_k10.rds")
196 |         fname2 <- paste0("/Shared/ssrivastva/wasp/mixtures/result/xing/xing_rho_cv_", cid, "_k10.rds")
197 |     }
198 | 
199 |     strt1 <- proc.time()
200 |     scottDens <- consensusMCindep(subchain = subdensRho)
201 |     end1 <- proc.time()
202 |     strt2 <- proc.time()
203 |     try1 <- tryCatch(xingDens <- semiparamDPE(subchain = subdensRho),
204 |                      error = function(e) e)
205 |     if (any(class(try1) == "simpleError")) {
206 |         xingDens <- try1
207 |     }
208 |     end2 <- proc.time()
209 | 
210 |     saveRDS(list("dens" = t(scottDens), time = mean(tmp) + end1[3] - strt1[3]), fname1)
211 |     saveRDS(list("dens" = t(xingDens), time = mean(tmp) + end2[3] - strt2[3]), fname2)
212 | }
213 | 


--------------------------------------------------------------------------------
/mixtures/code/vb_sampler.R:
--------------------------------------------------------------------------------
  1 | # Bishop's approach copied from http://www.cs.ubc.ca/~murphyk/Software/VBEMGMM/index.html
  2 | mvnVbMix <- function (dataMat, ncomp = 2, niter = 1000) {
  3 |     library(matrixStats)
  4 |     library(mvtnorm)
  5 |     library(MCMCpack)
  6 | 
  7 |     nobs <- nrow(dataMat)
  8 |     ndim <- ncol(dataMat)
  9 | 
 10 |     alpha0 <- rep(1 / ncomp, ncomp)
 11 |     beta0 <- 1; m0 <- rep(0.0, ndim) # mu hyper-pars;
 12 |     v0 <- 2; W0inv <- 2 * v0 * diag(1.0, ndim); W0 <- solve(W0inv) # sigma hyper-pars
 13 | 
 14 |     rho <- rdirichlet(nobs, alpha0)
 15 |     r <- rho
 16 |     Nk <- colSums(r)
 17 |     xbar <- vector("list", ncomp)
 18 |     S <- vector("list", ncomp)
 19 |     for (kk in 1:ncomp) {
 20 |         xbar[[kk]] <- colSums(r[ , kk] * dataMat) / Nk[kk]
 21 |         S[[kk]] <- crossprod((dataMat - matrix(xbar[[kk]], nrow = nobs, ncol = ndim, byrow = TRUE)) * r[ , kk], (dataMat - matrix(xbar[[kk]], nrow = nobs, ncol = ndim, byrow = TRUE))) / Nk[kk]
 22 |     }
 23 | 
 24 |     alpha <- alpha0 + Nk
 25 |     betas <- beta0 + Nk
 26 |     v <- v0 + Nk
 27 |     m <- vector("list", ncomp)
 28 |     W <- vector("list", ncomp)
 29 |     for (kk in 1:ncomp) {
 30 |         m[[kk]] <- (beta0 * m0 + Nk[kk] * xbar[[kk]]) / betas[kk]
 31 |         mlt <- (beta0 * Nk[kk]) / (beta0 + Nk[kk])
 32 |         W[[kk]] <- solve(W0inv + Nk[kk] * S[[kk]] + mlt * tcrossprod(xbar[[kk]] - m0))
 33 |     }
 34 | 
 35 |     logLambdaTilde <- numeric(ncomp)
 36 |     E <- matrix(NA, nobs, ncomp)
 37 |     trSW <- numeric(ncomp)
 38 |     xbarWxbar <- numeric(ncomp)
 39 |     mWm <- numeric(ncomp)
 40 |     trW0invW <- numeric(ncomp)
 41 |     L <- numeric(niter + 1)
 42 | 
 43 |     startTime <- proc.time()
 44 |     for (ii in 0:niter) {
 45 |         if (ii %% 10 == 0) cat("iter: ", ii, "\n")
 46 | 
 47 |         psiAlphaHat <- psigamma(sum(alpha))
 48 |         logPiTilde <- psigamma(alpha) - psiAlphaHat
 49 |         const <- ndim * log(2)
 50 |         for (kk in 1:ncomp) {
 51 |             logLambdaTilde[kk] <- sum(psigamma((v[kk] - (1:ndim) + 1) * 0.5)) + const + as.numeric(determinant(W[[kk]])$modulus)
 52 |             for (nn in 1:nobs) {
 53 |                 tmp1 <- dataMat[nn, ] - m[[kk]]
 54 |                 E[nn, kk] <- (ndim / betas[kk]) + v[kk] * crossprod(tmp1, W[[kk]] %*% tmp1)
 55 |             }
 56 |         }
 57 | 
 58 |         logRho <- matrix(logPiTilde, nobs, ncomp, byrow = TRUE) + 0.5 * matrix(logLambdaTilde, nobs, ncomp, byrow = TRUE) - 0.5 * E
 59 |         r <- exp(logRho - rowMaxs(logRho)) / rowSums(exp(logRho - rowMaxs(logRho)))
 60 | 
 61 |         Nk <- colSums(r)
 62 |         for (kk in 1:ncomp) {
 63 |             xbar[[kk]] <- colSums(r[ , kk] * dataMat) / Nk[kk]
 64 |             S[[kk]] <- crossprod((dataMat - matrix(xbar[[kk]], nrow = nobs, ncol = ndim, byrow = TRUE)) * r[ , kk], (dataMat - matrix(xbar[[kk]], nrow = nobs, ncol = ndim, byrow = TRUE))) / Nk[kk]
 65 |         }
 66 | 
 67 |         logCalpha0 <- lgamma(ncomp * alpha0) - ncomp * lgamma(alpha0)
 68 |         logB0 <- (v0 / 2) * as.numeric(determinant(W0inv)$modulus) - (v0 * ndim/2) * log(2) - (ndim*(ndim-1)/4) * log(pi) - sum(lgamma(0.5 * (v0 + 1 - (1:ndim))))
 69 |         logCalpha <- lgamma(sum(alpha)) - sum(lgamma(alpha))
 70 | 
 71 |         H <- 0
 72 |         for (kk in 1:ncomp) {
 73 |             logBk <- -(v[kk]/2)*log(det(W[[kk]])) - (v[kk]*ndim/2)*log(2) - (ndim*(ndim-1)/4)*log(pi) - sum(lgamma(0.5*(v[kk] + 1 - (1:ndim))))
 74 |             H <- H -logBk - 0.5*(v[kk] -ndim-1) * logLambdaTilde[kk] + 0.5*v[kk]*ndim
 75 |             trSW[kk] <- sum(diag(v[kk] * S[[kk]] %*% W[[kk]]))
 76 |             diff1 <- xbar[[kk]] - m[[kk]]
 77 |             xbarWxbar[[kk]] <- sum(diff1 * (W[[kk]] %*% diff1))
 78 |             diff1 <- m[[kk]] - m0
 79 |             mWm[kk] <- sum(diff1 * (W[[kk]] %*% diff1))
 80 |             trW0invW[kk] <- sum(diag(W0inv %*% W[[kk]]))
 81 |         }
 82 | 
 83 |         Lt1 <- 0.5 * sum(Nk * (logLambdaTilde - ndim / betas - trSW - v * xbarWxbar - ndim * log(2*pi)));
 84 |         Lt2 <- sum(Nk * logPiTilde)
 85 |         Lt3 <- sum(logCalpha0) + sum((alpha0 - 1) * sum(logPiTilde))
 86 |         Lt41 <- 0.5 * sum(ndim * log(beta0/(2*pi)) + logLambdaTilde - ndim * beta0 / betas - beta0 * v * mWm)
 87 |         Lt42 <- ncomp * logB0 + 0.5 * (v0-ndim-1) * sum(logLambdaTilde) - 0.5*sum(v * trW0invW)
 88 |         Lt4 <- Lt41 + Lt42
 89 |         Lt5 <- sum(sum(r * log(r)))
 90 |         Lt6 <- sum((alpha - 1) * logPiTilde) + logCalpha
 91 |         Lt7 <- 0.5 * sum(logLambdaTilde + ndim * log(betas / (2*pi))) - 0.5 * ndim * ncomp - H
 92 | 
 93 |         L[ii] <- Lt1 + Lt2 + Lt3 + Lt4 - Lt5 - Lt6 - Lt7
 94 | 
 95 |         alpha <- alpha0 + Nk
 96 |         betas <- beta0 + Nk
 97 |         v <- v0 + Nk
 98 |         m <- vector("list", ncomp)
 99 |         W <- vector("list", ncomp)
100 |         for (kk in 1:ncomp) {
101 |             m[[kk]] <- (beta0 * m0 + Nk[kk] * xbar[[kk]]) / betas[kk]
102 |             mlt <- (beta0 * Nk[kk]) / (beta0 + Nk[kk])
103 |             W[[kk]] <- solve(W0inv + Nk[kk] * S[[kk]] + mlt * tcrossprod(xbar[[kk]] - m0))
104 |         }
105 |     }
106 |     endTime <- proc.time()
107 | 
108 |     list(
109 |         'alpha' = alpha,
110 |         'beta' = betas,
111 |         'mu' = m,
112 |         'W' = W,  ## W is information matrix!
113 |         'v' = v,
114 |         'L' = L,
115 |         'time' = endTime[3] - startTime[3]
116 |     )
117 | }
118 | 


--------------------------------------------------------------------------------
/mixtures/code/wasp_sampler.R:
--------------------------------------------------------------------------------
 1 | mvnWaspMix <- function (dataMat, ncomp = 2, nrep = 10, niter = 10000, nburn = 5000, nthin = 5) {
 2 |     library(matrixStats)
 3 |     library(mvtnorm)
 4 |     library(MCMCpack)
 5 | 
 6 |     nobs <- nrow(dataMat)
 7 |     ndim <- ncol(dataMat)
 8 |     alpha <- rep(1 / ncomp, ncomp)
 9 | 
10 |     probs <- as.numeric(rdirichlet(1, alpha))
11 |     zMat <- rmultinom(nobs, 1, probs)
12 |     densMat <- matrix(0.0, nrow = nobs, ncol = ncomp)
13 |     muMat <- matrix(0.0, nrow = ncomp, ncol = ndim)
14 |     sigArr <- aperm(array(diag(1.0, ndim), c(ndim, ndim, ncomp)), perm = c(3, 1, 2))
15 | 
16 |     kap0 <- 0.01; m0 <- rep(0.0, ndim) # mu hyper-pars
17 |     nu0 <- 2; s0 <- 2 * nu0 * diag(1.0, ndim) # sigma hyper-pars
18 | 
19 |     probsSamp <- matrix(0.0, nrow = (niter - nburn) / nthin, ncol = ncomp)
20 |     muMatSamp <- array(0.0, dim = c(ncomp, ndim, (niter - nburn) / nthin))
21 |     sigMatSamp <- array(0.0, dim = c(ncomp, ndim, ndim, (niter - nburn) / nthin))
22 | 
23 |     cts <- 0
24 |     startTime <- proc.time()
25 |     for (ii in 0:niter) {
26 |         idxList <- lapply(split(zMat, row(zMat)),
27 |                           function (x) {
28 |                               which(x == 1)
29 |                           })
30 |         ns <- sapply(idxList, length)
31 |         # sample probs
32 |         probs <- (rdirichlet(1, ns * nrep + alpha))
33 | 
34 |         if (ii %% 100 == 0) {cat("gibbs: ", ii, "\n"); cat("ns: ", ns, "\n")}
35 | 
36 |         for (jj in seq_along(ns)) {
37 |             datMean <- colMeans(dataMat[idxList[[jj]], , drop = FALSE])
38 |             ## mean
39 |             muCov <- sigArr[jj, , ] / (kap0 + ns[jj] * nrep)
40 |             muMean <- (kap0 * m0 + ns[jj] * nrep * datMean) / (kap0 + ns[jj] * nrep)
41 |             muMat[jj, ] <- rmvnorm(1, mean = muMean, sigma = muCov, method = "chol")
42 |             ## cov
43 |             mat1 <- (kap0 * ns[jj] * nrep /  (kap0 + ns[jj] * nrep)) * tcrossprod(datMean - m0, datMean - m0)
44 |             centMat <- dataMat[idxList[[jj]], , drop = FALSE] - matrix(datMean, nrow = length(idxList[[jj]]), ncol = ndim, byrow = TRUE)
45 |             mat2 <- crossprod(centMat, centMat) * nrep
46 |             covSclMat <- mat1 + mat2 + s0
47 |             covDf <- ns[jj] * nrep + nu0 + 1
48 |             sigArr[jj, , ]<- riwish(covDf, covSclMat)
49 |             densMat[ , jj] <- dmvnorm(dataMat, muMat[jj, ], sigArr[jj, , ], log = TRUE)
50 |         }
51 | 
52 |         lprobs <- densMat + log(matrix(probs, nrow = nobs, ncol = ncol(densMat), byrow = TRUE))
53 |         eprobs <- exp(lprobs - rowMaxs(lprobs)) / rowSums(exp(lprobs - rowMaxs(lprobs)))
54 | 
55 |         for (kk in seq_len(nobs)) {
56 |             ppp <- eprobs[kk, ]
57 |             zMat[ , kk] <- rmultinom(1, 1, ppp)
58 |         }
59 | 
60 |         if ((ii > nburn) && (ii %% nthin == 0)) {
61 |             cts <- cts + 1
62 |             probsSamp[cts, ] <- probs
63 |             muMatSamp[ , , cts] <- muMat
64 |             sigMatSamp[ , , , cts] <- sigArr
65 |         }
66 |     }
67 |     endTime <- proc.time()
68 | 
69 | 
70 |     list(
71 |         'mu' = muMatSamp,
72 |         'cov' = sigMatSamp,
73 |         'prob' = probsSamp,
74 |         'time' = endTime[3] - startTime[3]
75 |     )
76 | }
77 | 


--------------------------------------------------------------------------------
/mixtures/data/mix_100k.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blayes/WASP/a3f415cfe4276776a5116459d51491620029c357/mixtures/data/mix_100k.rds


--------------------------------------------------------------------------------
/mixtures/data/wasp_mix_100k_k10.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blayes/WASP/a3f415cfe4276776a5116459d51491620029c357/mixtures/data/wasp_mix_100k_k10.rds


--------------------------------------------------------------------------------
/mixtures/data/wasp_mix_100k_k5.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blayes/WASP/a3f415cfe4276776a5116459d51491620029c357/mixtures/data/wasp_mix_100k_k5.rds


--------------------------------------------------------------------------------
/mixtures/qsub/calcWasp10_1.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N wasp10_1
 3 | #$ -l mf=16G
 4 | #$ -pe smp 2
 5 | #$ -l h_rt=320:00:00
 6 | #$ -l s_rt=320:00:00
 7 | #$ -wd /Users/ssrivastva/wasp/mixtures/code/
 8 | #$ -m a
 9 | #$ -M sanvesh-srivastava@uiowa.edu
10 | #$ -t 1-10
11 | #$ -V
12 | #$ -e /Users/ssrivastva/err/
13 | #$ -o /Users/ssrivastva/out/
14 | 
15 | module load gurobi/6.5.1
16 | 
17 | module load matlab/R2015b
18 | 
19 | matlab -nojvm -nodisplay -singleCompThread -r "calc_wasp10($SGE_TASK_ID, 10, 1)"
20 | 


--------------------------------------------------------------------------------
/mixtures/qsub/calcWasp10_2.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N wasp10_2
 3 | #$ -l mf=16G
 4 | #$ -pe smp 2
 5 | #$ -l h_rt=320:00:00
 6 | #$ -l s_rt=320:00:00
 7 | #$ -wd /Users/ssrivastva/wasp/mixtures/code/
 8 | #$ -m a
 9 | #$ -M sanvesh-srivastava@uiowa.edu
10 | #$ -t 1-10
11 | #$ -V
12 | #$ -e /Users/ssrivastva/err/
13 | #$ -o /Users/ssrivastva/out/
14 | 
15 | module load gurobi/6.5.1
16 | 
17 | module load matlab/R2015b
18 | 
19 | matlab -nojvm -nodisplay -singleCompThread -r "calc_wasp10($SGE_TASK_ID, 10, 2)"
20 | 


--------------------------------------------------------------------------------
/mixtures/qsub/calcWasp5_1.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N wasp5_1
 3 | #$ -l mf=16G
 4 | #$ -pe smp 2
 5 | #$ -l h_rt=320:00:00
 6 | #$ -l s_rt=320:00:00
 7 | #$ -wd /Users/ssrivastva/wasp/mixtures/code/
 8 | #$ -m a
 9 | #$ -M sanvesh-srivastava@uiowa.edu
10 | #$ -t 1-10
11 | #$ -V
12 | #$ -e /Users/ssrivastva/err/
13 | #$ -o /Users/ssrivastva/out/
14 | 
15 | module load gurobi/6.5.1
16 | 
17 | module load matlab/R2015b
18 | 
19 | matlab -nojvm -nodisplay -singleCompThread -r "calc_wasp5($SGE_TASK_ID, 5, 1)"
20 | 


--------------------------------------------------------------------------------
/mixtures/qsub/calcWasp5_2.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N wasp5_2
 3 | #$ -l mf=16G
 4 | #$ -pe smp 2
 5 | #$ -l h_rt=320:00:00
 6 | #$ -l s_rt=320:00:00
 7 | #$ -wd /Users/ssrivastva/wasp/mixtures/code/
 8 | #$ -m a
 9 | #$ -M sanvesh-srivastava@uiowa.edu
10 | #$ -t 1-10
11 | #$ -V
12 | #$ -e /Users/ssrivastva/err/
13 | #$ -o /Users/ssrivastva/out/
14 | 
15 | module load gurobi/6.5.1
16 | 
17 | module load matlab/R2015b
18 | 
19 | matlab -nojvm -nodisplay -singleCompThread -r "calc_wasp5($SGE_TASK_ID, 5, 2)"
20 | 


--------------------------------------------------------------------------------
/mixtures/qsub/comp10.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N comp10
 3 | #$ -l mf=16G        
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/mixtures/code/
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-100
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load R/3.3.0
15 | 
16 | R CMD BATCH --no-save --no-restore "--args 4 $SGE_TASK_ID" submit.R comp/10_$SGE_TASK_ID.rout
17 | 
18 | 
19 | 
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/mixtures/qsub/comp5.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N sub5
 3 | #$ -l mf=16G        
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/mixtures/code/
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 24
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load R/3.3.0
15 | 
16 | R CMD BATCH --no-save --no-restore "--args 5 $SGE_TASK_ID" submit.R comp/5_$SGE_TASK_ID.rout
17 | 
18 | 
19 | 
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/mixtures/qsub/compDens.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N compDens
 3 | #$ -l mf=16G        
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/mixtures/code/
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-20
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load R/3.3.0
15 | 
16 | R CMD BATCH --no-save --no-restore "--args 7 $SGE_TASK_ID" submit.R comp/dens_$SGE_TASK_ID.rout
17 | 
18 | 
19 | 
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/mixtures/qsub/compRho.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N compRho
 3 | #$ -l mf=16G        
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/mixtures/code/
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-20
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load R/3.3.0
15 | 
16 | R CMD BATCH --no-save --no-restore "--args 8 $SGE_TASK_ID" submit.R comp/rho_$SGE_TASK_ID.rout
17 | 
18 | 
19 | 
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/mixtures/qsub/full.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N mix_full
 3 | #$ -l mf=64G        
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/mixtures/code/
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-10
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load R/3.3.0
15 | 
16 | R CMD BATCH --no-save --no-restore "--args 1 $SGE_TASK_ID" submit.R full/full_$SGE_TASK_ID.rout
17 | 
18 | 
19 | 
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/mixtures/qsub/sub10.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N sub10
 3 | #$ -l mf=16G        
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/mixtures/code/
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 11-100
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load R/3.3.0
15 | 
16 | R CMD BATCH --no-save --no-restore "--args 2 $SGE_TASK_ID" submit.R wasp/10_$SGE_TASK_ID.rout
17 | 
18 | 
19 | 
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/mixtures/qsub/sub5.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N sub5
 3 | #$ -l mf=16G        
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/mixtures/code/
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 6-50
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load R/3.3.0
15 | 
16 | R CMD BATCH --no-save --no-restore "--args 3 $SGE_TASK_ID" submit.R wasp/5_$SGE_TASK_ID.rout
17 | 
18 | 
19 | 
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/mixtures/qsub/vb.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N mix_vb
 3 | #$ -l mf=16G        
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/mixtures/code/
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-10
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load R/3.3.0
15 | 
16 | R CMD BATCH --no-save --no-restore "--args 6 $SGE_TASK_ID" submit.R vb/vb_$SGE_TASK_ID.rout
17 | 
18 | 
19 | 
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/mixtures/result/accMu.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blayes/WASP/a3f415cfe4276776a5116459d51491620029c357/mixtures/result/accMu.rds


--------------------------------------------------------------------------------
/mixtures/result/accRho.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blayes/WASP/a3f415cfe4276776a5116459d51491620029c357/mixtures/result/accRho.rds


--------------------------------------------------------------------------------
/mixtures/result/corrList.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blayes/WASP/a3f415cfe4276776a5116459d51491620029c357/mixtures/result/corrList.rds


--------------------------------------------------------------------------------
/mixtures/result/densList.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blayes/WASP/a3f415cfe4276776a5116459d51491620029c357/mixtures/result/densList.rds


--------------------------------------------------------------------------------
/mixtures/result/muList.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blayes/WASP/a3f415cfe4276776a5116459d51491620029c357/mixtures/result/muList.rds


--------------------------------------------------------------------------------
/mixtures/result/subMuList.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blayes/WASP/a3f415cfe4276776a5116459d51491620029c357/mixtures/result/subMuList.rds


--------------------------------------------------------------------------------
/mixtures/result/subMuList10.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blayes/WASP/a3f415cfe4276776a5116459d51491620029c357/mixtures/result/subMuList10.rds


--------------------------------------------------------------------------------
/mixtures/result/subMuList5.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blayes/WASP/a3f415cfe4276776a5116459d51491620029c357/mixtures/result/subMuList5.rds


--------------------------------------------------------------------------------
/mixtures/result/subRhoList.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blayes/WASP/a3f415cfe4276776a5116459d51491620029c357/mixtures/result/subRhoList.rds


--------------------------------------------------------------------------------
/mixtures/result/subRhoList10.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blayes/WASP/a3f415cfe4276776a5116459d51491620029c357/mixtures/result/subRhoList10.rds


--------------------------------------------------------------------------------
/mixtures/result/subRhoList5.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blayes/WASP/a3f415cfe4276776a5116459d51491620029c357/mixtures/result/subRhoList5.rds


--------------------------------------------------------------------------------
/ml/code/calc_wasp_cov_2d_k10.m:
--------------------------------------------------------------------------------
  1 | % dirp = '/Shared/ssrivastva/wasp/ml/result/wasp/'
  2 | function calc_wasp_cov_2d_k10(dd, nsub, dirp)
  3 | 
  4 | addpath('/opt/gurobi/6.5.1/linux64/matlab/');
  5 | 
  6 | grdsize = 90;
  7 | ndim = 4;
  8 | rtime = zeros(ndim, 1);
  9 | 
 10 | % calculate the pair-wise sq. euclidean distance between the atoms of subset
 11 | % posteriors and WASP atoms
 12 | for dims = 1:ndim
 13 |     covs = {};
 14 |     for jj = 1:nsub
 15 |         covs{jj} = csvread(strcat(dirp, 'samp/joint/cov_cv_', num2str(dd), ...
 16 |                                    '_nsub_', num2str(jj), '_d_', num2str(dims), '_k10.csv'));         
 17 |     end
 18 |     
 19 |     subsetPost = {};    
 20 |     for jj = 1:nsub
 21 |         subsetPost{jj} = covs{jj}(randi([1 1000], 100, 1), :);
 22 |     end         
 23 | 
 24 |     lbd1 = min(cellfun(@(x) x(1), cellfun(@(x) min(x), subsetPost,'UniformOutput', false)));
 25 |     lbd2 = min(cellfun(@(x) x(2), cellfun(@(x) min(x), subsetPost,'UniformOutput', false)));   
 26 |     ubd1 = max(cellfun(@(x) x(1), cellfun(@(x) max(x), subsetPost,'UniformOutput', false)));
 27 |     ubd2 = max(cellfun(@(x) x(2), cellfun(@(x) max(x), subsetPost,'UniformOutput', false)));   
 28 | 
 29 |     [opostx, oposty] = meshgrid(linspace(lbd1, ubd1, grdsize), linspace(lbd2, ubd2, grdsize));
 30 |     overallPost = [opostx(:) oposty(:)]; % 
 31 |     
 32 |     distMatCell = {};
 33 |     
 34 |     m00 = diag(overallPost * overallPost');
 35 |     for ii = 1:nsub
 36 |         mm = diag(subsetPost{ii} * subsetPost{ii}');    
 37 |         mm1 = overallPost * subsetPost{ii}'; 
 38 |         distMatCell{ii} = bsxfun(@plus, bsxfun(@plus, -2 * mm1, mm'), m00);    
 39 |     end
 40 |     
 41 |     % constants
 42 |     K  = nsub;
 43 |     Ni = cell2mat(cellfun(@(x) size(x, 2), distMatCell, 'UniformOutput', false));
 44 |     N  = size(overallPost, 1);
 45 |     nx = N * (N+1);
 46 |     mx = K * N + N + 1;
 47 |     In = eye(N);
 48 |     En = ones(1, N);
 49 | 
 50 |     % Generate matrix A0.
 51 |     A0  = sparse([]);
 52 |     for p = 1:K
 53 |         cc = (1:N)';                  % terribly fast version of 
 54 |         idx = cc(:, ones(Ni(p), 1));  % repmat(In, 1, Ni(p)) / Ni(p)
 55 |         Rp  = In(:, idx(:)) / Ni(p);  % in 3 steps
 56 |         A0  = blkdiag(A0, Rp); 
 57 |     end
 58 |     cc = (1:N)';                  % terribly fast version of 
 59 |     idx = cc(:, ones(K, 1));      % repmat(-In, K, 1) 
 60 |     A00  = -In(idx(:), :);        % in 3 steps
 61 |     
 62 |     A0 = sparse([A00, A0]);
 63 |     b0 = zeros(size(A0, 1), 1);
 64 |     disp('done generating A ...');        
 65 |     
 66 |     % Generate matrix B from simplex constraints.
 67 |     B = sparse([]);
 68 |     for p = 0:(sum(Ni))
 69 |         B = blkdiag(B, En);
 70 |     end
 71 |     disp('done generating B ...');        
 72 |     
 73 |     % The hold matrix C.
 74 |     A = sparse([A0; B]);
 75 | 
 76 |     % Generate the right hand size vector b.
 77 |     b = sparse([zeros(K * N, 1); ones(sum(Ni) + 1, 1)]);
 78 |     
 79 |     % Generate the cost vector
 80 |     costCell = cellfun(@(x) x(:) / size(x, 2), distMatCell, 'UniformOutput', false);
 81 |     costVec = [zeros(size(overallPost, 1), 1); cell2mat(costCell(:))];
 82 |     
 83 |     c = sparse(costVec);
 84 |     
 85 |     tic;
 86 |     lpsol = callLpSolver('gurobi', A, b, c, 10000, 1e-10);
 87 |     rtime(dims, 1) = toc;
 88 |     
 89 |     [tmats, avec] = recoverSolution(lpsol, K, N, Ni);
 90 | 
 91 |     save(strcat(dirp, 'joint/joint_cov_cv_',  num2str(dd), '_d_', num2str(dims), '_k10.mat'), 'tmats','avec', 'subsetPost', 'overallPost');    
 92 |     summ = [overallPost avec];
 93 |     csvwrite(strcat(dirp, 'joint/wasp_cov_cv_',  num2str(dd), '_d_', num2str(dims), '_k10.csv'), summ);
 94 |     
 95 |     disp(['done with dim ' num2str(dims)]);    
 96 | end    
 97 | 
 98 | csvwrite(strcat(dirp, 'joint/cov_2d_times_cv_', num2str(dd),  '_k10.csv'), rtime);    
 99 | 
100 | quit
101 | 
102 | 


--------------------------------------------------------------------------------
/ml/code/callLpSolver.m:
--------------------------------------------------------------------------------
  1 | % FUNCTION:  [xsol, output] = callLpSolver(solver, Amat, bvec, ...
  2 | %                                          cvec, maxiters, tolx)
  3 | % PURPOSE: Call the Lp solver to solve the LP problem.
  4 | % 
  5 | %
  6 | function [xsol, output] = callLpSolver(solver, Amat, bvec, cvec, maxiters, tolx)
  7 | 
  8 | % Check the inputs.
  9 | if nargin < 3, error('At least three inputs are required!'); end
 10 | if nargin < 5, tolx = 1e-4; end
 11 | if nargin < 4, maxiters = 1000; end
 12 | if isempty(tolx), tolx = 1e-4; end
 13 | if isempty(maxiters), maxiters = 1000; end
 14 | if size(Amat, 1) ~= length(bvec), error('Inputs are inconsistent!'); end
 15 | if size(Amat, 2) ~= length(cvec), error('Inputs are inconsistent!'); end
 16 | xsol = []; output = [];
 17 | nx   = length(cvec);
 18 | 
 19 | %% Call the SeDuMi solver.
 20 | if strcmpi(solver, 'sedumi')
 21 |     pars.maxiter = maxiters;
 22 |     pars.eps     = tolx;
 23 |     Cones.l      = length(cvec);
 24 |     time3        = tic;
 25 |     [x_sedumi, y_sedumi, info] = sedumi(Amat, bvec, cvec, Cones, pars);
 26 |     x_sedumi = full(x_sedumi);
 27 |     xsol         = x_sedumi;
 28 |     output.time3        = toc(time3);
 29 |     output.info  = info;
 30 |     output.dual_sol = y_sedumi;
 31 | end
 32 | 
 33 | %% Call the Matlab LINPROG solver.
 34 | if strcmpi(solver, 'linprog')
 35 |     time5       = tic;
 36 |     opts        = optimset('Algorithm', 'interior-point', ...
 37 |                            'Display', 'iter', ...
 38 |                            'MaxIter', maxiters, 'TolX', tolx, 'TolFun', tolx);
 39 |     [xsol, fx2] = linprog(cvec, [], [], Amat, bvec, zeros(nx, 1), [], [], opts);
 40 |     output.time = toc(time5);
 41 |     output.fx   = fx2;
 42 | end
 43 | 
 44 | %% Call the SDPT3 solver.
 45 | if strcmpi(solver, 'sdpt3')
 46 |     Asdpt3 = spconvert(Amat);
 47 |     blk{1, 1} = 'l';
 48 |     blk{1, 2} = ones(1, nx);
 49 |     sdpt3opt  = struct('gaptol', tolx, 'maxit', maxiters);
 50 |     time4     = tic;
 51 |     %[X0, y0, Z0] = infeaspt(blk, Asdpt3, cvec, bvec); 
 52 |     %[fx_sdpt3, x_sdpt3, y_sdpt3, Z_sdpt3] = sqlp(blk, Asdpt3, cvec, bvec, sdpt3opt, X0, y0, Z0);
 53 |     [fx_sdpt3, x_sdpt3, y_sdpt3, Z_sdpt3] = sqlp(blk, Asdpt3, cvec, bvec, sdpt3opt);
 54 |     output.time4     = toc(time4);
 55 |     x_sdpt3   = x_sdpt3{:};
 56 |     xsol      = x_sdpt3;
 57 |     output.dual_sol = y_sdpt3;
 58 |     output.slacks   = Z_sdpt3;
 59 |     output.fx       = fx_sdpt3;
 60 | end    
 61 | 
 62 | %% Call Our Decopt solver.
 63 | if strcmpi(solver, 'decopt')
 64 |     
 65 |     % Set the parameters.
 66 |     param.MaxIters      = maxiters;
 67 |     param.Verbosity     = 2;
 68 |     param.RelTolX       = tolx;
 69 |     param.saveHistMode  = 0;
 70 |     param.Algorithm     = 3;
 71 |     param.InnerMaxIters = 20;
 72 |     param.adaptStepSize = 0;
 73 | 
 74 |     % Call the solver.
 75 |     proxLpPos    = @(x, gamma)( min( max(0, x - gamma*cvec), 1.0) );
 76 | 
 77 |     % User-define proximal-functions.
 78 |     proxOpers{1} = @(x, gamma, varargin)(proxLpPos(x, gamma));
 79 |     proxOpers{2} = @(x, gamma, varargin)(projL2norm(x, 1e-12));
 80 | 
 81 |     proxOpers{3} = @(x, varargin)( cvec'*x );
 82 |     proxOpers{4} = @(x, varargin)(0);
 83 |     
 84 |     % Generate an initial point.
 85 |     x0       = zeros(nx, 1);
 86 |     
 87 |     %% Call the solver with user-define prox-functions.
 88 |     time1 = tic;
 89 |     [xsol, out] = decoptSolver('UserDef', Amat, bvec, param, 'x0', x0, 'Prox', proxOpers, 'GammaFactor', 1.1);
 90 |     output.time = toc(time1);
 91 |     output.info = out;
 92 | 
 93 | end
 94 | 
 95 | %% Call the Gurobi solver.
 96 | if strcmpi(solver, 'gurobi')
 97 |     
 98 |     % Generate the LP model.
 99 |     time_g            = tic;
100 |     model.A           = Amat;
101 |     model.obj         = full(cvec);
102 |     model.rhs         = full(bvec);
103 |     model.modelsense  = 'min';
104 |     model.sense       = '=';
105 |     
106 |     % Define the parameters.
107 |     param.method      = 2;
108 |     param.Presolve    = 2;
109 |     param.Crossover   = 0;
110 |     param.outputflag  = 1;
111 |     
112 |     % Call the solver.
113 |     result            = gurobi(model, param);
114 |     
115 |     % Obtain the final results.
116 |     output.result     = result;
117 |     output.time       = toc(time_g);
118 |     xsol              = result.x;
119 |     
120 | end
121 | 
122 | 


--------------------------------------------------------------------------------
/ml/code/comp_lme.stan:
--------------------------------------------------------------------------------
 1 | functions {
 2 |   // prior^{1/k}; k = #subsets
 3 |   real approx_normal_prior_log(real beta1, real mn1, real sigma1, real nsub) {
 4 |     return normal_log(beta1, mn1, sigma1) / nsub;
 5 |   }
 6 |   // prior^{1/k}; k = #subsets
 7 |   real approx_cauchy_log(real beta1, real mn1, real sigma1, real nsub) {
 8 |     return cauchy_log(beta1, mn1, sigma1) / nsub;
 9 |   }  
10 |   // prior^{1/k}; k = #subsets
11 |   real approx_lkj_corr_log(matrix corrMat, real eta, real nsub) {
12 |     return lkj_corr_log(corrMat, eta) / nsub;
13 |   }
14 |   // takes care of the likelihood
15 |   real approx_lik_log (vector y, vector mu, real stdErr, matrix covMat, matrix zmat, real nsub) {
16 |     matrix[num_elements(y), num_elements(y)] covY; 
17 |     matrix[num_elements(y), num_elements(y)] L;
18 |     
19 |     covY = quad_form(covMat, zmat');
20 |     for (kk in 1:(rows(zmat)))
21 |       covY[kk, kk] = covY[kk, kk] + stdErr;
22 | 
23 |     L = cholesky_decompose(covY);
24 |     
25 |     return (nsub * multi_normal_cholesky_log(y, mu, L));
26 |   }   
27 | }
28 | 
29 | data {
30 |   int<lower=0> nobs;                          // total no. of obs
31 |   int<lower=0> nfixef;                        // total no. of fixed effects
32 |   int<lower=0> nranef;                        // total no. of random effects
33 |   int<lower=0> ngroup;                        // total no. of clusters
34 |   real<lower=0> nsub;                         // total no. of resamples
35 |   matrix[nobs, nfixef] xmat;                  // fixed effects design matrix
36 |   matrix[nobs, nranef] zmat;                  // random effects design matrix
37 |   int group[nobs];                            // cluster ids
38 |   vector[nobs] yvec;                          // observations
39 |   int pos1[ngroup];                           // database indices ...
40 |   int pos2[ngroup];                           // to handle ragged arrays  
41 | }
42 | 
43 | transformed data {
44 |   // both fix eff. and rand. eff. are apriori centered at 0
45 |   vector[nranef] meanRanef;
46 |   vector[nfixef] meanFixef;
47 |   
48 |   meanRanef = rep_vector(0.0, nranef);
49 |   meanFixef = rep_vector(0.0, nfixef);
50 | }
51 | 
52 | parameters {
53 |   corr_matrix[nranef] corrRanef;       // correlation matrix of rand. eff.
54 |   vector<lower=0>[nranef] sclRanef;    // scale matrix of rand. eff.
55 |   
56 |   vector[nfixef] fixef;                // population level fix. eff.
57 |   real<lower=0> stdErrFixef;           // std err. in pop. level. fix. eff.
58 |   
59 |   real<lower=0> stdErr;                // population level std. err.
60 | }
61 | 
62 | transformed parameters {
63 |   matrix[nranef, nranef] covRanef;
64 |   vector[nobs] mu;
65 | 
66 |   mu = xmat * fixef;
67 | 
68 |   covRanef = quad_form_diag(corrRanef, sclRanef);  
69 | }
70 | 
71 | model {  
72 | 
73 |   stdErr ~ approx_cauchy(0, 2.5, nsub);
74 | 
75 |   // prior for fix. eff.
76 |   stdErrFixef ~ cauchy(0, 2.5);  
77 |   // beta ~ prior^{1/k}
78 |   for (pp in 1:nfixef) {
79 |     fixef[pp] ~ approx_normal_prior(0.0, stdErrFixef, nsub);
80 |   }
81 |   
82 |   // sample rand. eff.
83 |   for (ii in 1:nranef) {
84 |     sclRanef[ii] ~ approx_cauchy(0.0, 2.5, nsub);
85 |   }  
86 |   corrRanef ~ approx_lkj_corr(2.0, nsub);
87 |   
88 |   
89 |   // sample data
90 |   for (ii in 1:ngroup) {
91 |     segment(yvec, pos1[ii], pos2[ii] - pos1[ii] + 1) ~ approx_lik(segment(mu, pos1[ii], pos2[ii] - pos1[ii] + 1), 
92 |                                                                    stdErr, covRanef,
93 |                                                                    block(zmat, pos1[ii], 1, pos2[ii] - pos1[ii] + 1, nranef),
94 |                                                                    1);
95 |   }  
96 | }
97 | 
98 | 


--------------------------------------------------------------------------------
/ml/code/comp_sampler.R:
--------------------------------------------------------------------------------
 1 | sampleFromCompMixMdl <- function (yvec, xmat, zmat, group, nrep, niter, nburn, nthin, id) {
 2 |     library(inline)
 3 |     library(Rcpp)
 4 |     library(rstan)
 5 | 
 6 |     grpLbl <- sort(unique(group))
 7 |     ngroup <- length(grpLbl)
 8 | 
 9 |     ranefList <- list()
10 |     grpIdx <- list()
11 |     for (ii in 1:ngroup) {
12 |       grpIdx[[ii]] <- which(group == grpLbl[ii])
13 |       ranefList[[ii]] <- zmat[grpIdx[[ii]], , drop = FALSE]
14 |     }
15 |     ranefMat <- do.call(rbind, ranefList)
16 |     fixefMat <- xmat[unlist(grpIdx), ]
17 | 
18 |     pos2 <- cumsum(sapply(grpIdx, length))
19 |     pos1 <- c(1, pos2[-ngroup] + 1)
20 | 
21 |     ordY <- yvec[unlist(grpIdx)]
22 |     ordGrp <- group[unlist(grpIdx)]
23 | 
24 |     idx <- seq_along(unlist(grpIdx))
25 |     simList = list(nobs = length(yvec[idx]),
26 |       nfixef = ncol(xmat),
27 |       nranef = ncol(zmat),
28 |       ngroup = length(unique(ordGrp[idx])),
29 |       nsub = nrep,
30 |       xmat = fixefMat[idx, ],
31 |       zmat = ranefMat[idx, ],
32 |       group = ordGrp[idx],
33 |       yvec = ordY[idx],
34 |       pos1 = pos1,
35 |       pos2 = pos2)
36 | 
37 |     seeds <- (1:2000) * as.numeric(gsub(":", "", substr(Sys.time(), 12, 19)))
38 | 
39 |     startTime <- proc.time()
40 |     mdl <- stan(file = "comp_lme.stan", data = simList, iter = niter, warmup = nburn, chains = 1, thin = nthin,
41 |                 seed = seeds[id],
42 |                 init = list(list(betas = rep(0, ncol(xmat)),
43 |                                  corrRanef = diag(ncol(zmat)),
44 |                                  sclRanef = rep(2, ncol(zmat))
45 |                                  )))
46 |     endTime <- proc.time()
47 | 
48 |     lst <- mdl@sim$samples[[1]]
49 |     bs <- grep("fixef|covRanef", names(lst))
50 |     sampdf <- do.call(cbind, lst[bs])
51 | 
52 |     list(samples = sampdf[(nrow(sampdf) - (niter - nburn) / nthin + 1):nrow(sampdf), ], time = endTime - startTime)
53 | }
54 | 


--------------------------------------------------------------------------------
/ml/code/create_samples.R:
--------------------------------------------------------------------------------
 1 | rm(list=ls())
 2 | setwd("/Shared/ssrivastva/wasp/ml/result/")
 3 | 
 4 | meanMap <- 1:6
 5 | covMap <- c(7:12, 14:18, 21:24, 28:30, 35:36, 42)
 6 | 
 7 | meanList <- list()
 8 | covList <- list()
 9 | for (cc in 1:10) {
10 |     meanList[[cc]] <- list()
11 |     covList[[cc]] <- list()
12 |     for (kk in 1:10) {
13 |         meanList[[cc]][[kk]] <- readRDS(paste0("wasp/samp/wasp_cv_", cc, "_sub_", kk, "_k10.rds"))$samples[ , meanMap]
14 |         covList[[cc]][[kk]] <- readRDS(paste0("wasp/samp/wasp_cv_", cc, "_sub_", kk, "_k10.rds"))$samples[ , covMap]
15 |     }
16 | }
17 | 
18 | cov2d <- cbind(rep(2, 4), 3:6)
19 | 
20 | for (cc in 1:10) {
21 |     for (kk in 1:10) {
22 |         for (ddd in 1:4) {
23 |             write.table(covList[[cc]][[kk]][ , c(cov2d[ddd, 1], cov2d[ddd, 2])],
24 |                         file = paste0("wasp/samp/joint/cov_cv_", cc, "_nsub_", kk, "_d_", ddd,"_k10.csv"),
25 |                         sep = ",", row.names = FALSE, col.names = FALSE)
26 |         }
27 |     }
28 | }
29 | 


--------------------------------------------------------------------------------
/ml/code/full_lme.stan:
--------------------------------------------------------------------------------
 1 | data {
 2 |   // data for model fitting
 3 |   int<lower=0> nobs;                          // total no. of individuals
 4 |   int<lower=0> nfixef;                        // total no. of fixed effects
 5 |   int<lower=0> nranef;                        // total no. of random effects
 6 |   int<lower=0> ngroup;                        // total no. of clusters
 7 |   matrix[nobs, nfixef] xmat;                  // fixed effects design matrix
 8 |   matrix[nobs, nranef] zmat;                  // random effects design matrix
 9 |   int group[nobs];                            // cluster ids
10 |   real yvec[nobs];                            // observations
11 | }
12 | 
13 | transformed data {
14 |   // both fix eff. and rand. eff. are apriori centered at 0
15 |   vector[nranef] meanRanef;
16 |   vector[nfixef] meanFixef;
17 |   
18 |   meanRanef <- rep_vector(0.0, nranef);
19 |   meanFixef <- rep_vector(0.0, nfixef);
20 | }
21 | 
22 | parameters {
23 |   corr_matrix[nranef] corrRanef;       // correlation matrix of rand. eff.
24 |   vector<lower=0>[nranef] sclRanef;    // scale matrix of rand. eff.
25 |   vector[nranef] ranef[ngroup];        // population level rand. eff.
26 | 
27 |   vector[nfixef] fixef;                // population level fix. eff.
28 |   real<lower=0> stdErrFixef;           // std err. in pop. level. fix. eff.
29 | 
30 |   real<lower=0> stdErr;                // population level std. err.
31 | }
32 | 
33 | transformed parameters {
34 |   real yHat[nobs];
35 |   matrix[nranef, nranef] covRanef;
36 | 
37 |   for (ii in 1:nobs) {
38 |     yHat[ii] <- xmat[ii] * fixef + zmat[ii] * ranef[group[ii]]; // individual level mean 
39 |   }
40 | 
41 |   covRanef <- quad_form_diag(corrRanef, sclRanef);
42 | }
43 | 
44 | model {
45 |   stdErr ~ cauchy(0, 2.5);
46 |   
47 |   // sample rand. eff.
48 |   sclRanef ~ cauchy(0, 2.5);
49 |   corrRanef ~ lkj_corr(2);
50 |   for (ii in 1:ngroup) {
51 |     ranef[ii] ~ multi_normal(meanRanef, covRanef); 
52 |   }
53 |   
54 |   // sample fix. eff.
55 |   stdErrFixef ~ cauchy(0, 2.5);  
56 |   fixef ~ normal(meanFixef, stdErrFixef);
57 | 
58 |   // sample data
59 |   yvec ~ normal(yHat, stdErr); 
60 | }
61 | 


--------------------------------------------------------------------------------
/ml/code/import_data.R:
--------------------------------------------------------------------------------
 1 | rm(list = ls())
 2 | setwd("~/wasp/ml/code/")
 3 | fdata <- readRDS("~/wasp/ml/data/dataSel.rds")
 4 | 
 5 | set.seed(12345)
 6 | 
 7 | ncv <- 10
 8 | cvgrps <- sample(1:ncv, length(unique(fdata$group)), replace = TRUE)
 9 | grpSplit <- split(1:nrow(fdata$x), fdata$group)
10 | 
11 | testIdx <- list()
12 | trainIdx <- list()
13 | train <- list()
14 | test <- list()
15 | for (ii in 1:10) {
16 |     idx <- (cvgrps == ii)
17 |     testIdx[[ii]] <- unlist(grpSplit[idx])
18 |     test[[ii]] <- list(x = fdata$x[unlist(grpSplit[idx]), ],
19 |                        z = fdata$z[unlist(grpSplit[idx]), ],
20 |                        y = fdata$y[unlist(grpSplit[idx])],
21 |                        group = fdata$group[unlist(grpSplit[idx])]
22 |                        )
23 |     trainIdx[[ii]] <- setdiff(1:nrow(fdata$x), unlist(grpSplit[idx]))
24 |     train[[ii]] <- list(x = fdata$x[-unlist(grpSplit[idx]), ],
25 |                         z = fdata$z[-unlist(grpSplit[idx]), ],
26 |                         y = fdata$y[-unlist(grpSplit[idx])],
27 |                         group = fdata$group[-unlist(grpSplit[idx])]
28 |                         )
29 | }
30 | 
31 | saveRDS(train, "../data/ml_train.rds")
32 | saveRDS(test, "../data/ml_test.rds")
33 | 
34 | saveRDS(testIdx, "../data/test_idx.rds")
35 | saveRDS(trainIdx, "../data/train_idx.rds")
36 | 
37 | rm(list = ls())
38 | 
39 | train <- readRDS("../data/ml_train.rds")
40 | 
41 | ncv <- 10
42 | npart <- 10
43 | parts <- list()
44 | for (jj in 1:ncv) {
45 |     parts[[jj]] <- vector("list", length = npart)
46 |     names(parts[[jj]]) <- paste0("part", 1:npart)
47 | }
48 | names(parts) <- paste0("cv", 1:ncv)
49 | 
50 | set.seed(12345)
51 | npart <- 10
52 | parts <- list()
53 | for (cc in 1:10) {
54 |     parts[[cc]] <- vector("list", npart)
55 |     names(parts[[cc]]) <- paste0("k", 1:npart)
56 |     lst <- train[[cc]]
57 |     grpSplit <- split(1:nrow(lst$x), lst$group)
58 |     partsIdx <- sample(1:npart, length(grpSplit), replace = TRUE)
59 |     for (ll in 1:npart) {
60 |         grpIdx <- which(partsIdx == ll)
61 |         idx <- unlist(grpSplit[grpIdx])
62 |         parts[[cc]][[ll]]$nobs <- length(idx)
63 |         parts[[cc]][[ll]]$x <- lst$x[idx, ]
64 |         parts[[cc]][[ll]]$y <- lst$y[idx]
65 |         parts[[cc]][[ll]]$z <- lst$z[idx, ]
66 |         parts[[cc]][[ll]]$group <- lst$group[idx]
67 |         parts[[cc]][[ll]]$idx <- idx
68 |         parts[[cc]][[ll]]$nrep <- nrow(train[[cc]]$x) / length(idx)
69 |     }
70 | }
71 | 
72 | saveRDS(parts, "../data/wasp_ml_train.rds")
73 | 


--------------------------------------------------------------------------------
/ml/code/mcmc_sampler.R:
--------------------------------------------------------------------------------
 1 | sampleFromMixMdl <- function (yvec, xmat, zmat, group, niter, nburn, nthin, id) {
 2 |     library(inline)
 3 |     library(Rcpp)
 4 |     library(rstan)
 5 | 
 6 |     gg <- ordered(as.character(group), levels = sort(unique(group)))
 7 |     group <- as.integer(gg)
 8 | 
 9 |     simList = list(
10 |         nobs = length(yvec),
11 |         nfixef = ncol(xmat),
12 |         nranef = ncol(zmat),
13 |         ngroup = length(unique(group)),
14 |         xmat = xmat,
15 |         zmat = zmat,
16 |         group = group,
17 |         yvec = yvec)
18 | 
19 |     seeds <- (1:5000) * as.numeric(gsub(":", "", substr(Sys.time(), 12, 19)))
20 | 
21 |     stanCode <- readChar("full_lme.stan", file.info("full_lme.stan")$size)
22 |     startTime <- proc.time()
23 |     mdl <- stan(model_code = stanCode, data = simList, iter = niter, warmup = nburn, chains = 1, thin = nthin,
24 |                 seed = seeds[id],
25 |                 init = list(list(betas = rep(0, ncol(xmat)),
26 |                                  corrRanef = diag(ncol(zmat)),
27 |                                  sclRanef = rep(2, ncol(zmat))
28 |                                  )))
29 |     endTime <- proc.time()
30 | 
31 |     lst <- mdl@sim$samples[[1]]
32 |     bs <- grep("fixef|covRanef", names(lst))
33 |     sampdf <- do.call(cbind, lst[bs])
34 | 
35 |     list(samples = sampdf[(nrow(sampdf) - (niter - nburn) / nthin + 1):nrow(sampdf), ], time = endTime - startTime)
36 | }
37 | 


--------------------------------------------------------------------------------
/ml/code/recoverSolution.m:
--------------------------------------------------------------------------------
 1 | % FUNCTION: [tOptSol, aOptSol] = recoverSolution(xOptSol, nsubs, N, Ni)
 2 | % PURPOSE:  Recover the original solution.
 3 | %
 4 | function [tOptSol, aOptSol] = recoverSolution(xOptSol, nsubs, N, Ni)
 5 | 
 6 |     % Define this soft-thresholding operator to remove small elements.
 7 |     softThresOper = @(x, t)(sign(x).*max(abs(x) - t, 0));
 8 | 
 9 |     % Recover the solution.
10 |     aOptSol = xOptSol(1:N, 1);
11 |     xRest   = xOptSol(N+1:end);
12 |     for p = 1:nsubs
13 |         tOptSol{p} = reshape( xRest(1:N*Ni(p)), N, Ni(p))/Ni(p);
14 |         xRest      = xRest(N*Ni(p)+1:end);
15 |         tOptSol{p} = softThresOper(tOptSol{p}, 1e-10);
16 |     end
17 | 
18 | end
19 | % @END ...
20 | 


--------------------------------------------------------------------------------
/ml/code/submit.R:
--------------------------------------------------------------------------------
  1 | cmdArgs <- commandArgs(trailingOnly = TRUE)
  2 | 
  3 | mtd <- as.numeric(cmdArgs[1])
  4 | id <- as.numeric(cmdArgs[2])
  5 | 
  6 | if (mtd == 1) {
  7 |     source("mcmc_sampler.R")
  8 |     cvtrain <- readRDS("../data/ml_train.rds")
  9 |     train <- cvtrain[[id]]
 10 |     xmat <- as.matrix(train$x)
 11 |     zmat <- as.matrix(train$z)
 12 |     yvec <- as.numeric(train$y)
 13 |     group <- as.integer(train$group)
 14 |     res <- sampleFromMixMdl(yvec, xmat, zmat, group, 10000, 5000, 5, id)
 15 |     fname <- paste0("/Shared/ssrivastva/wasp/ml/result/full/full_res_", id, ".rds")
 16 |     saveRDS(res, fname)
 17 | } else if (mtd == 2) {
 18 |     source("variational_bayes.R")
 19 |     cvtrain <- readRDS("../data/ml_train.rds")
 20 |     train <- cvtrain[[id]]
 21 |     xmat <- as.matrix(train$x)
 22 |     zmat <- as.matrix(train$z)
 23 |     yvec <- as.numeric(train$y)
 24 |     group <- as.integer(train$group)
 25 |     res <- fitLinearMixefEffectsVB(yvec, xmat, zmat, group, 1000)
 26 |     fname <- paste0("/Shared/ssrivastva/wasp/ml/result/vb/vb_res_", id, ".rds")
 27 |     saveRDS(res, fname)
 28 | } else if (mtd == 3) {
 29 |     source("wasp_sampler.R")
 30 |     cvs <- rep(1:10, each = 10)
 31 |     wids <- cbind(cvs, rep(1:10, times = 10))
 32 | 
 33 |     cid <- wids[id, 1]
 34 |     sid <- wids[id, 2]
 35 | 
 36 |     cvtrain <- readRDS("../data/wasp_ml_train.rds")
 37 |     train <- cvtrain[[cid]][[sid]]
 38 |     rm(cvtrain)
 39 |     xmat <- as.matrix(train$x)
 40 |     zmat <- as.matrix(train$z)
 41 |     yvec <- as.numeric(train$y)
 42 |     group <- as.integer(train$group)
 43 |     res <- sampleFromWaspMixMdl(yvec, xmat, zmat, group, as.numeric(train$nrep), 10000, 5000, 5, id)
 44 |     fname <- paste0("/Shared/ssrivastva/wasp/ml/result/wasp/samp/wasp_cv_", cid, "_sub_", sid, "_k10.rds")
 45 |     saveRDS(res, fname)
 46 | } else if (mtd == 4) {
 47 |     source("comp_sampler.R")
 48 |     cvs <- rep(1:10, each = 10)
 49 |     wids <- cbind(cvs, rep(1:10, times = 10))
 50 | 
 51 |     cid <- wids[id, 1]
 52 |     sid <- wids[id, 2]
 53 | 
 54 |     cvtrain <- readRDS("../data/wasp_ml_train.rds")
 55 |     train <- cvtrain[[cid]][[sid]]
 56 |     rm(cvtrain)
 57 |     xmat <- as.matrix(train$x)
 58 |     zmat <- as.matrix(train$z)
 59 |     yvec <- as.numeric(train$y)
 60 |     group <- as.integer(train$group)
 61 |     res <- sampleFromCompMixMdl (yvec, xmat, zmat, group, as.numeric(train$nrep), 10000, 5000, 5, id)
 62 |     fname <- paste0("/Shared/ssrivastva/wasp/ml/result/comp/samp/comp_cv_", cid, "_sub_", sid, "_k10.rds")
 63 |     saveRDS(res, fname)
 64 | } else if (mtd == 5) {
 65 |     library(parallelMCMCcombine)
 66 | 
 67 |     cid <- id
 68 | 
 69 |     subfix <- array(0.0, dim = c(6, 1000, 10))
 70 |     subran <- array(0.0, dim = c(21, 1000, 10))
 71 |     tmp <- numeric(10)
 72 | 
 73 |     meanMap <- 1:6
 74 |     covMap <- c(7:12, 14:18, 21:24, 28:30, 35:36, 42)
 75 |     for (kk in 1:10) {
 76 |         fname <- paste0("/Shared/ssrivastva/wasp/ml/result/comp/samp/comp_cv_", cid, "_sub_", kk, "_k10.rds")
 77 |         samp <- readRDS(fname)
 78 |         subfix[ , , kk] <- t(samp$samples[ , meanMap])
 79 |         subran[ , , kk] <- t(samp$samples[ , covMap])
 80 |         tmp[kk] <- samp$time[3]
 81 |     }
 82 | 
 83 |     stime <- rep(0, 2)
 84 |     strt1 <- proc.time()
 85 |     scottFix <- consensusMCindep(subchain = subfix)
 86 |     end1 <- proc.time()
 87 |     stime[1] <- mean(tmp) + end1[3] - strt1[3]
 88 |     strt1 <- proc.time()
 89 |     scottRan <- consensusMCindep(subchain = subran)
 90 |     end1 <- proc.time()
 91 |     stime[2] <- mean(tmp) + end1[3] - strt1[3]
 92 | 
 93 |     xtime <- rep(0, 2)
 94 |     strt2 <- proc.time()
 95 |     xingFix <- semiparamDPE(subchain = subfix)
 96 |     end2 <- proc.time()
 97 |     xtime[1] <- mean(tmp) + end2[3] - strt2[3]
 98 |     strt2 <- proc.time()
 99 |     xingRan <- semiparamDPE(subchain = subran)
100 |     end2 <- proc.time()
101 |     xtime[2] <- mean(tmp) + end2[3] - strt2[3]
102 | 
103 |     fname1 <- paste0("/Shared/ssrivastva/wasp/ml/result/cons/marg/cons_fix_ran_cv_", cid, "_k10.rds")
104 |     fname2 <- paste0("/Shared/ssrivastva/wasp/ml/result/xing/marg/xing_fix_ran_cv_", cid, "_k10.rds")
105 | 
106 |     saveRDS(list(fix = t(scottFix), ran = t(scottRan), time = stime), fname1)
107 |     saveRDS(list(fix = t(xingFix), ran = t(xingRan), time = xtime), fname2)
108 | } else if (mtd == 6) {
109 |     library(parallelMCMCcombine)
110 | 
111 |     cid <- id
112 | 
113 |     subJtCov <- rep(list(array(0.0, dim = c(2, 1000, 10))), 4)
114 | 
115 |     tmp <- numeric(10)
116 | 
117 |     covMap <- c(7:12, 14:18, 21:24, 28:30, 35:36, 42)
118 |     for (kk in 1:10) {
119 |         fname <- paste0("/Shared/ssrivastva/wasp/ml/result/comp/samp/comp_cv_", cid, "_sub_", kk, "_k10.rds")
120 |         samp <- readRDS(fname)
121 |         cnames <- colnames(samp$samples)
122 |         subran <- samp$samples[ , covMap]
123 |         cov2d <- cbind(rep(2, 4), 3:6)
124 |         for (ddd in 1:4) {
125 |             subJtCov[[ddd]][ , , kk] <- t(subran[ , c(cov2d[ddd, 1], cov2d[ddd, 2])])
126 |         }
127 |         tmp[kk] <- samp$time[3]
128 |     }
129 | 
130 |     scottCov <- list()
131 |     scottTime <- numeric(4)
132 |     for (ddd in 1:4) {
133 |         strt1 <- proc.time()
134 |         scottCov[[ddd]] <- t(consensusMCcov(subchain = subJtCov[[ddd]]))
135 |         end1 <- proc.time()
136 |         scottTime[ddd] <- mean(tmp) + end1[3] - strt1[3]
137 |     }
138 | 
139 |     xingCov <- list()
140 |     xingTime <- numeric(4)
141 |     for (ddd in 1:4) {
142 |         strt1 <- proc.time()
143 |         xingCov[[ddd]] <- t(semiparamDPE(subchain = subJtCov[[ddd]]))
144 |         end1 <- proc.time()
145 |         xingTime[ddd] <- mean(tmp) + end1[3] - strt1[3]
146 |     }
147 | 
148 |     fname1 <- paste0("/Shared/ssrivastva/wasp/ml/result/cons/joint/cons_cov_cv_", cid, "_k10.rds")
149 |     fname2 <- paste0("/Shared/ssrivastva/wasp/ml/result/xing/joint/xing_cov_cv_", cid, "_k10.rds")
150 | 
151 |     saveRDS(list(cov = scottCov, time = scottTime), fname1)
152 |     saveRDS(list(cov = xingCov, time = xingTime), fname2)
153 | } else (mtd == 7) {
154 |     cvtrain <- readRDS("../data/ml_train.rds")
155 |     train <- cvtrain[[id]]
156 |     xmat <- as.matrix(train$x)
157 |     zmat <- as.matrix(train$z)
158 |     yvec <- as.numeric(train$y)
159 |     group <- as.integer(train$group)
160 | 
161 |     library(inline)
162 |     library(Rcpp)
163 |     library(rstan)
164 | 
165 |     gg <- ordered(as.character(group), levels = sort(unique(group)))
166 |     group <- as.integer(gg)
167 | 
168 |     simList = list(
169 |         nobs = length(yvec),
170 |         nfixef = ncol(xmat),
171 |         nranef = ncol(zmat),
172 |         ngroup = length(unique(group)),
173 |         xmat = xmat,
174 |         zmat = zmat,
175 |         group = group,
176 |         yvec = yvec)
177 | 
178 |     seeds <- (1:5000) * as.numeric(gsub(":", "", substr(Sys.time(), 12, 19)))
179 |     strt1 <- proc.time()
180 |     mdl <- stan_model("full_lme.stan")
181 |     res <- vb(mdl, data = simList, output_samples = 2000, seed = seeds[id])
182 |     end1 <- proc.time()
183 | 
184 |     fname <- paste0("/Shared/ssrivastva/wasp/ml/result/bbvb/bbvb_res_", id, ".rds")
185 |     saveRDS(list(res = res, time = end1 - strt1), fname)
186 | }
187 | 


--------------------------------------------------------------------------------
/ml/code/variational_bayes.R:
--------------------------------------------------------------------------------
  1 | fitLinearMixefEffectsVB <- function (yvec, xmat, zmat, group, niter) {
  2 |     library(Matrix)
  3 |     library(MCMCpack)
  4 | 
  5 |     nfixef <- ncol(xmat)
  6 |     nranef <- ncol(zmat)
  7 |     grpLbl <- sort(unique(group))
  8 |     ngroup <- length(grpLbl)
  9 |     ndim <- nrow(xmat)
 10 | 
 11 |     elbo <- numeric(niter)
 12 | 
 13 |     ranefList <- list()
 14 |     grpIdx <- list()
 15 |     for (ii in 1:ngroup) {
 16 |         grpIdx[[ii]] <- which(group == grpLbl[ii])
 17 |         ranefList[[ii]] <- zmat[grpIdx[[ii]], , drop = FALSE]
 18 |     }
 19 |     ranefMat <- bdiag(ranefList)
 20 | 
 21 |     fixefMat <- xmat[unlist(grpIdx), ]
 22 | 
 23 |     designMat <- cBind(fixefMat, ranefMat)
 24 |     designTransDesign <- crossprod(designMat, designMat)
 25 |     ordY <- yvec[unlist(grpIdx)]
 26 |     ordGrp <- group[unlist(grpIdx)]
 27 |     designTransY <- crossprod(designMat, ordY)
 28 | 
 29 |     muErrInv <- 1; muErrAInv <- 1; muVarErrAinv <- 1; errAScl <- 1; nu = 2; muRanCovInv <- solve(rWishart(1, 2 * ncol(zmat), diag(ncol(zmat)))[ , , 1]); varBeta <- 100; ranCovAsScl <- rep(1, ncol(zmat));
 30 | 
 31 |     prev <- list(coefMu = runif(nfixef), coefCov = diag(nfixef), ranCov = diag(nranef)); conv <- 1e5
 32 | 
 33 |     startTime <- proc.time()
 34 |     for (its in 0:niter) {
 35 | 
 36 |         smat <- diag(0, ncol(xmat))
 37 |         svec <- numeric(ncol(xmat))
 38 | 
 39 |         gmat <- vector("list", ngroup)
 40 |         hmat <- vector("list", ngroup)
 41 |         for(ii in 1:ngroup) {
 42 |             gmat[[ii]] <- crossprod(xmat[grpIdx[[ii]], ], zmat[grpIdx[[ii]], ]) * muErrInv
 43 |             hmat[[ii]] <- solve(crossprod(zmat[grpIdx[[ii]], ], zmat[grpIdx[[ii]], ]) * muErrInv + muRanCovInv)
 44 |             tmp <- gmat[[ii]] %*% hmat[[ii]]
 45 |             smat <- smat + tcrossprod(tmp, gmat[[ii]])
 46 |             svec <- svec + drop(tmp %*% crossprod(zmat[grpIdx[[ii]], ], yvec[grpIdx[[ii]]]))
 47 |         }
 48 | 
 49 |         fixCov <- solve(crossprod(fixefMat, fixefMat) * muErrInv + diag(1, nfixef) / varBeta - smat)
 50 |         fixMu <- drop(muErrInv * fixCov %*% (crossprod(fixefMat, ordY) - svec))
 51 | 
 52 |         ranMu <- vector("list", ngroup)
 53 |         ranCov <- vector("list", ngroup)
 54 |         tmp1 <- numeric(ngroup)
 55 |         tmp2 <- numeric(ngroup)
 56 |         for(ii in 1:ngroup) {
 57 |             ranCov[[ii]] <- hmat[[ii]] + tcrossprod(hmat[[ii]], gmat[[ii]]) %*% fixCov %*% gmat[[ii]] %*% hmat[[ii]]
 58 |             ranMu[[ii]] <- drop(hmat[[ii]] %*% (muErrInv * crossprod(zmat[grpIdx[[ii]], ], yvec[grpIdx[[ii]]]) - crossprod(gmat[[ii]], fixMu)))
 59 |             tmp1[ii] <- sum(crossprod(zmat[grpIdx[[ii]], ], zmat[grpIdx[[ii]], ]) * ranCov[[ii]])
 60 |             tmp2[ii] <- sum(tcrossprod(gmat[[ii]] %*% hmat[[ii]], gmat[[ii]]) * fixCov)
 61 |         }
 62 |         ranMuVec <- do.call(c, ranMu)
 63 |         resids <- ordY - fixefMat %*% fixMu - ranefMat %*% ranMuVec
 64 |         scaleErr <- muErrAInv + 0.5 * (sum(resids^2) + sum(crossprod(fixefMat, fixefMat) * fixCov) + sum(tmp1) - 2 * sum(tmp2) / muErrInv)
 65 |         shapeErr <- 0.5 * (ndim + 1)
 66 |         muErrInv <- shapeErr / scaleErr
 67 | 
 68 |         ## update post. for parameter in the px-ed form of half-cauchy prior for err
 69 |         shapeErrA <- 1
 70 |         scaleErrA <- muErrInv + errAScl
 71 |         muErrAInv <- shapeErrA / scaleErrA
 72 | 
 73 |         ## update post. for parameter in the px-ed form of half-cauchy
 74 |         ## prior for the random effects covariance matrix
 75 |         scaleRanCovAs <- nu * diag(muRanCovInv) + ranCovAsScl
 76 |         shapeRanCovAs <- 0.5 * (nu + nranef)
 77 |         muRanCovAsInv <- pmax(shapeRanCovAs / scaleRanCovAs, 1e-5)
 78 | 
 79 |         ranefCoefMuMat <- matrix(ranMuVec, nrow = nranef, ncol = ngroup)
 80 |         ranefCoefCovMat <- diag(0, nranef)
 81 |         for (jj in 1:nranef) {
 82 |             ranefCoefCovMat <- ranefCoefCovMat + ranCov[[jj]]
 83 |         }
 84 |         scaleRanCovMat <- tcrossprod(ranefCoefMuMat, ranefCoefMuMat) + ranefCoefCovMat + 2 * nu * diag(muRanCovAsInv)
 85 |         rateRanCovMat <- solve(scaleRanCovMat)
 86 |         muRanCovInv <- (nu + ngroup + nranef - 1) * rateRanCovMat
 87 | 
 88 |         if ((its > 10) && (conv < 1e-10))
 89 |             break
 90 | 
 91 |         if (its %% 10 == 0) {
 92 |             cat("iteration: ", its, "\n")
 93 | 
 94 |             diff1 <- matrix(prev$coefMu[1:nfixef] - fixMu)
 95 |             diff2 <- prev$coefCov - fixCov
 96 |             diff3 <- matrix(prev$ranCov - as.matrix(scaleRanCovMat))
 97 |             conv <- norm(diff1, "O") + norm(diff2, "O") + norm(diff3, "O")
 98 | 
 99 |             prev$coefMu <- fixMu; prev$coefCov <- fixCov; prev$ranCov <- scaleRanCovMat
100 |         }
101 | 
102 |     }
103 |     endTime <- proc.time()
104 | 
105 |     list(
106 |         coefs = list(
107 |             cov = fixCov,
108 |             mu = fixMu
109 |             )
110 |         ,
111 |         err = list(
112 |             aa = scaleErr,
113 |             bb = shapeErr
114 |             )
115 |         ,
116 |         cov = list(
117 |             scale = scaleRanCovMat,
118 |             df = (nu + ngroup + nranef - 1)
119 |             )
120 |         ,
121 |         niter = its
122 |         ,
123 |         time = endTime - startTime
124 |         )
125 | }
126 | 


--------------------------------------------------------------------------------
/ml/code/wasp_lme.stan:
--------------------------------------------------------------------------------
 1 | functions {
 2 |   // takes care of the modified likelihood
 3 |   real stoc_approx_log (vector y, vector mu, real stdErr, matrix covMat, matrix zmat, real nrep) {
 4 |     matrix[num_elements(y), num_elements(y)] covY; 
 5 |     matrix[num_elements(y), num_elements(y)] L;
 6 |     
 7 |     covY <- quad_form(covMat, zmat');
 8 |     for (kk in 1:(rows(zmat)))
 9 |       covY[kk, kk] <- covY[kk, kk] + stdErr;
10 | 
11 |     L <- cholesky_decompose(covY);
12 |     
13 |     return (nrep * multi_normal_cholesky_log(y, mu, L));
14 |   } 
15 | }
16 | 
17 | data {
18 |   int<lower=0> nobs;                          // total no. of obs
19 |   int<lower=0> nfixef;                        // total no. of fixed effects
20 |   int<lower=0> nranef;                        // total no. of random effects
21 |   int<lower=0> ngroup;                        // total no. of clusters
22 |   real<lower=0> nrep;                         // total no. of resamples
23 |   matrix[nobs, nfixef] xmat;                  // fixed effects design matrix
24 |   matrix[nobs, nranef] zmat;                  // random effects design matrix
25 |   int group[nobs];                            // cluster ids
26 |   vector[nobs] yvec;                          // observations
27 |   int pos1[ngroup];                           // database indices ...
28 |   int pos2[ngroup];                           // to handle ragged arrays
29 | }
30 | 
31 | transformed data {
32 |   // both fix eff. and rand. eff. are apriori centered at 0
33 |   vector[nranef] meanRanef;
34 |   vector[nfixef] meanFixef;
35 |   
36 |   meanRanef <- rep_vector(0.0, nranef);
37 |   meanFixef <- rep_vector(0.0, nfixef);
38 | }
39 | 
40 | parameters {
41 |   corr_matrix[nranef] corrRanef;       // correlation matrix of rand. eff.
42 |   vector<lower=0>[nranef] sclRanef;    // scale matrix of rand. eff.
43 | 
44 |   vector[nfixef] fixef;                // population level fix. eff.
45 |   real<lower=0> stdErrFixef;           // std err. in pop. level. fix. eff.
46 |   
47 |   real<lower=0> stdErr;                // population level std. err.
48 | }
49 | 
50 | transformed parameters {
51 |   matrix[nranef, nranef] covRanef;
52 |   vector[nobs] mu;
53 | 
54 |   mu <- xmat * fixef;
55 | 
56 |   covRanef <- quad_form_diag(corrRanef, sclRanef);
57 | }
58 | 
59 | model {  
60 | 
61 |   stdErr ~ cauchy(0, 2.5);
62 | 
63 |   // prior for fix. eff.
64 |   stdErrFixef ~ cauchy(0, 2.5);  
65 |   fixef ~ normal(meanFixef, stdErrFixef);
66 |   
67 |   // prior for rand. eff.
68 |   sclRanef ~ cauchy(0, 2.5);
69 |   corrRanef ~ lkj_corr(2);
70 |   
71 |   for (ii in 1:ngroup) {
72 |     segment(yvec, pos1[ii], pos2[ii] - pos1[ii] + 1) ~ stoc_approx(segment(mu, pos1[ii], pos2[ii] - pos1[ii] + 1), 
73 |                                                                    stdErr, covRanef,
74 |                                                                    block(zmat, pos1[ii], 1, pos2[ii] - pos1[ii] + 1, nranef),
75 |                                                                    nrep);
76 |   }
77 | }
78 | 


--------------------------------------------------------------------------------
/ml/code/wasp_sampler.R:
--------------------------------------------------------------------------------
 1 | sampleFromWaspMixMdl <- function (yvec, xmat, zmat, group, nrep, niter, nburn, nthin, id) {
 2 |     library(inline)
 3 |     library(Rcpp)
 4 |     library(rstan)
 5 | 
 6 |     grpLbl <- sort(unique(group))
 7 |     ngroup <- length(grpLbl)
 8 | 
 9 |     ranefList <- list()
10 |     grpIdx <- list()
11 |     for (ii in 1:ngroup) {
12 |       grpIdx[[ii]] <- which(group == grpLbl[ii])
13 |       ranefList[[ii]] <- zmat[grpIdx[[ii]], , drop = FALSE]
14 |     }
15 |     ranefMat <- do.call(rbind, ranefList)
16 |     fixefMat <- xmat[unlist(grpIdx), ]
17 | 
18 |     pos2 <- cumsum(sapply(grpIdx, length))
19 |     pos1 <- c(1, pos2[-ngroup] + 1)
20 | 
21 |     ordY <- yvec[unlist(grpIdx)]
22 |     ordGrp <- group[unlist(grpIdx)]
23 | 
24 |     idx <- seq_along(unlist(grpIdx))
25 |     simList = list(nobs = length(yvec[idx]),
26 |       nfixef = ncol(xmat),
27 |       nranef = ncol(zmat),
28 |       ngroup = length(unique(ordGrp[idx])),
29 |       nrep = nrep,
30 |       xmat = fixefMat[idx, ],
31 |       zmat = ranefMat[idx, ],
32 |       group = ordGrp[idx],
33 |       yvec = ordY[idx],
34 |       pos1 = pos1,
35 |       pos2 = pos2)
36 | 
37 |     seeds <- (1:2000) * as.numeric(gsub(":", "", substr(Sys.time(), 12, 19)))
38 | 
39 |     stanCode <- readChar("wasp_lme.stan", file.info("wasp_lme.stan")$size)
40 |     startTime <- proc.time()
41 |     mdl <- stan(model_code = stanCode, data = simList, iter = niter, warmup = nburn, chains = 1, thin = nthin,
42 |                 seed = seeds[id],
43 |                 init = list(list(betas = rep(0, ncol(xmat)),
44 |                                  corrRanef = diag(ncol(zmat)),
45 |                                  sclRanef = rep(2, ncol(zmat))
46 |                                  )))
47 |     endTime <- proc.time()
48 | 
49 |     lst <- mdl@sim$samples[[1]]
50 |     bs <- grep("fixef|covRanef", names(lst))
51 |     sampdf <- do.call(cbind, lst[bs])
52 | 
53 |     list(samples = sampdf[(nrow(sampdf) - (niter - nburn) / nthin + 1):nrow(sampdf), ], time = endTime - startTime)
54 | }
55 | 


--------------------------------------------------------------------------------
/ml/data/dataSel.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blayes/WASP/a3f415cfe4276776a5116459d51491620029c357/ml/data/dataSel.rds


--------------------------------------------------------------------------------
/ml/data/ml_test.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blayes/WASP/a3f415cfe4276776a5116459d51491620029c357/ml/data/ml_test.rds


--------------------------------------------------------------------------------
/ml/data/ml_train.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blayes/WASP/a3f415cfe4276776a5116459d51491620029c357/ml/data/ml_train.rds


--------------------------------------------------------------------------------
/ml/data/test_idx.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blayes/WASP/a3f415cfe4276776a5116459d51491620029c357/ml/data/test_idx.rds


--------------------------------------------------------------------------------
/ml/data/train_idx.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blayes/WASP/a3f415cfe4276776a5116459d51491620029c357/ml/data/train_idx.rds


--------------------------------------------------------------------------------
/ml/data/wasp_ml_train.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blayes/WASP/a3f415cfe4276776a5116459d51491620029c357/ml/data/wasp_ml_train.rds


--------------------------------------------------------------------------------
/ml/qsub/bbvb.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N bbvb
 3 | #$ -l mf=16G
 4 | #$ -pe smp 4
 5 | #$ -l h_rt=320:00:00
 6 | #$ -l s_rt=320:00:00
 7 | #$ -wd /Users/ssrivastva/wasp/ml/code/
 8 | #$ -m a
 9 | #$ -M sanvesh-srivastava@uiowa.edu
10 | #$ -t 1-10
11 | #$ -V
12 | #$ -e /Users/ssrivastva/err/
13 | #$ -o /Users/ssrivastva/out/
14 | 
15 | module load R/intel-composer_xe_2015.3.187_3.3.0
16 | 
17 | R CMD BATCH --no-save --no-restore "--args 7 $SGE_TASK_ID" submit.R vb/bbvb_$SGE_TASK_ID.rout
18 | 


--------------------------------------------------------------------------------
/ml/qsub/comp10.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N comp10
 3 | #$ -l mf=16G
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/ml/code/
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-100
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load R/3.3.0
15 | 
16 | R CMD BATCH --no-save --no-restore "--args 4 $SGE_TASK_ID" submit.R comp/comp10_$SGE_TASK_ID.rout
17 | 


--------------------------------------------------------------------------------
/ml/qsub/comp_joint_cov.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N comp_joint_cov
 3 | #$ -l mf=16G
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/ml/code/ 
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-10
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load R/3.3.0
15 | 
16 | R CMD BATCH --no-save --no-restore "--args 6 $SGE_TASK_ID" submit.R comp/joint_cov_$SGE_TASK_ID.rout
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/ml/qsub/comp_marg.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N comp_lme
 3 | #$ -q LT
 4 | #$ -l mf=16G
 5 | #$ -l h_rt=320:00:00
 6 | #$ -l s_rt=320:00:00
 7 | #$ -wd /Users/ssrivastva/wasp/ml/code/ 
 8 | #$ -m a
 9 | #$ -M sanvesh-srivastava@uiowa.edu
10 | #$ -t 1-10
11 | #$ -V
12 | #$ -e /Users/ssrivastva/err/
13 | #$ -o /Users/ssrivastva/out/
14 | 
15 | module load R/3.3.0
16 | 
17 | R CMD BATCH --no-save --no-restore "--args 5 $SGE_TASK_ID" submit.R comp/lme_$SGE_TASK_ID.rout
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/ml/qsub/mcmc.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N full
 3 | #$ -l mf=64G
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/ml/code/
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-10
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load R/3.3.0
15 | 
16 | R CMD BATCH --no-save --no-restore "--args 1 $SGE_TASK_ID" submit.R mcmc/full_$SGE_TASK_ID.rout
17 | 


--------------------------------------------------------------------------------
/ml/qsub/vb.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N vb
 3 | #$ -l mf=16G        
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/ml/code/
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-10
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load R/3.3.0
15 | 
16 | R CMD BATCH --no-save --no-restore "--args 2 $SGE_TASK_ID" submit.R vb/vb_$SGE_TASK_ID.rout
17 | 
18 | 
19 | 
20 | 
21 | 


--------------------------------------------------------------------------------
/ml/qsub/wasp10.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N wasp10
 3 | #$ -q LT
 4 | #$ -l mf=16G
 5 | #$ -l h_rt=320:00:00
 6 | #$ -l s_rt=320:00:00
 7 | #$ -wd /Users/ssrivastva/wasp/ml/code/
 8 | #$ -m a
 9 | #$ -M sanvesh-srivastava@uiowa.edu
10 | #$ -t 1-100
11 | #$ -V
12 | #$ -e /Users/ssrivastva/err/
13 | #$ -o /Users/ssrivastva/out/
14 | 
15 | module load R/3.3.0
16 | 
17 | R CMD BATCH --no-save --no-restore "--args 3 $SGE_TASK_ID" submit.R wasp/wasp10_$SGE_TASK_ID.rout
18 | 


--------------------------------------------------------------------------------
/ml/qsub/wasp_joint_cov.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N ml_cov_wasp
 3 | #$ -l mf=16G
 4 | #$ -pe smp 8
 5 | #$ -l h_rt=310:00:00
 6 | #$ -l s_rt=310:00:00
 7 | #$ -wd /Users/ssrivastva/wasp/ml/code/
 8 | #$ -m a
 9 | #$ -M sanvesh-srivastava@uiowa.edu
10 | #$ -t 1-10
11 | #$ -V
12 | #$ -e /Users/ssrivastva/err/
13 | #$ -o /Users/ssrivastva/out/
14 | 
15 | module load gurobi/6.5.1
16 | 
17 | module load matlab/R2015b
18 | 
19 | matlab -nojvm -nodisplay -singleCompThread -r "calc_wasp_cov_2d_k10($SGE_TASK_ID, 10, '/Shared/ssrivastva/wasp/ml/result/wasp/')"
20 | 
21 | 


--------------------------------------------------------------------------------
/parafac/code/callLpSolver.m:
--------------------------------------------------------------------------------
  1 | % FUNCTION:  [xsol, output] = callLpSolver(solver, Amat, bvec, ...
  2 | %                                          cvec, maxiters, tolx)
  3 | % PURPOSE: Call the Lp solver to solve the LP problem.
  4 | % 
  5 | %
  6 | function [xsol, output] = callLpSolver(solver, Amat, bvec, cvec, maxiters, tolx)
  7 | 
  8 | % Check the inputs.
  9 | if nargin < 3, error('At least three inputs are required!'); end
 10 | if nargin < 5, tolx = 1e-4; end
 11 | if nargin < 4, maxiters = 1000; end
 12 | if isempty(tolx), tolx = 1e-4; end
 13 | if isempty(maxiters), maxiters = 1000; end
 14 | if size(Amat, 1) ~= length(bvec), error('Inputs are inconsistent!'); end
 15 | if size(Amat, 2) ~= length(cvec), error('Inputs are inconsistent!'); end
 16 | xsol = []; output = [];
 17 | nx   = length(cvec);
 18 | 
 19 | %% Call the SeDuMi solver.
 20 | if strcmpi(solver, 'sedumi')
 21 |     pars.maxiter = maxiters;
 22 |     pars.eps     = tolx;
 23 |     Cones.l      = length(cvec);
 24 |     time3        = tic;
 25 |     [x_sedumi, y_sedumi, info] = sedumi(Amat, bvec, cvec, Cones, pars);
 26 |     x_sedumi = full(x_sedumi);
 27 |     xsol         = x_sedumi;
 28 |     output.time3        = toc(time3);
 29 |     output.info  = info;
 30 |     output.dual_sol = y_sedumi;
 31 | end
 32 | 
 33 | %% Call the Matlab LINPROG solver.
 34 | if strcmpi(solver, 'linprog')
 35 |     time5       = tic;
 36 |     opts        = optimset('Algorithm', 'interior-point', ...
 37 |                            'Display', 'iter', ...
 38 |                            'MaxIter', maxiters, 'TolX', tolx, 'TolFun', tolx);
 39 |     [xsol, fx2] = linprog(cvec, [], [], Amat, bvec, zeros(nx, 1), [], [], opts);
 40 |     output.time = toc(time5);
 41 |     output.fx   = fx2;
 42 | end
 43 | 
 44 | %% Call the SDPT3 solver.
 45 | if strcmpi(solver, 'sdpt3')
 46 |     Asdpt3 = spconvert(Amat);
 47 |     blk{1, 1} = 'l';
 48 |     blk{1, 2} = ones(1, nx);
 49 |     sdpt3opt  = struct('gaptol', tolx, 'maxit', maxiters);
 50 |     time4     = tic;
 51 |     %[X0, y0, Z0] = infeaspt(blk, Asdpt3, cvec, bvec); 
 52 |     %[fx_sdpt3, x_sdpt3, y_sdpt3, Z_sdpt3] = sqlp(blk, Asdpt3, cvec, bvec, sdpt3opt, X0, y0, Z0);
 53 |     [fx_sdpt3, x_sdpt3, y_sdpt3, Z_sdpt3] = sqlp(blk, Asdpt3, cvec, bvec, sdpt3opt);
 54 |     output.time4     = toc(time4);
 55 |     x_sdpt3   = x_sdpt3{:};
 56 |     xsol      = x_sdpt3;
 57 |     output.dual_sol = y_sdpt3;
 58 |     output.slacks   = Z_sdpt3;
 59 |     output.fx       = fx_sdpt3;
 60 | end    
 61 | 
 62 | %% Call Our Decopt solver.
 63 | if strcmpi(solver, 'decopt')
 64 |     
 65 |     % Set the parameters.
 66 |     param.MaxIters      = maxiters;
 67 |     param.Verbosity     = 2;
 68 |     param.RelTolX       = tolx;
 69 |     param.saveHistMode  = 0;
 70 |     param.Algorithm     = 3;
 71 |     param.InnerMaxIters = 20;
 72 |     param.adaptStepSize = 0;
 73 | 
 74 |     % Call the solver.
 75 |     proxLpPos    = @(x, gamma)( min( max(0, x - gamma*cvec), 1.0) );
 76 | 
 77 |     % User-define proximal-functions.
 78 |     proxOpers{1} = @(x, gamma, varargin)(proxLpPos(x, gamma));
 79 |     proxOpers{2} = @(x, gamma, varargin)(projL2norm(x, 1e-12));
 80 | 
 81 |     proxOpers{3} = @(x, varargin)( cvec'*x );
 82 |     proxOpers{4} = @(x, varargin)(0);
 83 |     
 84 |     % Generate an initial point.
 85 |     x0       = zeros(nx, 1);
 86 |     
 87 |     %% Call the solver with user-define prox-functions.
 88 |     time1 = tic;
 89 |     [xsol, out] = decoptSolver('UserDef', Amat, bvec, param, 'x0', x0, 'Prox', proxOpers, 'GammaFactor', 1.1);
 90 |     output.time = toc(time1);
 91 |     output.info = out;
 92 | 
 93 | end
 94 | 
 95 | %% Call the Gurobi solver.
 96 | if strcmpi(solver, 'gurobi')
 97 |     
 98 |     % Generate the LP model.
 99 |     time_g            = tic;
100 |     model.A           = Amat;
101 |     model.obj         = full(cvec);
102 |     model.rhs         = full(bvec);
103 |     model.modelsense  = 'min';
104 |     model.sense       = '=';
105 |     
106 |     % Define the parameters.
107 |     param.method      = 2;
108 |     param.Presolve    = 2;
109 |     param.Crossover   = 0;
110 |     param.outputflag  = 1;
111 |     
112 |     % Call the solver.
113 |     result            = gurobi(model, param);
114 |     
115 |     % Obtain the final results.
116 |     output.result     = result;
117 |     output.time       = toc(time_g);
118 |     xsol              = result.x;
119 |     
120 | end
121 | 
122 | 


--------------------------------------------------------------------------------
/parafac/code/obtain_samples.m:
--------------------------------------------------------------------------------
 1 | % mcmc samples
 2 | clear; 
 3 | for cc=1:10
 4 |     load(strcat('/Shared/ssrivastva/wasp/parafac/result/full/res_', num2str(cc), ...
 5 |                 '.mat'));    
 6 |     for dd = 1:20
 7 |         margMat = zeros(1000, 2);
 8 |         for ss = 1:1000
 9 |             margMat(ss, :) = history{1, ss}(dd, :);
10 |         end
11 |         csvwrite(strcat('/Shared/ssrivastva/wasp/parafac/result/full/res_cv_', num2str(cc), ...
12 |             '_dim_', num2str(dd), '.csv'), margMat);    
13 |     end   
14 | end
15 | 
16 | % wasp samples 
17 | clear;
18 | for cc = 1:10
19 |     for kk=1:5
20 |         load(strcat('/Shared/ssrivastva/wasp/parafac/result/sub5/samp/res_cv_', ...
21 |                     num2str(cc), '_sub_', num2str(kk), '_k5.mat'));    
22 |         for dd = 1:20
23 |             margMat = zeros(1000, 2);
24 |             for ss = 1:1000
25 |                 margMat(ss, :) = history{1, ss}(dd, :);
26 |             end
27 |             csvwrite(strcat('/Shared/ssrivastva/wasp/parafac/result/sub5/samp/csv/res_cv_', num2str(cc), ...
28 |                             '_sub_', num2str(kk), '_dim_', num2str(dd), '_k5.csv'), margMat);    
29 |         end   
30 |     end    
31 | end
32 | 
33 | clear;
34 | for cc = 1:10
35 |     for kk=1:10
36 |         load(strcat('/Shared/ssrivastva/wasp/parafac/result/sub10/samp/res_cv_', ...
37 |                     num2str(cc), '_sub_', num2str(kk), '_k10.mat'));    
38 |         for dd = 1:20
39 |             margMat = zeros(1000, 2);
40 |             for ss = 1:1000
41 |                 margMat(ss, :) = history{1, ss}(dd, :);
42 |             end
43 |             csvwrite(strcat('/Shared/ssrivastva/wasp/parafac/result/sub10/samp/csv/res_cv_', num2str(cc), ...
44 |                             '_sub_', num2str(kk), '_dim_', num2str(dd), '_k10.csv'), margMat);    
45 |         end   
46 |     end    
47 | end
48 | 
49 | % cmc; sdp samples 
50 | 
51 | clear; 
52 | for cc = 1:10
53 |     for kk=1:5
54 |         load(strcat('/Shared/ssrivastva/wasp/parafac/result/comp/sub5/samp/res_cv_', ...
55 |                     num2str(cc), '_sub_', num2str(kk), '_k5.mat'));    
56 |         for dd = 1:20
57 |             margMat = zeros(1000, 2);
58 |             for ss = 1:1000
59 |                 margMat(ss, :) = history{1, ss}(dd, :);
60 |             end
61 |             csvwrite(strcat('/Shared/ssrivastva/wasp/parafac/result/comp/sub5/res_cv_', num2str(cc), ...
62 |                             '_sub_', num2str(kk), '_dim_', num2str(dd), '_k5.csv'), margMat);    
63 |         end   
64 |     end    
65 | end
66 | 
67 | clear;
68 | 
69 | for cc = 1:10
70 |     for kk=1:10
71 |         load(strcat('/Shared/ssrivastva/wasp/parafac/result/comp/sub10/samp/res_cv_', ...
72 |                     num2str(cc), '_sub_', num2str(kk), '_k10.mat'));    
73 |         for dd = 1:20
74 |             margMat = zeros(1000, 2);
75 |             for ss = 1:1000
76 |                 margMat(ss, :) = history{1, ss}(dd, :);
77 |             end
78 |             csvwrite(strcat('/Shared/ssrivastva/wasp/parafac/result/comp/sub10/res_cv_', num2str(cc), ...
79 |                             '_sub_', num2str(kk), '_dim_', num2str(dd), '_k10.csv'), margMat);    
80 |         end   
81 |     end    
82 | end
83 | 
84 | 
85 | 


--------------------------------------------------------------------------------
/parafac/code/parafac_dx_com.m:
--------------------------------------------------------------------------------
  1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  2 | % Probabilitistic Parafac Gibbs Sampler for Nonparametric Bayes Modeling of
  3 | % Multivariate Categorical Data (Tensor Factor Model)
  4 | % 
  5 | % For details of the sampler see Dunson and Xing (2009) at
  6 | % 
  7 | % http://www.tandfonline.com/doi/abs/10.1198/jasa.2009.tm08439#.Uxpc6Nww_0A
  8 | %
  9 | % Written by Sanvesh Srivastava on 03/03/14 based on a previous version
 10 | % by Jing Zhou of UNC, Biostatistics
 11 | % modified by SS 04/26/15
 12 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 13 | 
 14 | function [history, tend] = parafac_dx_com(dataMat, cats, nclass, nrun, nburn, nthin)
 15 | % parafac_dx_com performs Gibbs sampling according to Dunson and Xing (2009)
 16 | %
 17 | % dataMat: is N x P matrix, n-th row contains the P categorical responses of n-th
 18 | % individual; missing data is represented by -999 and baseline category
 19 | % is 0. Format is similar to the GSS database. Note 0 is changed to 1 later,
 20 | % so remember to change this if you are in a different setup
 21 | %
 22 | % cats: is the no. of max. categories in a particular variable; vector of
 23 | % length P.
 24 | %    
 25 | % nclass: is the upper bound on the total number of latent class in
 26 | % stick-breaking representation.
 27 | %    
 28 | % nrun: is the max. number of MCMC iterations.
 29 | %    
 30 | % nburn, nthins: are # burn-ins and # thining for posterior samples.
 31 | %    
 32 | % history: is a cell that contains the posterior samples misc. summaries
 33 | % from the Gibbs sampler. 
 34 | %    
 35 | % tend: is the time for Gibbs sampling.
 36 |     tic;    
 37 |     effSamp = ceil((nrun - nburn) / nthin);
 38 |     ncat    = length(cats);
 39 |     
 40 |     % -- initialize the \lambda tensor for the ndim variables
 41 |     [nsample, ndim] = size(dataMat); 
 42 |     lambda   = cell(1, ncat);  % joint distribution of categorical variables
 43 |     aLambda  = cell(1, ncat);  % parameters of Dirichlet for updating
 44 |                                % posterior of \lambda
 45 |     for ii = 1:ncat
 46 |         aLambda{ii}  = ones(cats(ii), nclass);        
 47 |         for jj = 1:cats(ii)        
 48 |             lambda{ii}(jj, :) = ones(1, nclass) * mean(dataMat(:, ii)  == jj); 
 49 |         end    
 50 |     end        
 51 |     
 52 |     % -- initialize parameters -- %
 53 |     alpha = 1;                                    % dp hyperparameters for stick breaking weights
 54 |     aal   = 1; bal = 1;                           % gamma hyperparameters for alpha
 55 |     aa     = cell(1, ncat);                        % dirichlet hyperparameter for \lambda_h^{(j)}
 56 |     for ii = 1:ncat
 57 |         aa{ii} = ones(cats(ii), 1);
 58 |     end
 59 |     % beta probs for computing stick breaking wts
 60 |     nus = betarnd(1, alpha, [nclass - 1, 1]); nu = zeros(nclass, 1); 
 61 |     % actual stick breaking wts for infinite mixture of atoms
 62 |     nu(1:(nclass - 1)) = nus .* cumprod([1; 1 - nus(1:(nclass-2))]); nu(nclass) = 1 - sum(nu(1:(nclass-1)));
 63 |     % Prob of latent variables belonging to classes; clases of obs; #obs assigned to a class
 64 |     zProbs = zeros(nsample, nclass); zClass = zeros(nsample, nclass); 
 65 |     
 66 |     dataBasedSummary = cell(1, ncat); % # obs for every category of a variable.    
 67 |     dataClassInd = cell(1, ncat); % indicator maxtrix of latent class memberships.
 68 |     for jj = 1:ncat        
 69 |         dataClassInd{jj} = zeros(nsample, cats(jj));
 70 |         for ii = 1:nsample
 71 |             if dataMat(ii, jj) > 0                    
 72 |                 dataClassInd{jj}(ii, :) = dataMat(ii, jj) == 1:cats(jj);
 73 |             end    
 74 |         end
 75 |         dataBasedSummary{jj} = sum(dataClassInd{jj});
 76 |     end
 77 | 
 78 |     history = cell(5, effSamp);
 79 |     hhh     = 0;
 80 | 
 81 |     % -- start Gibbs sampler -- %
 82 |     for iter = 1:nrun    
 83 |         % -- update zs -- %
 84 |         zProbs = zeros(nsample, nclass); 
 85 |         zClass = zeros(nsample, nclass);     
 86 |         for ii = 1:nsample
 87 |             for jj = 1:ncat
 88 |                 % notice the use of stochastic approx in form of ntotal/nsample
 89 |                 zProbs(ii, :) = zProbs(ii, :) + log(sum(lambda{jj} .*  repmat(dataClassInd{jj}(ii, :)', 1, nclass), 1));           
 90 |             end
 91 |             zProbs(ii, :) = exp(zProbs(ii, :)) .* nu';
 92 |             if sum(zProbs(ii, :)) == 0 % will happen when zProbs = -Infs
 93 |                 zProbs(ii, :) = ones(1, nclass) / nclass;
 94 |             else
 95 |                 zProbs(ii, :) = zProbs(ii, :) / sum(zProbs(ii, :)); 
 96 |             end
 97 |         end
 98 |         
 99 |         % a way to avoid nan's if things are generated using mnrnd
100 |         mat1 = [zeros(nsample, 1) cumsum(zProbs, 2)];
101 |         rr = unifrnd(0, 1, [nsample, 1]); z = zeros(nsample, 1);
102 |         for l = 1:nclass
103 |             ind = rr > mat1(:, l) & rr <= mat1(:, l + 1); z(ind) = l;
104 |         end
105 |         
106 |         for ii = 1:nsample
107 |             zClass(ii, z(ii)) = 1;        
108 |         end
109 |         classCts = sum(zClass); % # samples assigned to each class
110 |         
111 |         % -- update lambda -- %
112 |         for kk = 1:nclass        
113 |             for jj = 1:ncat                
114 |                 memInd = sum(dataClassInd{jj} .* repmat(zClass(:, kk), 1, cats(jj)));
115 |                 aLambda{jj}(:, kk) = aa{jj}' + memInd;             
116 |                 % generate lambda from a dirichlet dist. using gamma dist.
117 |                 lambda{jj}(:, kk) = gamrnd(aLambda{jj}(:, kk), 1);
118 |                 lambda{jj}(:, kk) = lambda{jj}(:, kk) / sum(lambda{jj}(:, kk)); 
119 |             end
120 |         end
121 |         
122 |         % -- update nu -- %
123 |         for kk = 1:(nclass - 1)
124 |             nus(kk) = betarnd(1 + classCts(kk), alpha + sum(sum(zClass(:, (kk + 1):nclass))));
125 |         end    
126 |         nu(1:(nclass - 1)) = nus .* cumprod([1; 1 - nus(1:(nclass-2))]); nu(nclass) = 1 - sum(nu(1:(nclass-1)));
127 |         
128 |         % -- update alpha-- %
129 |         nuss = 1 - nus(1:(nclass - 1)); nuss(nuss < 1e-6) = 1e-6; % avoids numerical errors
130 |         alpha = gamrnd(aal + nclass - 1, 1 / (bal - sum(log(nuss))));
131 |         
132 |         % -- store draws across iterations -- %   
133 |         if ((iter > nburn) & (mod(iter, 5) == 0)) 
134 |             % -- calc marginals -- %
135 |             margMat = eye(ncat, 2);
136 |             for ii = 1:ncat            
137 |                 margMat(ii, :) = sum(bsxfun(@times, lambda{ii}', nu)); 
138 |             end
139 |             % 
140 |             hhh = hhh + 1;
141 |             history{1, hhh} = margMat;
142 |             history{2, hhh} = nu;            
143 |             history{3, hhh} = alpha;
144 |             history{4, hhh} = lambda;            
145 |         end        
146 |         if mod(iter, 1000) == 0, disp(iter); end    
147 |     end    
148 |     tend = toc;
149 | end


--------------------------------------------------------------------------------
/parafac/code/parafac_dx_sub.m:
--------------------------------------------------------------------------------
  1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  2 | % Probabilitistic Parafac Gibbs Sampler for Nonparametric Bayes Modeling of
  3 | % Multivariate Categorical Data (Tensor Factor Model)
  4 | % 
  5 | % For details of the sampler see Dunson and Xing (2009) at
  6 | % 
  7 | % http://www.tandfonline.com/doi/abs/10.1198/jasa.2009.tm08439#.Uxpc6Nww_0A
  8 | %
  9 | % Written by Sanvesh Srivastava on 03/03/14 based on a previous version
 10 | % by Jing Zhou of UNC, Biostatistics
 11 | % modified by SS 04/26/15
 12 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 13 | 
 14 | function [history, tend] = parafac_dx_sub(dataMat, cats, nclass, ntotal, nrun, nburn, nthin)
 15 | % parafac_dx_sub modifies Gibbs sampling scheme of Dunson and Xing (2009)
 16 | % using stochastic approx
 17 | %
 18 | % dataMat: is N x P matrix, n-th row contains the P categorical responses of n-th
 19 | % individual.
 20 | %
 21 | % cats: is the no. of max. categories in a particular variable; vector of
 22 | % length P.
 23 | %    
 24 | % nclass: is the upper bound on the total number of latent class in
 25 | % stick-breaking representation.
 26 | %        
 27 | % ntotal: is the total number of data points; used in the stochastic approx. of
 28 | % log-posterior for individual data subsets. 
 29 | %    
 30 | % nrun: is the max. number of MCMC iterations.
 31 | %    
 32 | % nburn, nthins: are # burn-ins and # thining for posterior samples.
 33 | %    
 34 | % history: is a cell that contains the posterior samples misc. summaries
 35 | % from the Gibbs sampler. 
 36 | % 
 37 | % tend: is the time for Gibbs sampling.
 38 | 
 39 |     effSamp = ceil((nrun - nburn) / nthin);
 40 |     ncat    = length(cats);
 41 |     
 42 |     % -- initialize the \lambda tensor for the ndim variables
 43 |     [nsample, ndim] = size(dataMat); 
 44 |     nrep = ntotal / nsample;
 45 |     lambda   = cell(1, ncat);  % joint distribution of categorical variables
 46 |     aLambda  = cell(1, ncat);  % parameters of Dirichlet for updating
 47 |                                % posterior of \lambda
 48 |     for ii = 1:ncat
 49 |         aLambda{ii}  = ones(cats(ii), nclass) / cats(ii);        
 50 |         for jj = 1:cats(ii)        
 51 |             lambda{ii}(jj, :) = ones(1, nclass) / cats(ii); %* mean(dataMat(:, ii)  == jj); 
 52 |         end    
 53 |     end        
 54 |     
 55 |     % -- initialize parameters -- %
 56 |     alpha = 1;                                    % dp hyperparameters for stick breaking weights
 57 |     aal   = 1; bal = 1;                           % gamma hyperparameters for alpha
 58 |     aa     = cell(1, ncat);                        % dirichlet hyperparameter for \lambda_h^{(j)}
 59 |     for ii = 1:ncat
 60 |         aa{ii} = ones(cats(ii), 1) / cats(ii);
 61 |     end
 62 |     % beta probs for computing stick breaking wts
 63 |     nus = betarnd(1, alpha, [nclass - 1, 1]); nu = zeros(nclass, 1); 
 64 |     % actual stick breaking wts for infinite mixture of atoms
 65 |     nu(1:(nclass - 1)) = nus .* cumprod([1; 1 - nus(1:(nclass-2))]); nu(nclass) = 1 - sum(nu(1:(nclass-1)));
 66 |     % Prob of latent variables belonging to classes; clases of obs; #obs assigned to a class
 67 |     zProbs = zeros(nsample, nclass); zClass = zeros(nsample, nclass); 
 68 |     
 69 |     dataBasedSummary = cell(1, ncat); % # obs for every category of a variable.    
 70 |     dataClassInd = cell(1, ncat); % indicator maxtrix of latent class memberships.
 71 |     for jj = 1:ncat        
 72 |         dataClassInd{jj} = zeros(nsample, cats(jj));
 73 |         for ii = 1:nsample
 74 |             if dataMat(ii, jj) > 0                    
 75 |                 dataClassInd{jj}(ii, :) = dataMat(ii, jj) == 1:cats(jj);
 76 |             end    
 77 |         end
 78 |         dataBasedSummary{jj} = sum(dataClassInd{jj});
 79 |     end
 80 | 
 81 |     history = cell(5, effSamp);
 82 |     hhh     = 0;
 83 |     
 84 |     % -- start Gibbs sampler -- %
 85 |     tic;    
 86 |     for iter = 1:nrun    
 87 |         % -- update zs -- %
 88 |         zProbs = zeros(nsample, nclass); 
 89 |         zClass = zeros(nsample, nclass);     
 90 |         for ii = 1:nsample
 91 |             for jj = 1:ncat
 92 |                 zProbs(ii, :) = zProbs(ii, :) + log(sum(lambda{jj} .*  repmat(dataClassInd{jj}(ii, :)', 1, nclass), 1));           
 93 |             end
 94 |             zProbs(ii, :) = exp(zProbs(ii, :)) .* (nu)';
 95 |             if sum(zProbs(ii, :)) == 0 % will happen when zProbs = -Infs
 96 |                 zProbs(ii, :) = ones(1, nclass) / nclass;
 97 |             else
 98 |                 zProbs(ii, :) = zProbs(ii, :) / sum(zProbs(ii, :)); 
 99 |             end
100 |         end
101 |         
102 |         % a way to avoid nan's if things are generated using mnrnd
103 |         mat1 = [zeros(nsample, 1) cumsum(zProbs, 2)];
104 |         rr = unifrnd(0, 1, [nsample, 1]); z = zeros(nsample, 1);
105 |         for l = 1:nclass
106 |             ind = rr > mat1(:, l) & rr <= mat1(:, l + 1); z(ind) = l;
107 |         end
108 |         
109 |         for ii = 1:nsample
110 |             zClass(ii, z(ii)) = 1;        
111 |         end
112 |         classCts = sum(zClass); % # samples assigned to each class
113 |         
114 |         % -- update lambda -- %
115 |         for kk = 1:nclass        
116 |             for jj = 1:ncat                
117 |                 memInd = sum(dataClassInd{jj} .* repmat(zClass(:, kk), 1, cats(jj)));
118 |                 aLambda{jj}(:, kk) = aa{jj}' + nrep * memInd;             
119 |                 % generate lambda from a dirichlet dist. using gamma dist.
120 |                 lambda{jj}(:, kk) = gamrnd(aLambda{jj}(:, kk), 1);
121 |                 lambda{jj}(:, kk) = lambda{jj}(:, kk) / sum(lambda{jj}(:, kk)); 
122 |             end
123 |         end
124 |         
125 |         % -- update nu -- %
126 |         for kk = 1:(nclass - 1)
127 |             nus(kk) = betarnd(1 + nrep * classCts(kk), alpha + nrep * sum(sum(zClass(:, (kk + 1):nclass))));
128 |         end    
129 |         nu(1:(nclass - 1)) = nus .* cumprod([1; 1 - nus(1:(nclass-2))]); nu(nclass) = 1 - sum(nu(1:(nclass-1)));
130 |         
131 |         % -- update alpha-- %
132 |         nuss = 1 - nus(1:(nclass - 1)); nuss(nuss < 1e-6) = 1e-6; % avoids numerical errors
133 |         alpha = gamrnd(aal + nclass - 1, 1 / (bal - sum(log(nuss))));
134 |         
135 |         % -- store draws across iterations -- %   
136 |         if ((iter > nburn) & (mod(iter, 5) == 0)) 
137 |             % -- calc marginals -- %
138 |             margMat = eye(ncat, 2);
139 |             for ii = 1:ncat            
140 |                 margMat(ii, :) = sum(bsxfun(@times, lambda{ii}', nu)); 
141 |             end
142 | 
143 |             hhh = hhh + 1;
144 |             history{1, hhh} = margMat;
145 |             history{2, hhh} = nu;            
146 |             history{3, hhh} = alpha;
147 |             history{4, hhh} = lambda;            
148 |         end
149 |         
150 |         if mod(iter, 1000) == 0, disp(iter); end    
151 |     end    
152 |     tend = toc;
153 | end
154 | 


--------------------------------------------------------------------------------
/parafac/code/recoverSolution.m:
--------------------------------------------------------------------------------
 1 | % FUNCTION: [tOptSol, aOptSol] = recoverSolution(xOptSol, nsubs, N, Ni)
 2 | % PURPOSE:  Recover the original solution.
 3 | %
 4 | function [tOptSol, aOptSol] = recoverSolution(xOptSol, nsubs, N, Ni)
 5 | 
 6 |     % Define this soft-thresholding operator to remove small elements.
 7 |     softThresOper = @(x, t)(sign(x).*max(abs(x) - t, 0));
 8 | 
 9 |     % Recover the solution.
10 |     aOptSol = xOptSol(1:N, 1);
11 |     xRest   = xOptSol(N+1:end);
12 |     for p = 1:nsubs
13 |         tOptSol{p} = reshape( xRest(1:N*Ni(p)), N, Ni(p))/Ni(p);
14 |         xRest      = xRest(N*Ni(p)+1:end);
15 |         tOptSol{p} = softThresOper(tOptSol{p}, 1e-10);
16 |     end
17 | 
18 | end
19 | % @END ...
20 | 


--------------------------------------------------------------------------------
/parafac/code/simulate_data.m:
--------------------------------------------------------------------------------
  1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  2 | % 
  3 | % Replicates simulation study of Dunson and Xing (2009) 
  4 | % 
  5 | % http://www.tandfonline.com/doi/abs/10.1198/jasa.2009.tm08439#.Uxpc6Nww_0A
  6 | %
  7 | % based on a version by Jing Zhou of UNC, Biostatistics
  8 | % modified by SS 05/10/15
  9 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 10 | 
 11 | clear;clc;
 12 | rng(12345)
 13 | 
 14 | % -- global parameters -- %
 15 | 
 16 | % N = #sample
 17 | % q = #dimensions
 18 | % d = #categories in each dim.
 19 | % rep = #replications
 20 | N = 100000; q = 20; d = 2; rep = 10;
 21 | 
 22 | F1 = 0.5 * ones(q, d);    % category probabilities
 23 | F2 = F1; % category probabilities of response in sub-populations
 24 | 
 25 | F1(2,:) = [0.20 0.80]; F1(4,:) = [0.25 0.75]; F1(12,:) = [0.80 0.20]; F1(14,:) = [0.75 0.25];
 26 | F2(2,:) = [0.80 0.20]; F2(4,:) = [0.75 0.25]; F2(12,:) = [0.20 0.80]; F2(14,:) = [0.25 0.75];   
 27 | 
 28 | Yn_big = zeros(N, q, rep);
 29 | Si = ones(N, 1);
 30 | 
 31 | locs1 = randsample(1:N/2, 0.2*(N/2)); locs2 = randsample(N/2+1:N, 0.2*(N/2));
 32 | locs = [locs1 locs2];
 33 | Si(locs) = 0;
 34 | 
 35 | for g = 1:rep
 36 |     for j = 1:q
 37 |         Yn_big(Si==1,j,g) = mnrnd(1,F1(j,:),sum(Si==1))*(1:d)';
 38 |         Yn_big(Si==0,j,g) = mnrnd(1,F2(j,:),sum(Si==0))*(1:d)';
 39 |     end
 40 | end
 41 | 
 42 | save('/Shared/ssrivastva/wasp/parafac/data/parafac_full_data.mat', 'Yn_big', 'Si', 'locs', 'locs1', 'locs2', 'F1', 'F2', 'N', 'q', 'd', 'rep');    
 43 | 
 44 | clear;clc;
 45 | rng(12345)
 46 | 
 47 | load('/Shared/ssrivastva/wasp/parafac/data/parafac_full_data.mat');
 48 | % -- global parameters -- %
 49 | 
 50 | nsub = 5;
 51 | 
 52 | si1 = find(Si == 1);
 53 | si0 = find(Si == 0);
 54 | 
 55 | for cc = 1:rep
 56 |     si1Part = randi(nsub, length(si1), 1);
 57 |     si0Part = randi(nsub, length(si0), 1);    
 58 |     for kk = 1:nsub
 59 |         n1s = si1(si1Part == kk);
 60 |         n0s = si0(si0Part == kk);        
 61 |         partMat = Yn_big([n1s; n0s], :, cc);
 62 |         disp(['cv: ' num2str(cc) ' dim: ' num2str(size(partMat))]);
 63 |         save(strcat('/Shared/ssrivastva/wasp/parafac/data/sub5/parafac_cv_', ...
 64 |                     num2str(cc), '_sub_', num2str(kk), '_k5.mat'), 'partMat', 'n1s', 'n0s', 'si1Part', 'si0Part');            
 65 |     end    
 66 | end
 67 | 
 68 | clear;clc;
 69 | rng(12345)
 70 | 
 71 | load('/Shared/ssrivastva/wasp/parafac/data/parafac_full_data.mat');
 72 | 
 73 | % -- global parameters -- %
 74 | 
 75 | nsub = 10;
 76 | 
 77 | si1 = find(Si == 1);
 78 | si0 = find(Si == 0);
 79 | 
 80 | for cc = 1:rep
 81 |     si1Part = randi(nsub, length(si1), 1);
 82 |     si0Part = randi(nsub, length(si0), 1);    
 83 |     for kk = 1:nsub
 84 |         n1s = si1(si1Part == kk);
 85 |         n0s = si0(si0Part == kk);        
 86 |         partMat = Yn_big([n1s; n0s], :, cc);
 87 |         disp(['cv: ' num2str(cc) ' dim: ' num2str(size(partMat))]);        
 88 |         save(strcat('/Shared/ssrivastva/wasp/parafac/data/sub10/parafac_cv_', ...
 89 |                     num2str(cc), '_sub_', num2str(kk), '_k10.mat'), 'partMat', 'n1s', 'n0s', 'si1Part', 'si0Part');            
 90 |     end    
 91 | end
 92 | 
 93 | % $$$ % --- test code --- %
 94 | % $$$ [history, tend] = parafac_dx_sub(Yn_big(1:100, :, 1), cats, 20, 100000, 2000, 1000, 5); 
 95 | % $$$ [fhistory, tend] = parafac_dx_com(Yn_big(500,:,1), cats, 20, 2000, 1000, 5);           
 96 | % $$$ 
 97 | % $$$ load full/res_2.mat
 98 | % $$$ 
 99 | % $$$ full=history;
100 | % $$$ fmargMat = zeros(1000, 2);
101 | % $$$ for ss = 1:1000
102 | % $$$     fmargMat(ss, :) = full{1, ss}(4, :);
103 | % $$$ end
104 | % $$$ 
105 | % $$$ load sub10/samp/res_cv_2_sub_2_k10_6_27.mat
106 | % $$$ 
107 | % $$$ part = history;
108 | % $$$ margMat = zeros(1000, 2);
109 | % $$$ for ss = 1:1000
110 | % $$$     margMat(ss, :) = part{1, ss}(4, :);
111 | % $$$ end
112 | % $$$ 
113 | % $$$ hold off;
114 | % $$$ plot(margMat(:, 1), 'Color', [1 0 1] );
115 | % $$$ hold on;
116 | % $$$ plot(fmargMat(:, 1));
117 | % $$$ % 
118 | % $$$ 
119 | % $$$ hold off;
120 | % $$$ hist(fmargMat(:, 1))
121 | % $$$ hold on;
122 | % $$$ hist(margMat(:, 1))
123 | % $$$ 
124 | % $$$ subplot(2, 1, 1)
125 | % $$$ 
126 | % $$$ subplot(2, 1, 2)
127 | % $$$ plot(margMat(:, 1), 'Color', [1 0 1])
128 | 


--------------------------------------------------------------------------------
/parafac/code/submit.R:
--------------------------------------------------------------------------------
 1 | cmdArgs <- commandArgs(trailingOnly = TRUE)
 2 | 
 3 | mtd <- as.numeric(cmdArgs[1])
 4 | id <- as.numeric(cmdArgs[2])
 5 | 
 6 | if (mtd == 2) {
 7 |     library(parallelMCMCcombine)
 8 | 
 9 |     cid <- id
10 |     subProb <- array(0.0, dim = c(20, 1000, 5))
11 | 
12 |     for (dd in 1:20) {
13 |         for (kk in 1:5) {
14 |             dat <- read.table(paste0("/Shared/ssrivastva/wasp/parafac/result/comp/sub5/res_cv_", cid, "_sub_", kk, "_dim_", dd, "_k5.csv"), sep = ",", header = FALSE)
15 |             subProb[dd, , kk] <- dat[ , 1]
16 |         }
17 |     }
18 | 
19 |     stime <- rep(NA, 20)
20 |     scotRes <- list()
21 |     for (dd in 1:20) {
22 |         strt1 <- proc.time()
23 |         scotRes[[dd]] <- as.numeric(t(consensusMCindep(subchain = subProb[dd, , , drop = FALSE])))
24 |         end1 <- proc.time()
25 |         stime[dd] <- end1[3] - strt1[3]
26 |     }
27 | 
28 |     xtime <- rep(NA, 20)
29 |     xingRes <- list()
30 |     for (dd in 1:20) {
31 |         strt1 <- proc.time()
32 |         xingRes[[dd]] <- as.numeric(t(semiparamDPE(subchain = subProb[dd, , , drop = FALSE])))
33 |         end1 <- proc.time()
34 |         xtime[dd] <- end1[3] - strt1[3]
35 |     }
36 | 
37 |     fname1 <- paste0("/Shared/ssrivastva/wasp/parafac/result/comp/sub5/marg/cons_cv_", cid, "_k5.rds")
38 |     fname2 <- paste0("/Shared/ssrivastva/wasp/parafac/result/comp/sub5/marg/xing_cv_", cid, "_k5.rds")
39 | 
40 |     saveRDS(list(marg = scotRes, time = stime), fname1)
41 |     saveRDS(list(marg = xingRes, time = xtime), fname2)
42 | } else if (mtd == 3) {
43 |     library(parallelMCMCcombine)
44 | 
45 |     cid <- id
46 |     subProb <- array(0.0, dim = c(20, 1000, 10))
47 | 
48 |     for (dd in 1:20) {
49 |         for (kk in 1:10) {
50 |             dat <- read.table(paste0("/Shared/ssrivastva/wasp/parafac/result/comp/sub10/res_cv_", cid, "_sub_", kk, "_dim_", dd, "_k10.csv"), sep = ",", header = FALSE)
51 |             subProb[dd, , kk] <- dat[ , 1]
52 |         }
53 |     }
54 | 
55 |     stime <- rep(NA, 20)
56 |     scotRes <- list()
57 |     for (dd in 1:20) {
58 |         strt1 <- proc.time()
59 |         scotRes[[dd]] <- as.numeric(t(consensusMCindep(subchain = subProb[dd, , , drop = FALSE])))
60 |         end1 <- proc.time()
61 |         stime[dd] <- end1[3] - strt1[3]
62 |     }
63 | 
64 |     xtime <- rep(NA, 20)
65 |     xingRes <- list()
66 |     for (dd in 1:20) {
67 |         strt1 <- proc.time()
68 |         xingRes[[dd]] <- as.numeric(t(semiparamDPE(subchain = subProb[dd, , , drop = FALSE])))
69 |         end1 <- proc.time()
70 |         xtime[dd] <- end1[3] - strt1[3]
71 |     }
72 | 
73 |     fname1 <- paste0("/Shared/ssrivastva/wasp/parafac/result/comp/sub10/marg/cons_cv_", cid, "_k10.rds")
74 |     fname2 <- paste0("/Shared/ssrivastva/wasp/parafac/result/comp/sub10/marg/xing_cv_", cid, "_k10.rds")
75 | 
76 |     saveRDS(list(marg = scotRes, time = stime), fname1)
77 |     saveRDS(list(marg = xingRes, time = xtime), fname2)
78 | }
79 | 


--------------------------------------------------------------------------------
/parafac/code/submit_parafac_full.m:
--------------------------------------------------------------------------------
 1 | function submit_parafac_full(dd, nclass, nrun, nburn, nthin)
 2 | 
 3 | load('/Shared/ssrivastva/wasp/parafac/data/parafac_full_data.mat');
 4 | 
 5 | train = Yn_big(:, :, dd); 
 6 | [nsample ndim] = size(train);
 7 | cats = repmat(2, 1, ndim);
 8 | [history, tend] = parafac_dx_com(train, cats, nclass, nrun, nburn, nthin);
 9 | 
10 | save(strcat('/Shared/ssrivastva/wasp/parafac/result/full/res_', num2str(dd), '.mat'), 'history', 'tend');    
11 | csvwrite(strcat('/Shared/ssrivastva/wasp/parafac/result/full/time_', num2str(dd), '.csv'), tend);
12 | 
13 | disp(['done with rep ' num2str(dd) ' ...' ]);    
14 |         
15 | quit
16 | 


--------------------------------------------------------------------------------
/parafac/code/submit_parafac_sub10.m:
--------------------------------------------------------------------------------
 1 | function submit_parafac_sub10(dd, nclass, nsub, nrun, nburn, nthin)
 2 | 
 3 | cvidx = repmat(1:10, nsub, 1);
 4 | cvidx = cvidx(:);
 5 | 
 6 | subidx = repmat(1:nsub, 1, 10);
 7 | subidx = subidx(:);
 8 | 
 9 | nrepf = cvidx(dd);
10 | nsubf = subidx(dd);
11 | 
12 | disp(['nrep: ', num2str(nrepf) ' nsub: ', num2str(nsubf)]);
13 | 
14 | load(strcat('/Shared/ssrivastva/wasp/parafac/data/sub10/parafac_cv_', num2str(nrepf), '_sub_', num2str(nsubf), '_k10.mat'));
15 | 
16 | train = partMat;
17 | [nsample ndim] = size(train);
18 | cats = repmat(2, 1, ndim);
19 | 
20 | [history, tend] = parafac_dx_sub(train, cats, nclass, 100000, nrun, nburn, nthin);
21 | 
22 | save(strcat('/Shared/ssrivastva/wasp/parafac/result/sub10/samp/res_cv_', num2str(nrepf), '_sub_', num2str(nsubf), '_k10.mat'), 'history', 'tend');    
23 | csvwrite(strcat('/Shared/ssrivastva/wasp/parafac/result/sub10/samp/time_cv_', num2str(nrepf), '_sub_', num2str(nsubf), '_k10.csv'), tend);
24 | 
25 | disp(['done with cv ' num2str(nrepf) ' ...' ' subset ... ' num2str(nsubf) ' ... ']);    
26 |         
27 | quit
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/parafac/code/submit_parafac_sub10_comp.m:
--------------------------------------------------------------------------------
 1 | function submit_parafac_sub10_comp(dd, nclass, nsub, nrun, nburn, nthin)
 2 | 
 3 | cvidx = repmat(1:10, nsub, 1);
 4 | cvidx = cvidx(:);
 5 | 
 6 | subidx = repmat(1:nsub, 1, 10);
 7 | subidx = subidx(:);
 8 | 
 9 | nrepf = cvidx(dd);
10 | nsubf = subidx(dd);
11 | 
12 | disp(['nrep: ', num2str(nrepf) ' nsub: ', num2str(nsubf)]);
13 | 
14 | load(strcat('/Shared/ssrivastva/wasp/parafac/data/sub10/parafac_cv_', num2str(nrepf), '_sub_', num2str(nsubf), '_k10.mat'));
15 | 
16 | train = partMat;
17 | [nsample ndim] = size(train);
18 | cats = repmat(2, 1, ndim);
19 | 
20 | [history, tend] = parafac_dx_com(train, cats, nclass, nrun, nburn, nthin);
21 |                    
22 | save(strcat('/Shared/ssrivastva/wasp/parafac/result/comp/sub10/samp/res_cv_', num2str(nrepf), '_sub_', num2str(nsubf), '_k10.mat'), 'history', 'tend');    
23 | csvwrite(strcat('/Shared/ssrivastva/wasp/parafac/result/comp/sub10/samp/time_cv_', num2str(nrepf), '_sub_', num2str(nsubf), '_k10.csv'), tend);
24 | 
25 | disp(['done with cv ' num2str(nrepf) ' ...' ' subset ... ' num2str(nsubf) ' ... ']);    
26 |         
27 | quit
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/parafac/code/submit_parafac_sub5.m:
--------------------------------------------------------------------------------
 1 | function submit_parafac_sub5(dd, nclass, nsub, nrun, nburn, nthin)
 2 | 
 3 | cvidx = repmat(1:10, nsub, 1);
 4 | cvidx = cvidx(:);
 5 | 
 6 | subidx = repmat(1:nsub, 1, 10);
 7 | subidx = subidx(:);
 8 | 
 9 | nrepf = cvidx(dd);
10 | nsubf = subidx(dd);
11 | 
12 | disp(['nrep: ', num2str(nrepf) ' nsub: ', num2str(nsubf)]);
13 | 
14 | load(strcat('/Shared/ssrivastva/wasp/parafac/data/sub5/parafac_cv_', num2str(nrepf), '_sub_', num2str(nsubf), '_k5.mat'));
15 | 
16 | train = partMat;
17 | [nsample ndim] = size(train);
18 | cats = repmat(2, 1, ndim);
19 | 
20 | [history, tend] = parafac_dx_sub(train, cats, nclass, 100000, nrun, nburn, nthin);
21 | 
22 | save(strcat('/Shared/ssrivastva/wasp/parafac/result/sub5/samp/res_cv_', num2str(nrepf), '_sub_', num2str(nsubf), '_k5.mat'), 'history', 'tend');    
23 | csvwrite(strcat('/Shared/ssrivastva/wasp/parafac/result/sub5/samp/time_cv_', num2str(nrepf), '_sub_', num2str(nsubf), '_k5.csv'), tend);
24 | 
25 | disp(['done with cv ' num2str(nrepf) ' ...' ' subset ... ' num2str(nsubf) ' ... ']);    
26 |         
27 | quit
28 | 
29 | 
30 | 
31 |         
32 | 


--------------------------------------------------------------------------------
/parafac/code/submit_parafac_sub5_comp.m:
--------------------------------------------------------------------------------
 1 | function submit_parafac_sub5_comp(dd, nclass, nsub, nrun, nburn, nthin)
 2 | 
 3 | cvidx = repmat(1:10, nsub, 1);
 4 | cvidx = cvidx(:);
 5 | 
 6 | subidx = repmat(1:nsub, 1, 10);
 7 | subidx = subidx(:);
 8 | 
 9 | nrepf = cvidx(dd);
10 | nsubf = subidx(dd);
11 | 
12 | disp(['nrep: ', num2str(nrepf) ' nsub: ', num2str(nsubf)]);
13 | 
14 | load(strcat('/Shared/ssrivastva/wasp/parafac/data/sub5/parafac_cv_', num2str(nrepf), '_sub_', num2str(nsubf), '_k5.mat'));
15 | 
16 | train = partMat;
17 | [nsample ndim] = size(train);
18 | cats = repmat(2, 1, ndim);
19 | 
20 | [history, tend] = parafac_dx_sub(train, cats, nclass, 100000, nrun, nburn, nthin);
21 | 
22 | save(strcat('/Shared/ssrivastva/wasp/parafac/result/comp/sub5/samp/res_cv_', num2str(nrepf), '_sub_', num2str(nsubf), '_k5.mat'), 'history', 'tend');    
23 | csvwrite(strcat('/Shared/ssrivastva/wasp/parafac/result/comp/sub5/samp/time_cv_', num2str(nrepf), '_sub_', num2str(nsubf), '_k5.csv'), tend);
24 | 
25 | disp(['done with cv ' num2str(nrepf) ' ...' ' subset ... ' num2str(nsubf) ' ... ']);    
26 |         
27 | quit
28 | 
29 | 
30 | 
31 |         
32 | 


--------------------------------------------------------------------------------
/parafac/qsub/comp10.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N para_c10
 3 | #$ -l mf=16G        
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/parafac/code
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-100
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load matlab/R2015b
15 | 
16 | matlab -nojvm -nodisplay -singleCompThread -r "submit_parafac_sub10_comp($SGE_TASK_ID, 20, 10, 10000, 5000, 5)"
17 | 
18 | 


--------------------------------------------------------------------------------
/parafac/qsub/comp5.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N para_c5
 3 | #$ -l mf=16G        
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/parafac/code
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-50
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load matlab/R2015b
15 | 
16 | matlab -nojvm -nodisplay -singleCompThread -r "submit_parafac_sub5_comp($SGE_TASK_ID, 20, 5, 10000, 5000, 5)"
17 | 
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/parafac/qsub/comp_marg10.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N comp_marg10
 3 | #$ -l mf=16G
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/parafac/code/
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-10
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load R/3.3.0
15 | 
16 | R CMD BATCH --no-save --no-restore "--args 2 $SGE_TASK_ID" submit.R comp/marg10_$SGE_TASK_ID.rout
17 | 


--------------------------------------------------------------------------------
/parafac/qsub/comp_marg5.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N comp_marg5
 3 | #$ -l mf=16G
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/parafac/code/
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-10
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load R/3.3.0
15 | 
16 | R CMD BATCH --no-save --no-restore "--args 1 $SGE_TASK_ID" submit.R comp/marg5_$SGE_TASK_ID.rout
17 | 


--------------------------------------------------------------------------------
/parafac/qsub/full.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N full_parafac
 3 | #$ -l mf=64G        
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/parafac/code
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-10
10 | #$ -pe smp 4
11 | #$ -V
12 | #$ -e /Users/ssrivastva/err/
13 | #$ -o /Users/ssrivastva/out/
14 | 
15 | module load matlab/R2015b
16 | 
17 | matlab -nojvm -nodisplay -singleCompThread -r "submit_parafac_full($SGE_TASK_ID, 20, 10000, 5000, 5)"
18 | 
19 | 


--------------------------------------------------------------------------------
/parafac/qsub/wasp10.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N wasp_para10
 3 | #$ -l mf=16G        
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/parafac/code
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-100
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load matlab/R2015b
15 | 
16 | matlab -nojvm -nodisplay -singleCompThread -r "submit_parafac_sub10($SGE_TASK_ID, 20, 10, 10000, 5000, 5)"
17 | 
18 | 


--------------------------------------------------------------------------------
/parafac/qsub/wasp5.q:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #$ -N wasp_para5
 3 | #$ -l mf=16G        
 4 | #$ -l h_rt=320:00:00
 5 | #$ -l s_rt=320:00:00
 6 | #$ -wd /Users/ssrivastva/wasp/parafac/code
 7 | #$ -m a
 8 | #$ -M sanvesh-srivastava@uiowa.edu
 9 | #$ -t 1-50
10 | #$ -V
11 | #$ -e /Users/ssrivastva/err/
12 | #$ -o /Users/ssrivastva/out/
13 | 
14 | module load matlab/R2015b
15 | 
16 | matlab -nojvm -nodisplay -singleCompThread -r "submit_parafac_sub5($SGE_TASK_ID, 20, 5, 10000, 5000, 5)"
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/parafac/result/img/para_time.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blayes/WASP/a3f415cfe4276776a5116459d51491620029c357/parafac/result/img/para_time.pdf


--------------------------------------------------------------------------------
/parafac/result/img/supp_marg_para.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blayes/WASP/a3f415cfe4276776a5116459d51491620029c357/parafac/result/img/supp_marg_para.pdf


--------------------------------------------------------------------------------