├── documentation.pdf
├── .gitignore
├── code
    ├── Makefile
    ├── wrapper.R
    ├── helper_functions.R
    └── dmbvs.c
├── README.md
├── example_analysis_script.R
└── LICENSE


/documentation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duncanwadsworth/dmbvs/HEAD/documentation.pdf


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | *.RData
 4 | *.Rproj
 5 | .DS_Store
 6 | *.txt
 7 | *.x
 8 | *.pdf
 9 | *.html
10 | !documentation.pdf
11 | 


--------------------------------------------------------------------------------
/code/Makefile:
--------------------------------------------------------------------------------
 1 | ## Makefile for dmbvs
 2 | ##
 3 | ## Builds the sampler executable $(INPUT).x from $(INPUT).c and links it
 4 | ## against the GNU Scientific Library (GSL).
 5 | 
 6 | ## file and path names
 7 | ## LIBS and INCS may need to be changed; they and CC can also be overridden
 8 | ## on the command line, e.g.
 9 | ##   make CC=clang INCS=/local/gsl-2.1/include LIBS=/local/gsl-2.1/lib
10 | INPUT = dmbvs
11 | LIBS = /usr/local/lib
12 | INCS = /usr/local/include
13 | CC = gcc
14 | 
15 | ## compile and clean are commands, not files
16 | .PHONY: compile clean
17 | 
18 | ## remove a half-written executable if the compiler fails
19 | .DELETE_ON_ERROR:
20 | 
21 | ## default target: build the executable
22 | compile: $(INPUT).x
23 | 
24 | ## rebuild only when the C source is newer than the executable
25 | $(INPUT).x: $(INPUT).c
26 | 	$(CC) -I$(INCS) -L$(LIBS) -Wall $< -o $@ -lgsl -lgslcblas -lm
27 | 
28 | ## $(RM) is "rm -f", which already ignores files that do not exist
29 | clean:
30 | 	$(RM) $(INPUT).x $(INPUT).o
31 | 
32 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | dmbvs
 2 | ===
 3 | 
 4 | > **Important**\
 5 | > This repo is no longer maintained and its contents have been superseded by the library here: [https://github.com/mkoslovsky/MicroBVS](https://github.com/mkoslovsky/MicroBVS). Anything you can do with `dmbvs` can be done better with `MicroBVS`.
 6 | 
 7 | ### Description
 8 | 
 9 | Bayesian variable selection for Dirichlet-Multinomial regression. For $n$ 
10 | samples, let $Y$ be an $n \times q$ matrix with $q$ taxa. Let $X$ be an 
11 | $n \times p$ matrix with $p$ covariates. By applying spike-and-slab priors to 
12 | the Dirichlet-Multinomial regression coefficients, we obtain concise 
13 | taxa/covariate associations. The methodology is further described in the 
14 | manuscript:
15 | 
16 | > Wadsworth, W. D., Argiento, R., Guindani, M., Galloway-Pena, J., Samuel, S. A., & 
17 | > Vannucci, M. (2016). An Integrative Bayesian Dirichlet-Multinomial Regression 
18 | > Model for the Analysis of Taxonomic Abundances in Microbiome data.
19 | 
20 | The code has been updated on 10/02/17 with an option to perform a stochastic 
21 | search algorithm for Bayesian variable selection instead of the Gibbs sampler. 
22 | The stochastic search variable selection approach provides gains in computational 
23 | speed.
24 | 
25 | ### Contents
26 | 
27 | The repository contains 
28 | 
29 | 1. C code implementing an MCMC sampler for Dirichlet-Multinomial Bayesian 
30 | Variable Selection (dmbvs) using spike-and-slab priors, 
31 | 
32 | 2. R code to wrap and run the sampler from within R, 
33 | 
34 | 3. a function for simulating data, 
35 | 
36 | 4. and a "start-to-finish" script (`example_analysis_script.R`) demonstrating 
37 | usage of the code. This script gives reasonable default settings for the 
38 | hyperparameters and MCMC parameters for the example simulated data. Settings may 
39 | change for other data.
40 | 
41 | ### Usage
42 | 
43 | * The R code requires the `dirmult` and `MASS` packages. Please ensure those are 
44 | installed first. 
45 | 
46 | * The C code relies on the [GNU Scientific Library (GSL)](https://www.gnu.org/software/gsl/). 
47 | GSL must be installed and modifications may need to be made to the 'library' 
48 | and 'include' paths in the Makefile.
49 | 
50 | * The main C file (`dmbvs.c`), as well as the Makefile to be used for compilation 
51 | of the code, can be found under the directory `code/`.
52 | 
53 | * On Linux and Mac the C code may be compiled with the Makefile from R using:
54 | 
55 | ```{r}
56 | # must be in package's root directory
57 | setwd("dmbvs")
58 | system("cd code; make")
59 | ```
60 | 
61 | * If compilation has been successful there will be an executable called `dmbvs.x` 
62 | in the `code/` directory. Data may be simulated and the MCMC code run using:
63 | 
64 | ```{r}
65 | source(file.path("code", "wrapper.R"))
66 | source(file.path("code", "helper_functions.R"))
67 | simdata = simulate_dirichlet_multinomial_regression(n_obs = 100, n_vars = 50,
68 |                                                     n_taxa = 50, n_relevant_vars = 5,
69 |                                                     n_relevant_taxa = 5)
70 | results = dmbvs(XX = simdata$XX[,-1], YY = simdata$YY, 
71 |                 intercept_variance = 10, slab_variance = 10, 
72 |                 bb_alpha = 0.02, bb_beta = 1.98, GG = 1100L, thin = 10L, burn = 100L,
73 |                 exec = file.path(".", "code", "dmbvs.x"), output_location = ".")
74 | ```
75 | 
76 | * Tested on:
77 |     * Mac with R version 3.2.2 and GSL version 2.1 with the Apple LLVM version 
78 |     7.0.2 compiler
79 |     * Red Hat Linux 6.6 with R version 3.1.2 and GSL version 2.1 and gcc 
80 |     version 5.2.0
81 | 
82 | 


--------------------------------------------------------------------------------
/example_analysis_script.R:
--------------------------------------------------------------------------------
 1 | 
 2 | ###################################################################
 3 | # Example Analysis of simulated data using a Dirichlet-Multinomial
 4 | # Bayesian variable selection model
 5 | ###################################################################
 6 | 
 7 | # -- README -------------------------------------------------------------------
 8 | # This script gives an example of how to run the code on a simulated
 9 | # dataset. It assumes several things:
10 | # 1. The Gnu Scientific Library is installed
11 | # 2. The paths in the Makefile are correct
12 | # 3. The data is available
13 | # 4. The working directory is the root of the repository
14 | # -----------------------------------------------------------------------------
15 | 
16 | # -- simulate data ------------------------------------------------------------
17 | # this simulates a dataset identical in dimension to the ones used in the
18 | # simulation sections of the manuscript
19 | # helper_functions.R also defines bfdr() and association_plot() used below
20 | source(file.path("code", "helper_functions.R"))
21 | simdata = simulate_dirichlet_multinomial_regression(n_obs = 100, n_vars = 50,
22 |                                                     n_taxa = 50, n_relevant_vars = 5,
23 |                                                     n_relevant_taxa = 5)
24 | Ysim = simdata$YY
25 | Xsim = simdata$XX
26 | 
27 | # -- run analysis -------------------------------------------------------------
28 | # preliminaries
29 | source(file.path("code", "wrapper.R"))
30 | # "&&" so that make never runs outside code/; stop here if the build fails
31 | if(system("cd code && make") != 0){
32 |   stop("compilation of code/dmbvs.x failed: check the paths in code/Makefile")
33 | }
34 | executable_location = "code/dmbvs.x"
35 | save_prefix = "simulation"
36 | 
37 | # prepare and check data
38 | YY = as.matrix(Ysim)
39 | # the first column of the simulated design matrix is the intercept: drop it
40 | XX = scale(as.matrix(Xsim[,-1]), center = TRUE, scale = TRUE)
41 | colnames(YY) = paste0("taxa", 1:ncol(YY))
42 | colnames(XX) = paste0("covariate", 1:ncol(XX))
43 | dim(YY)
44 | dim(XX)
45 | 
46 | # MCMC and hyperparameters
47 | # these values are reasonable for the data simulated here but should be changed
48 | # depending on the characteristics of other datasets
49 | GG = 301L; thin = 2L; burn = 101L; # fast, for testing
50 | #GG = 11001L; thin = 10L; burn = 1001L; # good defaults, in this case
51 | # reasonable default parameters, see further discussion in the manuscript
52 | bb_alpha = 0.02; bb_beta = 2 - bb_alpha
53 | proposal_alpha = 0.5; proposal_beta = 0.5
54 | slab_variance = 10; intercept_variance = 10
55 | 
56 | # description
57 | cat("Beta-Binomial mean:", bb_alpha/(bb_alpha + bb_beta), "\n")
58 | cat("Number of kept iterations:", (GG - burn)/thin, "\n")
59 | 
60 | # run the algorithm
61 | results = dmbvs(XX = XX, YY = YY, intercept_variance = intercept_variance,
62 |                 slab_variance = slab_variance, bb_alpha = bb_alpha,
63 |                 bb_beta = bb_beta, GG = GG, thin = thin, burn = burn,
64 |                 init_beta = "warmstart", init_alpha = "warmstart",
65 |                 proposal_alpha = proposal_alpha, proposal_beta = proposal_beta,
66 |                 exec = executable_location, selection_type = "ss",
67 |                 output_location = ".")
68 | params = data.frame(GG, burn, thin, intercept_variance,
69 |                     slab_variance, bb_alpha, bb_beta,
70 |                     proposal_alpha, proposal_beta)
71 | save(results, params, XX, YY,
72 |      file = paste0("results-", save_prefix, "-", Sys.Date(), ".RData"))
73 | 
74 | # quick check results
75 | # marginal posterior probability of inclusion: share of kept iterations in
76 | # which each coefficient is non-zero
77 | mppi = colMeans((results$beta != 0) + 0)
78 | (blfdrate = bfdr(mppi, threshold = 0.1)$threshold)
79 | # covariates vary fastest, matching the row-wise flattening of the beta matrix
80 | MPPI = data.frame(expand.grid(covariates = colnames(results$hyperparameters$inputdata$XX),
81 |                               taxa = colnames(results$hyperparameters$inputdata$YY)),
82 |                   mppi = mppi,
83 |                   beta = colMeans(results$beta),
84 |                   truebeta = c(t(simdata$betas[,-1])))
85 | subset(MPPI, mppi > blfdrate | truebeta != 0)
86 | plot(mppi, type = "h", ylab = "MPPI",
87 |      xlab = "beta index", main = "Manhattan plot")
88 | 
89 | # active variable traceplot
90 | plot.ts(rowSums((results$beta != 0) + 0), main = "Active variables traceplot",
91 |         ylab = "number of betas in the model", xlab = "iteration")
92 | 
93 | # some of the selected beta traceplots
94 | # at most 10 traces (the plot.ts limit); short runs may select fewer, or none
95 | selected = which(mppi > 0.5)
96 | if(length(selected) > 0){
97 |   fortraces = selected[sample.int(length(selected), min(10, length(selected)))]
98 |   plot.ts(results$beta[,fortraces], main = "Some selected beta traceplots",
99 |           xlab = "iteration", ylab = "")
100 | }else{
101 |   cat("No coefficient has MPPI above 0.5: skipping the beta traceplots\n")
102 | }
103 | 
104 | # visualize the associations
105 | # association_plot() accepts only these four columns, so truebeta is left out
106 | association_plot(MPPI[, c("covariates", "taxa", "mppi", "beta")],
107 |                  graph_layout = "bipartite", main = "Sample Results")
108 | 
109 | 


--------------------------------------------------------------------------------
/code/wrapper.R:
--------------------------------------------------------------------------------
  1 | #' an R wrapper to C code for spike-and-slab Dirichlet--Multinomial
  2 | #' Bayesian variable selection
  3 | #'
  4 | #' Writes the data, initial values and proposal settings to text files in a
  5 | #' per-process output directory, runs the compiled sampler through a shell
  6 | #' command and reads the sampler's output files back into R.
  7 | #'
  8 | #' @param XX covariate matrix (without intercept)
  9 | #' @param YY count matrix
 10 | #' @param intercept_variance a scalar for the prior variance on the intercept
 11 | #' of the log-linear predictors
 12 | #' @param slab_variance a scalar for the prior variance on the slab of the
 13 | #' spike-and-slab
 14 | #' @param bb_alpha a scalar for the alpha hyperparameter of the Beta-Bernoulli
 15 | #' spike inclusion prior
 16 | #' @param bb_beta a scalar for the beta hyperparameter of the Beta-Bernoulli
 17 | #' spike inclusion prior
 18 | #' @param GG the total number of MCMC iterations
 19 | #' @param thin the MCMC thinning interval
 20 | #' @param burn the number of MCMC iterations out of GG that will be discarded
 21 | #' @param proposal_alpha initial value, either a scalar or a vector of length
 22 | #' ncol(YY), if a scalar, that value is used for all proposals on alpha
 23 | #' @param proposal_beta initial value, either a scalar or a matrix with
 24 | #' ncol(YY) rows and ncol(XX) columns, if a scalar, that value is used for all
 25 | #' proposals on beta
 26 | #' @param init_alpha either a scalar, a vector of size ncol(YY), or the string
 27 | #' "warmstart" (the scaled log column sums of YY); NULL is treated as 0
 28 | #' @param init_beta either a scalar, a matrix with ncol(YY) rows and
 29 | #' ncol(XX) columns, or the string "warmstart" (Spearman correlations between
 30 | #' each covariate and each compositionalized taxon, kept where the correlation
 31 | #' test passes a 0.2 false discovery rate); NULL is treated as 0; inclusion
 32 | #' initialization uses non-zero elements of init_beta
 33 | #' @param exec the path to the C executable
 34 | #' @param output_location the directory in which the subdirectory
 35 | #' output-pid-<process id> holding the sampler's files is created; if NULL,
 36 | #' the working directory is used
 37 | #' @param r_seed a whole-number seed to pass to GSL's random number generator;
 38 | #' if NULL, one is drawn with sample()
 39 | #' @param selection_type either "ss" (Stochastic Search) or "Gibbs", determines
 40 | #' the MCMC mechanism for variable selection
 41 | #'
 42 | #' @return alpha: a matrix with iterations in the rows and the alphas in the
 43 | #' columns
 44 | #' @return alpha_accept: the Metropolis-Hastings acceptance ratio for the alphas
 45 | #' @return beta: a matrix with iterations in the rows and the per-iteration beta
 46 | #' matrix -- flattened by rows -- in the columns
 47 | #' @return beta_accept: the Metropolis-Hastings acceptance ratio for the betas
 48 | #' @return hyperparameters: a list containing the hyperparameters, the MCMC
 49 | #' parameters, and the data from the original function call
 50 | #'
 51 | #' @export
 52 | dmbvs = function(XX, YY, intercept_variance, slab_variance, bb_alpha, bb_beta,
 53 |                  GG, thin, burn, proposal_alpha = 0.5, proposal_beta = 0.5,
 54 |                  init_alpha = 0, init_beta = 0, exec = file.path(".","dmbvs.x"),
 55 |                  output_location = NULL, r_seed = NULL, selection_type = "Gibbs"){
 56 | 
 57 |   # data dimensions
 58 |   n_cats = ncol(YY)
 59 |   n_obs = nrow(XX)
 60 |   n_vars = ncol(XX)
 61 | 
 62 |   # argument checking
 63 |   #if(!is.integer(YY)){warning("YY is not integer typed: is it a count matrix?")}
 64 |   if(bb_alpha >= bb_beta){warning("Beta-Bernoulli prior is symmetric or left skewed: right skewed enforces sparsity")}
 65 |   if(all(XX[,1] == 1)){stop("XX appears to have an intercept column")}
 66 |   if(n_obs != nrow(YY)){stop("dimensions of XX and YY do not match")}
 67 |   if(burn > GG){stop("burnin greater than the number of iterations")}
 68 |   if(any(intercept_variance < 0, slab_variance < 0, bb_alpha < 0, bb_beta < 0)){stop("check hyperparameter values: at least one is negative")}
 69 |   # a non-scalar proposal_beta is rejected as soon as EITHER dimension is wrong
 70 |   if(length(proposal_beta) != 1 &&
 71 |      (nrow(as.matrix(proposal_beta)) != n_cats || ncol(as.matrix(proposal_beta)) != n_vars)){
 72 |     stop("bad dimension for proposal_beta")
 73 |   }
 74 |   if(!(selection_type %in% c("ss", "Gibbs"))){stop("unrecognized variable selection type: choose ss or Gibbs")}
 75 | 
 76 |   # set output location
 77 |   # for extremely long runs it's necessary to leave output in a subdirectory of the
 78 |   # working directory since temp directories seems to get cleaned out occasionally
 79 |   # for parallel simulations need unique output directories, hence the pid
 80 |   base_dir = if(is.null(output_location)) getwd() else output_location
 81 |   out_dir = paste0(base_dir, "/output-pid-", Sys.getpid())
 82 |   if(!file.exists(out_dir)){
 83 |     dir.create(out_dir, recursive = TRUE)
 84 |   }
 85 | 
 86 |   # every input file for the sampler is a plain text table without headers
 87 |   write_input = function(values, file_name){
 88 |     utils::write.table(values, file.path(out_dir, file_name),
 89 |                        row.names = FALSE, col.names = FALSE)
 90 |   }
 91 | 
 92 |   # text files for data
 93 |   write_input(as.matrix(XX), "covariates.txt")
 94 |   write_input(as.matrix(YY), "count_matrix.txt")
 95 | 
 96 |   # text files for proposals and initialization
 97 |   # intercept initialization
 98 |   # identical() keeps the "warmstart" test a single TRUE/FALSE for any input
 99 |   if(is.null(init_alpha)){
100 |     write_input(rep(0, times = n_cats), "init_alpha.txt")
101 |   }else if(is.numeric(init_alpha) && length(init_alpha) == 1){
102 |     write_input(rep(init_alpha, times = n_cats), "init_alpha.txt")
103 |   }else if(is.numeric(init_alpha) && length(init_alpha) == n_cats){
104 |     write_input(init_alpha, "init_alpha.txt")
105 |   }else if(identical(init_alpha, "warmstart")){
106 |     write_input(scale(log(colSums(YY))), "init_alpha.txt")
107 |   }else{
108 |     stop("init_alpha not recognized")
109 |   }
110 |   # regression parameter initialization
111 |   if(is.null(init_beta)){
112 |     # empty initialization
113 |     write_input(rep(0, times = n_cats * n_vars), "init_beta.txt")
114 |   }else if(is.numeric(init_beta) && length(init_beta) == 1){
115 |     # scalar initialization
116 |     write_input(rep(init_beta, times = n_cats * n_vars), "init_beta.txt")
117 |   }else if(length(dim(init_beta)) == 2 && all(dim(init_beta) == c(n_cats, n_vars))){
118 |     # matrix initialization, flattened by rows
119 |     write_input(c(t(as.matrix(init_beta))), "init_beta.txt")
120 |   }else if(identical(init_beta, "warmstart")){
121 |     # false discovery rate on correlation test initialization
122 |     cormat = matrix(0, n_cats, n_vars)
123 |     pmat = matrix(0, n_cats, n_vars)
124 |     yy = YY/rowSums(YY) # compositionalize
125 |     for(rr in seq_len(n_cats)){
126 |       for(cc in seq_len(n_vars)){
127 |         pmat[rr, cc] = stats::cor.test(XX[, cc], yy[, rr], method = "spearman",
128 |                                        exact = FALSE)$p.value
129 |         cormat[rr, cc] = stats::cor(XX[, cc], yy[, rr], method = "spearman")
130 |       }
131 |     }
132 |     # defaults to 0.2 false discovery rate
133 |     pm = matrix((stats::p.adjust(c(pmat), method = "fdr") <= 0.2) + 0, n_cats,
134 |                 n_vars)
135 |     betmat = cormat * pm
136 |     write_input(c(t(betmat)), "init_beta.txt")
137 |   }else{
138 |     # any other value, including strings other than "warmstart"
139 |     stop("init_beta not recognized, if using a matrix, ensure proper dimensions")
140 |   }
141 | 
142 |   # intercept proposal
143 |   if(length(proposal_alpha) == 1){
144 |     write_input(rep(proposal_alpha, times = n_cats), "proposal_alpha.txt")
145 |   }else if(length(proposal_alpha) == n_cats){
146 |     write_input(proposal_alpha, "proposal_alpha.txt")
147 |   }else{
148 |     stop("problem with proposal_alpha")
149 |   }
150 |   # regression proposal (dimensions were validated during argument checking)
151 |   if(length(proposal_beta) == 1){
152 |     write_input(rep(proposal_beta, times = n_cats * n_vars), "proposal_beta.txt")
153 |   }else{
154 |     write_input(c(t(as.matrix(proposal_beta))), "proposal_beta.txt")
155 |   }
156 | 
157 |   # pass the external seed to GSL which will have its own corresponding seed (see the .out file)
158 |   if(is.null(r_seed)){
159 |     a_random_seed = sample(1e6, 1)
160 |   }else if(is.numeric(r_seed) && length(r_seed) == 1 && !is.na(r_seed) &&
161 |            r_seed == round(r_seed) && abs(r_seed) <= .Machine$integer.max){
162 |     # whole numbers such as 42 are accepted; integer storage keeps paste()
163 |     # from writing the seed in scientific notation
164 |     a_random_seed = as.integer(r_seed)
165 |   }else{
166 |     stop("problem with external seed to pass to GSL")
167 |   }
168 | 
169 |   # run compiled code
170 |   # the last argument selects the sampler: 0 for stochastic search, 1 for Gibbs
171 |   selection_flag = if(selection_type == "ss") 0 else 1
172 |   log_file = paste0("dmbvs-pid-", Sys.getpid(), ".out")
173 |   # quote the paths so that spaces survive the shell; path.expand() resolves a
174 |   # leading "~" first because the shell does not expand it inside quotes
175 |   command = paste(shQuote(path.expand(exec)), GG, thin, burn,
176 |                   intercept_variance, slab_variance, bb_alpha, bb_beta,
177 |                   n_cats, n_obs, n_vars, a_random_seed,
178 |                   shQuote(path.expand(out_dir)), selection_flag,
179 |                   ">", shQuote(log_file))
180 |   status = system(command)
181 |   if(status != 0){
182 |     warning("the sampler exited with status ", status, ": see ", log_file)
183 |   }
184 | 
185 |   # read output
186 |   out_files = file.path(out_dir, c("alpha.out", "alpha_acceptance.out",
187 |                                    "beta.out", "beta_acceptance.out"))
188 |   if(!all(file.exists(out_files))){
189 |     stop("the sampler did not write its output files to ", out_dir,
190 |          ": check exec and see ", log_file)
191 |   }
192 |   aa = utils::read.table(out_files[1])
193 |   aaa = utils::read.table(out_files[2])
194 |   # bb is read as a n_vars * n_cats long list with each element have n_iters
195 |   # so when it's unlist()ed you get the first variable for n_iters, then the
196 |   # second, etc
197 |   bb = utils::read.table(out_files[3])
198 |   bba = utils::read.table(out_files[4])
199 | 
200 |   # variables in the columns, iterations in the rows
201 |   return(list(alpha = t(matrix(unlist(aa), nrow = n_cats, byrow = TRUE)),
202 |               alpha_accept = aaa[,1],
203 |               # after transpose beta has iterations in the rows
204 |               beta = t(matrix(unlist(bb), nrow = (n_cats * n_vars), byrow = TRUE)),
205 |               beta_accept = bba[,1],
206 |               hyperparameters = list(mcmc = data.frame(GG = GG, thin = thin, burn = burn,
207 |                                                        proposal_alpha = proposal_alpha,
208 |                                                        proposal_beta = proposal_beta,
209 |                                                        random_seed = a_random_seed),
210 |                                      priors = data.frame(intercept_variance = intercept_variance,
211 |                                                          slab_variance = slab_variance,
212 |                                                          bb_alpha = bb_alpha, bb_beta = bb_beta),
213 |                                      inputdata = list(XX = XX, YY = YY))))
214 | }
215 | 
216 | 


--------------------------------------------------------------------------------
/code/helper_functions.R:
--------------------------------------------------------------------------------
  1 | #' calculate the Bayesian False Discovery Rate
  2 | #'
  3 | #' @param mppi_vector A vector of marginal posterior probabilities of inclusion.
  4 | #' @param threshold The expected false discovery rate threshold
  5 | #'
  6 | #' @return selected: A boolean vector, TRUE where the MPPI is strictly above
  7 | #' the cutoff; all FALSE when no cutoff attains the requested rate
  8 | #' @return threshold: The MPPI cutoff (1 when no cutoff attains the rate)
  9 | #'
 10 | #' @references
 11 | #' Newton, M. A., Noueiry, A., Sarkar, D., & Ahlquist, P. (2004). Detecting
 12 | #' differential gene expression with a semiparametric hierarchical mixture
 13 | #' method. Biostatistics, 5(2), 155-76. doi:10.1093/biostatistics/5.2.155
 14 | #'
 15 | #' @export
 16 | 
 17 | bfdr = function(mppi_vector, threshold = 0.1){
 18 |   # arg checking
 19 |   if(any(mppi_vector > 1 | mppi_vector < 0)){
 20 |     stop("Bad input: mppi_vector should contain probabilities")
 21 |   }
 22 |   if(threshold > 1 | threshold < 0){
 23 |     stop("Bad input: threshold should be a probability")
 24 |   }
 25 |   # expected fdr when the k largest ppi's are selected, for k = 1, 2, ...
 26 |   sorted_mppi_vector = sort(mppi_vector, decreasing = TRUE)
 27 |   fdr = cumsum(1 - sorted_mppi_vector)/seq_along(sorted_mppi_vector)
 28 |   # cut at the largest k whose fdr is below threshold; when there is none,
 29 |   # use a cutoff of 1 so that nothing is selected
 30 |   below = which(fdr < threshold)
 31 |   ppi_threshold = if(length(below) > 0) sorted_mppi_vector[max(below)] else 1
 32 |   selected = mppi_vector > ppi_threshold
 33 |   return(list(selected = selected, threshold = ppi_threshold))
 34 | }
 35 | 
 36 | #' a bipartite graph showing the association between X and Y
 37 | #'
 38 | #' @note requires the igraph package
 39 | #'
 40 | #' @param MPPI a data.frame with n_vars x n_taxa rows and these four columns:
 41 | #'  1) covariates: the names of the columns of X
 42 | #'  2) taxa: the names of the columns of Y
 43 | #'  3) mppi: the marginal posterior probability of inclusion for each taxa
 44 | #'          by covariate parameter
 45 | #'  4) beta: a point estimate for each taxa by covariate parameter
 46 | #' @param mppi_threshold the threshold for inclusion in the plot
 47 | #' @param inc_legend boolean to include the legend
 48 | #' @param lwdx a scalar multiplier for growing or shrinking the widths of edges
 49 | #' @param graph_layout either "circular" for a round layout or "bipartite" for
 50 | #' a side-by-side layout
 51 | #' @param lab_dist a scalar argument for the distance between node centers and
 52 | #' the node labels
 53 | #' @param ... passthrough arguments
 54 | #'
 55 | #' @return a plot
 56 | #'
 57 | #' @export
 58 | association_plot = function(MPPI, mppi_threshold = 0.5, inc_legend = F,
 59 |                             lwdx = 5, graph_layout = "circular", lab_dist = 2,
 60 |                             ...){
 61 |   required = c("covariates", "taxa", "mppi", "beta")
 62 |   # every required column must be present and no other column is allowed
 63 |   if(!all(required %in% colnames(MPPI)) || any(!(colnames(MPPI) %in% required))){
 64 |     stop("please ensure the column names of MPPI are correct\n*** they must be: covariates, taxa, mppi, beta")
 65 |   }
 66 |   if(!require(igraph)){
 67 |     stop("please ensure the igraph package is installed")
 68 |   }
 69 |   # covariates and taxa go first: graph_from_data_frame() reads edges from them
 70 |   mm = subset(MPPI[, required], mppi > mppi_threshold)
 71 |   if(nrow(mm) == 0){ # when there are no relevant associations
 72 |     graphics::plot(1, type = "n", axes = F, xlab = "", ylab = "",
 73 |                    xlim = c(-1, 1), ylim = c(-1, 1), ...)
 74 |     graphics::text(0, 0, "No Associations\nAbove MPPI\nThreshold")
 75 |   }else{ # when there are relevant associations
 76 |     # esign is 1 for negative and 2 for zero or positive point estimates
 77 |     mm$esign = round((sign(mm$beta) + 2)/3) + 1
 78 |     mm$emag = abs(mm$beta)
 79 |     mppi = mm[order(mm$covariates, mm$taxa),]
 80 |     hh = igraph::graph_from_data_frame(mppi, directed = F)
 81 |     # modify graph characteristics
 82 |     igraph::E(hh)$weight = igraph::E(hh)$emag*lwdx
 83 |     igraph::E(hh)$lty = 1
 84 |     # color index 1 ("red") for positive, 2 ("blue") for negative estimates
 85 |     igraph::E(hh)$color = ifelse(igraph::E(hh)$esign == 2, 1, 2)
 86 |     # covariate vertices are TRUE; look names up rather than rely on vertex order
 87 |     igraph::V(hh)$type = igraph::V(hh)$name %in% as.character(mppi$covariates)
 88 |     # layout as a bipartite graph
 89 |     if(graph_layout == "bipartite"){
 90 |       la = igraph::layout_as_bipartite(hh, hgap = 20)
 91 |       graphics::plot(hh, layout = la[,c(2,1)],
 92 |                      edge.width = igraph::E(hh)$weight,
 93 |                      edge.lty = igraph::E(hh)$lty,
 94 |                      edge.color = c("red", "blue")[igraph::E(hh)$color],
 95 |                      vertex.color = c("green", "yellow")[igraph::V(hh)$type + 1],
 96 |                      vertex.shape = c("circle", "square")[igraph::V(hh)$type + 1],
 97 |                      vertex.frame.color = "black", vertex.label.cex = 1.2,
 98 |                      vertex.label.color = "black", vertex.label.family = "sans",
 99 |                      vertex.label.font = 2,
100 |                      vertex.label.degree = c(2*pi, pi)[igraph::V(hh)$type + 1],
101 |                      vertex.label.dist = lab_dist, ...)
102 |     }
103 |     # layout as a circular graph
104 |     if(graph_layout == "circular"){
105 |       la = igraph::layout_in_circle(hh)
106 |       # adapted from http://stackoverflow.com/questions/23209802/placing-vertex-label-outside-a-circular-layout-in-igraph
107 |       # label angles: vertex index mapped linearly onto [0, 2*pi], then negated
108 |       n_vert = nrow(la)
109 |       lab.locs = -(((seq_len(n_vert) - 1)/max(n_vert - 1, 1) * 2 * pi) %% (2 * pi))
110 |       graphics::plot(hh, layout = la,
111 |                      edge.width = igraph::E(hh)$weight,
112 |                      edge.lty = igraph::E(hh)$lty,
113 |                      edge.color = c("red", "blue")[igraph::E(hh)$color],
114 |                      vertex.color = c("green", "yellow")[igraph::V(hh)$type + 1],
115 |                      vertex.shape = c("circle", "square")[igraph::V(hh)$type + 1],
116 |                      vertex.frame.color = "black", vertex.label.cex = 1.2,
117 |                      vertex.label.color = "black", vertex.label.family = "sans",
118 |                      vertex.label.font = 2, vertex.label.dist = lab_dist,
119 |                      vertex.label.degree = lab.locs, ...)
120 |     }
121 |     # a black and white color scheme without vertex shapes
122 |     # graphics::plot(hh, layout = la,
123 |     #                edge.width = igraph::E(hh)$weight,
124 |     #                edge.color = "black",
125 |     #                vertex.color = c("grey", "white")[V(hh)$type + 1],
126 |     #                vertex.label.cex = 1.3,
127 |     #                vertex.label.color = "black", vertex.label.family = "sans",
128 |     #                vertex.label.font = c(2, 3)[igraph::V(hh)$type + 1],
129 |     #                vertex.size = 10, vertex.label.dist = 0.5,
130 |     #                vertex.label.degree = lab.locs)
131 |     if(inc_legend){
132 |       # legend colors follow the edge colors drawn above
133 |       graphics::legend("topright", legend = c("positive", "negative"), lwd = c(3, 3), col = c("red","blue"))
134 |     }
135 |   }
136 | }
137 | 
138 | #' heat map of an abundance (count) table
139 | #'
140 | #' @note requires the ggplot2 package
141 | #'
142 | #' @param count_matrix a matrix of counts; columns are drawn in order of
143 | #' decreasing column mean and the fill is log(count)
144 | #' @param title an optional title
145 | #'
146 | #' @return a plot
147 | #' @export
148 | abundance_plot = function(count_matrix, title = ""){
149 |   if(!require(ggplot2)){
150 |     stop("please ensure the ggplot2 package is installed")
151 |   }
152 |   # columns by decreasing mean; one long-format row per cell, rows vary fastest
153 |   by_mean = count_matrix[, order(colMeans(count_matrix), decreasing = T)]
154 |   counts = as.vector(by_mean)
155 |   long = cbind(expand.grid(1:dim(by_mean)[1], 1:dim(by_mean)[2]), counts, counts > 0)
156 |   names(long) = c("Sample.ID","Phylotype","Counts","Presence")
157 |   print(ggplot2::ggplot(long, ggplot2::aes(y = Sample.ID, x = Phylotype, fill = log(Counts))) +
158 |           ggplot2::geom_raster() + ggplot2::theme_bw() + ggplot2::ggtitle(title))
159 | }
160 | 
161 | 
#' simulate data from a Dirichlet-Multinomial regression model
#'
#' Draws standardized, correlated covariates, builds a sparse coefficient
#' matrix with alternating-sign effects, and then samples one row of taxa
#' counts per observation with dirmult::simPop.
#'
#' @note Requires the dirmult and MASS packages
#' @note Relevant covariates are placed in columns 1, 4, 7, ... of the
#'   coefficient matrix, so n_vars must be at least 3 * n_relevant_vars - 2
#'
#' @param n_obs: the number of samples
#' @param n_vars: number of covariates excluding the intercept
#' @param n_taxa: number of species
#' @param n_relevant_vars: number of relevant nutrients (0 gives no effects)
#' @param n_relevant_taxa: number of relevant species (0 gives no effects)
#' @param beta_min: minimum absolute value of the regression parameters
#' @param beta_max: maximum absolute value of the regression parameters
#' @param signoise: scalar multiplier on the regression parameters
#' @param n_reads_min: lower bound on uniform distribution for number of reads in each sample
#' @param n_reads_max: upper bound on uniform distribution for number of reads in each sample
#' @param theta0: the dispersion parameter
#' @param rho: the correlation between covariates
#'
#' @return XX: (design matrix) with intercept: n_obs * (n_vars + 1)
#' @return YY: (count matrix) rows: n_obs samples, columns: n_taxa species
#' @return alphas: simulated intercept vector
#' @return betas: simulated coefficient matrix n_taxa * (n_vars + 1)
#' @return phi: expected taxa proportions for each sample: n_obs * n_taxa
#' @return n_reads_min, n_reads_max: row sum parameters
#' @return theta0, rho, signoise: simulation inputs
#'
#' @export
simulate_dirichlet_multinomial_regression = function(n_obs = 100,
                                                     n_vars = 100,
                                                     n_taxa = 40,
                                                     n_relevant_vars = 4,
                                                     n_relevant_taxa = 4,
                                                     beta_min = 0.5,
                                                     beta_max = 1.0,
                                                     signoise = 1.0,
                                                     n_reads_min = 1000,
                                                     n_reads_max = 2000,
                                                     theta0 = 0.01,
                                                     rho = 0.4){

  # check for required packages
  if(!require(dirmult)){
    stop("dirmult package required")
  }
  if(!require(MASS)){
    stop("MASS package required")
  }
  # the ii-th relevant covariate lives in column 3 * ii - 2, so fail early
  # with a readable message instead of a "subscript out of bounds" error
  if(n_relevant_vars > 0 && 3 * n_relevant_vars - 2 > n_vars){
    stop("n_vars must be at least 3 * n_relevant_vars - 2")
  }
  # covariance matrix for predictors: rho^|i - j|
  Sigma = matrix(1, n_vars, n_vars)
  Sigma = rho^abs(row(Sigma) - col(Sigma))
  # include the intercept
  XX = cbind(rep(1, n_obs),
             scale(MASS::mvrnorm(n = n_obs, mu = rep(0, n_vars), Sigma = Sigma)))
  # empties
  YY = matrix(0, n_obs, n_taxa)
  betas = matrix(0, n_taxa, n_vars)
  phi = matrix(0, n_obs, n_taxa)
  # nothing to fill in when there are no relevant covariates or taxa
  if(n_relevant_vars > 0 && n_relevant_taxa > 0){
    # parameters with signs alternating
    if(n_relevant_taxa != 1){
      coef = seq(beta_min, beta_max, length.out = n_relevant_taxa) *
        rep_len(c(1, -1), n_relevant_taxa)
    }else{
      coef = (beta_min + beta_max) / 2
    }
    coef_g = rep(1.0, length.out = n_relevant_vars)
    for(ii in seq_len(n_relevant_vars)){
      # overlap species: each covariate starts one taxon further along
      taxa_rows = ((ii - 1):(ii + n_relevant_taxa - 2)) %% n_taxa + 1
      # permute by index: sample(coef) would draw from 1:floor(coef) when
      # coef is a single number >= 1
      shuffled = coef[sample.int(length(coef))]
      rotation = ((ii - 1):(ii + n_relevant_taxa - 2)) %% n_relevant_taxa + 1
      betas[taxa_rows, 3 * ii - 2] = coef_g[ii] * shuffled[rotation]
    }
  }
  # exp(-2.3) is about 0.1 and exp(2.3) is about 10
  intercept = runif(n_taxa, -2.3, 2.3)
  Beta = cbind(intercept, signoise * betas)
  # row totals; index into the range so that n_reads_min == n_reads_max
  # yields that fixed depth rather than a draw from 1:n_reads_min
  read_range = n_reads_min:n_reads_max
  ct0 = read_range[sample.int(length(read_range), n_obs, replace = TRUE)]
  for(ii in seq_len(n_obs)){
    thisrow = as.vector(exp(Beta %*% XX[ii, ]))
    phi[ii, ] = thisrow/sum(thisrow)
    YY[ii, ] = dirmult::simPop(J = 1, n = ct0[ii], pi = phi[ii, ], theta = theta0)$data[1, ]
  }

  return(list(XX = XX, YY = YY, alphas = intercept, betas = Beta,
              n_reads_min = n_reads_min, n_reads_max = n_reads_max,
              theta0 = theta0, phi = phi, rho = rho, signoise = signoise))

}
249 | 
250 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 |                             Preamble
  9 | 
 10 |   The GNU General Public License is a free, copyleft license for
 11 | software and other kinds of works.
 12 | 
 13 |   The licenses for most software and other practical works are designed
 14 | to take away your freedom to share and change the works.  By contrast,
 15 | the GNU General Public License is intended to guarantee your freedom to
 16 | share and change all versions of a program--to make sure it remains free
 17 | software for all its users.  We, the Free Software Foundation, use the
 18 | GNU General Public License for most of our software; it applies also to
 19 | any other work released this way by its authors.  You can apply it to
 20 | your programs, too.
 21 | 
 22 |   When we speak of free software, we are referring to freedom, not
 23 | price.  Our General Public Licenses are designed to make sure that you
 24 | have the freedom to distribute copies of free software (and charge for
 25 | them if you wish), that you receive source code or can get it if you
 26 | want it, that you can change the software or use pieces of it in new
 27 | free programs, and that you know you can do these things.
 28 | 
 29 |   To protect your rights, we need to prevent others from denying you
 30 | these rights or asking you to surrender the rights.  Therefore, you have
 31 | certain responsibilities if you distribute copies of the software, or if
 32 | you modify it: responsibilities to respect the freedom of others.
 33 | 
 34 |   For example, if you distribute copies of such a program, whether
 35 | gratis or for a fee, you must pass on to the recipients the same
 36 | freedoms that you received.  You must make sure that they, too, receive
 37 | or can get the source code.  And you must show them these terms so they
 38 | know their rights.
 39 | 
 40 |   Developers that use the GNU GPL protect your rights with two steps:
 41 | (1) assert copyright on the software, and (2) offer you this License
 42 | giving you legal permission to copy, distribute and/or modify it.
 43 | 
 44 |   For the developers' and authors' protection, the GPL clearly explains
 45 | that there is no warranty for this free software.  For both users' and
 46 | authors' sake, the GPL requires that modified versions be marked as
 47 | changed, so that their problems will not be attributed erroneously to
 48 | authors of previous versions.
 49 | 
 50 |   Some devices are designed to deny users access to install or run
 51 | modified versions of the software inside them, although the manufacturer
 52 | can do so.  This is fundamentally incompatible with the aim of
 53 | protecting users' freedom to change the software.  The systematic
 54 | pattern of such abuse occurs in the area of products for individuals to
 55 | use, which is precisely where it is most unacceptable.  Therefore, we
 56 | have designed this version of the GPL to prohibit the practice for those
 57 | products.  If such problems arise substantially in other domains, we
 58 | stand ready to extend this provision to those domains in future versions
 59 | of the GPL, as needed to protect the freedom of users.
 60 | 
 61 |   Finally, every program is threatened constantly by software patents.
 62 | States should not allow patents to restrict development and use of
 63 | software on general-purpose computers, but in those that do, we wish to
 64 | avoid the special danger that patents applied to a free program could
 65 | make it effectively proprietary.  To prevent this, the GPL assures that
 66 | patents cannot be used to render the program non-free.
 67 | 
 68 |   The precise terms and conditions for copying, distribution and
 69 | modification follow.
 70 | 
 71 |                        TERMS AND CONDITIONS
 72 | 
 73 |   0. Definitions.
 74 | 
 75 |   "This License" refers to version 3 of the GNU General Public License.
 76 | 
 77 |   "Copyright" also means copyright-like laws that apply to other kinds of
 78 | works, such as semiconductor masks.
 79 | 
 80 |   "The Program" refers to any copyrightable work licensed under this
 81 | License.  Each licensee is addressed as "you".  "Licensees" and
 82 | "recipients" may be individuals or organizations.
 83 | 
 84 |   To "modify" a work means to copy from or adapt all or part of the work
 85 | in a fashion requiring copyright permission, other than the making of an
 86 | exact copy.  The resulting work is called a "modified version" of the
 87 | earlier work or a work "based on" the earlier work.
 88 | 
 89 |   A "covered work" means either the unmodified Program or a work based
 90 | on the Program.
 91 | 
 92 |   To "propagate" a work means to do anything with it that, without
 93 | permission, would make you directly or secondarily liable for
 94 | infringement under applicable copyright law, except executing it on a
 95 | computer or modifying a private copy.  Propagation includes copying,
 96 | distribution (with or without modification), making available to the
 97 | public, and in some countries other activities as well.
 98 | 
 99 |   To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies.  Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 | 
103 |   An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License.  If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 | 
112 |   1. Source Code.
113 | 
114 |   The "source code" for a work means the preferred form of the work
115 | for making modifications to it.  "Object code" means any non-source
116 | form of a work.
117 | 
118 |   A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 | 
123 |   The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form.  A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 | 
134 |   The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities.  However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work.  For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 | 
147 |   The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 | 
151 |   The Corresponding Source for a work in source code form is that
152 | same work.
153 | 
154 |   2. Basic Permissions.
155 | 
156 |   All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met.  This License explicitly affirms your unlimited
159 | permission to run the unmodified Program.  The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work.  This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 | 
164 |   You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force.  You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright.  Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 | 
175 |   Conveying under any other circumstances is permitted solely under
176 | the conditions stated below.  Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 | 
179 |   3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 | 
181 |   No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 | 
187 |   When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 | 
195 |   4. Conveying Verbatim Copies.
196 | 
197 |   You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 | 
205 |   You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 | 
208 |   5. Conveying Modified Source Versions.
209 | 
210 |   You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 | 
214 |     a) The work must carry prominent notices stating that you modified
215 |     it, and giving a relevant date.
216 | 
217 |     b) The work must carry prominent notices stating that it is
218 |     released under this License and any conditions added under section
219 |     7.  This requirement modifies the requirement in section 4 to
220 |     "keep intact all notices".
221 | 
222 |     c) You must license the entire work, as a whole, under this
223 |     License to anyone who comes into possession of a copy.  This
224 |     License will therefore apply, along with any applicable section 7
225 |     additional terms, to the whole of the work, and all its parts,
226 |     regardless of how they are packaged.  This License gives no
227 |     permission to license the work in any other way, but it does not
228 |     invalidate such permission if you have separately received it.
229 | 
230 |     d) If the work has interactive user interfaces, each must display
231 |     Appropriate Legal Notices; however, if the Program has interactive
232 |     interfaces that do not display Appropriate Legal Notices, your
233 |     work need not make them do so.
234 | 
235 |   A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit.  Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 | 
245 |   6. Conveying Non-Source Forms.
246 | 
247 |   You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 | 
252 |     a) Convey the object code in, or embodied in, a physical product
253 |     (including a physical distribution medium), accompanied by the
254 |     Corresponding Source fixed on a durable physical medium
255 |     customarily used for software interchange.
256 | 
257 |     b) Convey the object code in, or embodied in, a physical product
258 |     (including a physical distribution medium), accompanied by a
259 |     written offer, valid for at least three years and valid for as
260 |     long as you offer spare parts or customer support for that product
261 |     model, to give anyone who possesses the object code either (1) a
262 |     copy of the Corresponding Source for all the software in the
263 |     product that is covered by this License, on a durable physical
264 |     medium customarily used for software interchange, for a price no
265 |     more than your reasonable cost of physically performing this
266 |     conveying of source, or (2) access to copy the
267 |     Corresponding Source from a network server at no charge.
268 | 
269 |     c) Convey individual copies of the object code with a copy of the
270 |     written offer to provide the Corresponding Source.  This
271 |     alternative is allowed only occasionally and noncommercially, and
272 |     only if you received the object code with such an offer, in accord
273 |     with subsection 6b.
274 | 
275 |     d) Convey the object code by offering access from a designated
276 |     place (gratis or for a charge), and offer equivalent access to the
277 |     Corresponding Source in the same way through the same place at no
278 |     further charge.  You need not require recipients to copy the
279 |     Corresponding Source along with the object code.  If the place to
280 |     copy the object code is a network server, the Corresponding Source
281 |     may be on a different server (operated by you or a third party)
282 |     that supports equivalent copying facilities, provided you maintain
283 |     clear directions next to the object code saying where to find the
284 |     Corresponding Source.  Regardless of what server hosts the
285 |     Corresponding Source, you remain obligated to ensure that it is
286 |     available for as long as needed to satisfy these requirements.
287 | 
288 |     e) Convey the object code using peer-to-peer transmission, provided
289 |     you inform other peers where the object code and Corresponding
290 |     Source of the work are being offered to the general public at no
291 |     charge under subsection 6d.
292 | 
293 |   A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 | 
297 |   A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling.  In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage.  For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product.  A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 | 
310 |   "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source.  The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 | 
318 |   If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information.  But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 | 
329 |   The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed.  Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 | 
337 |   Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 | 
343 |   7. Additional Terms.
344 | 
345 |   "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law.  If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 | 
354 |   When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it.  (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.)  You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 | 
361 |   Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 | 
365 |     a) Disclaiming warranty or limiting liability differently from the
366 |     terms of sections 15 and 16 of this License; or
367 | 
368 |     b) Requiring preservation of specified reasonable legal notices or
369 |     author attributions in that material or in the Appropriate Legal
370 |     Notices displayed by works containing it; or
371 | 
372 |     c) Prohibiting misrepresentation of the origin of that material, or
373 |     requiring that modified versions of such material be marked in
374 |     reasonable ways as different from the original version; or
375 | 
376 |     d) Limiting the use for publicity purposes of names of licensors or
377 |     authors of the material; or
378 | 
379 |     e) Declining to grant rights under trademark law for use of some
380 |     trade names, trademarks, or service marks; or
381 | 
382 |     f) Requiring indemnification of licensors and authors of that
383 |     material by anyone who conveys the material (or modified versions of
384 |     it) with contractual assumptions of liability to the recipient, for
385 |     any liability that these contractual assumptions directly impose on
386 |     those licensors and authors.
387 | 
388 |   All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10.  If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term.  If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 | 
398 |   If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 | 
403 |   Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 | 
407 |   8. Termination.
408 | 
409 |   You may not propagate or modify a covered work except as expressly
410 | provided under this License.  Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 | 
415 |   However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 | 
422 |   Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 | 
429 |   Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License.  If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 | 
435 |   9. Acceptance Not Required for Having Copies.
436 | 
437 |   You are not required to accept this License in order to receive or
438 | run a copy of the Program.  Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance.  However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work.  These actions infringe copyright if you do
443 | not accept this License.  Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 | 
446 |   10. Automatic Licensing of Downstream Recipients.
447 | 
448 |   Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License.  You are not responsible
451 | for enforcing compliance by third parties with this License.
452 | 
453 |   An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations.  If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 | 
463 |   You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License.  For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 | 
471 |   11. Patents.
472 | 
473 |   A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based.  The
475 | work thus licensed is called the contributor's "contributor version".
476 | 
477 |   A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version.  For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 | 
487 |   Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 | 
492 |   In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement).  To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 | 
499 |   If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients.  "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 | 
513 |   If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 | 
521 |   A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License.  You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 | 
536 |   Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 | 
540 |   12. No Surrender of Others' Freedom.
541 | 
542 |   If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License.  If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all.  For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 | 
552 |   13. Use with the GNU Affero General Public License.
553 | 
554 |   Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work.  The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 | 
563 |   14. Revised Versions of this License.
564 | 
565 |   The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time.  Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 | 
570 |   Each version is given a distinguishing version number.  If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation.  If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 | 
579 |   If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 | 
584 |   Later license versions may give you additional or different
585 | permissions.  However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 | 
589 |   15. Disclaimer of Warranty.
590 | 
591 |   THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 | 
600 |   16. Limitation of Liability.
601 | 
602 |   IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 | 
612 |   17. Interpretation of Sections 15 and 16.
613 | 
614 |   If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 | 


--------------------------------------------------------------------------------
/code/dmbvs.c:
--------------------------------------------------------------------------------
   1 | // *********************************************************************** //
   2 | // MCMC code implementing integrative Bayesian variable selection for the  //
   3 | // Dirichlet-Multinomial regression model with application to the analysis //
   4 | // of taxonomic abundances in microbiome data                              //
   5 | // *********************************************************************** //
   6 | //
   7 | // Copyright (C) 2016, Raffaele Argiento and W. Duncan Wadsworth with contributions
   8 | // from Michele Guindani
   9 | //
  10 | // This program is free software: you can redistribute it and/or modify
  11 | // it under the terms of the GNU General Public License as published by
  12 | // the Free Software Foundation, either version 3 of the License, or
  13 | // (at your option) any later version.
  14 | //
  15 | // This program is distributed in the hope that it will be useful,
  16 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 | // GNU General Public License for more details.
  19 | //
  20 | // You should have received a copy of the GNU General Public License
  21 | // along with this program.  If not, see <http://www.gnu.org/licenses/>.
  22 | //
  23 | // *********************************************************************** //
  24 | //
  25 | // This version has been updated on October 2, 2017 with an implementation
  26 | // of a stochastic search algorithm for variable selection in addition to the
  27 | // Gibbs sampler
  28 | //
  29 | // standard includes
  30 | #include <time.h>
  31 | #include <string.h>
  32 | #include <stdio.h>
  33 | #include <stdlib.h>
  34 | #include <math.h>
  35 | // GSL includes
  36 | #include <gsl/gsl_randist.h>
  37 | #include <gsl/gsl_sf_gamma.h> // has both the lngamma and lnbeta functions
  38 | #include <gsl/gsl_rng.h>
  39 | #include <gsl/gsl_errno.h>
  40 | 
  41 | // define the RNG globally
  42 | gsl_rng *rando;
  43 | 
  44 | // data dimensions required in the function prototypes below
  45 | static int n_cats;
  46 | static int n_obs;
  47 | static int n_vars;
  48 | 
  49 | // ----- Function declarations ----- //
  50 | // adaptive MH helper functions
  51 | double online_mean(int iteration, double last_mean, double curr_obs);
  52 | double online_var(int iteration, double last_mean, double last_var,
  53 |                   double curr_mean, double curr_obs);
  54 | double adap_prop(double curr_var);
  55 | // log Beta-Bernoulli spike-and-slab prior evaluation
  56 | double lprior_bbsas(double betajk, int sjk, double sig_bejk, double mu_bejk,
  57 |                     double aa_hp, double bb_hp);
  58 | // calculates the linear predictor for each category of the Dirichlet-Multinomial
  59 | double calculate_gamma(double **XX, double *alpha, double *beta, int jj,
  60 |                        int ii, int Log);
  61 | // MH update for the regression parameters, Gibbs version
  62 | void update_beta_jj(double **XX, double **JJ, double **loggamma,
  63 |                     double *beta_temp, int *inclusion_indicator,
  64 |                     double *prop_per_beta, double mu_be[n_cats][n_vars],
  65 |                     double sig_be[n_cats][n_vars], double aa_hp, double bb_hp,
  66 |                     int jj);
  67 | // MH update for the regression parameters, stochastic search version
  68 | void update_beta_jj_ss(double **XX, double **JJ, double **loggamma,
  69 |                        double *beta_temp, int *inclusion_indicator,
  70 |                        double *prop_per_beta, double mu_be[n_cats][n_vars],
  71 |                        double sig_be[n_cats][n_vars], double aa_hp, double bb_hp,
  72 |                        int jj);
  73 | // MH update for the intercept parameters
  74 | void update_alpha_jj(double **JJ, double **loggamma, double *alpha,
  75 |                      double *prop_per_alpha, int *accepted_alpha_flag,
  76 |                      double mu_al[n_cats], double sig_al[n_cats], int jj);
  77 | // for the Savitsky et al. inclusion proposal step
  78 | void between_models_jj(double **XX, double **JJ, double **loggamma,
  79 |                        double *beta_temp, int *accepted_beta_flag,
  80 |                        int *inclusion_indicator, double mu_be[n_cats][n_vars],
  81 |                        double sig_be[n_cats][n_vars], double aa_hp, double bb_hp,
  82 |                        int jj);
  83 | // Principal function using Gibbs variable selection
  84 | void dmbvs_gibbs(double **XX, int **YY, double *alpha, double *beta,
  85 |                  double mu_al[n_cats], double sig_al[n_cats],
  86 |                  double mu_be[n_cats][n_vars], double sig_be[n_cats][n_vars],
  87 |                  double aa_hp, double bb_hp, double *prop_per_alpha,
  88 |                  double *prop_per_beta, const int GG, const int burn,
  89 |                  const int thin, int Log, char *temp_dir);
  90 | // Principal function using stochastic search variable selection
  91 | void dmbvs_ss(double **XX, int **YY, double *alpha, double *beta,
  92 |               double mu_al[n_cats], double sig_al[n_cats],
  93 |               double mu_be[n_cats][n_vars], double sig_be[n_cats][n_vars],
  94 |               double aa_hp, double bb_hp, double *prop_per_alpha,
  95 |               double *prop_per_beta, const int GG, const int burn,
  96 |               const int thin, int Log, char *temp_dir);
  97 | 
  98 | // -------------------------------- //
  99 | int main(int argc, char *argv[]){
 100 | 
 101 |   // command line argument parsing
 102 |   if(argc != 14){
 103 |     fprintf(stderr, "Usage: %s <Iters> <Thin> <Burn> <Intercept Variance> <Slab Variance>\n <Beta-Bernoulli Alpha> <Beta-Bernoulli Beta> <N Categories>\n <N Observations> <N Variables> <External Seed> <Temp Directory> <Selection Type>\n", argv[0]);
 104 |     exit(EXIT_FAILURE);
 105 |   }
 106 |   const int GG = atoi(argv[1]);
 107 |   const int thin = atoi(argv[2]);
 108 |   const int burn = atoi(argv[3]);
 109 |   double alpha_variance = atof(argv[4]);
 110 |   double slab_variance = atof(argv[5]);
 111 |   double aa_hp = atof(argv[6]);
 112 |   double bb_hp = atof(argv[7]);
 113 |   n_cats = atoi(argv[8]);
 114 |   n_obs = atoi(argv[9]);
 115 |   n_vars = atoi(argv[10]);
 116 |   long int external_seed = atoi(argv[11]);
 117 |   char *temp_dir = argv[12];
 118 |   int selection_type = atoi(argv[13]);
 119 | 
 120 |   // hyperparameters and Metropolis-Hastings proposal parameters
 121 |   const double mean_alpha = 0.0;
 122 |   const double mean_slab = 0.0;
 123 | 
 124 |   printf("***********************************************************\n");
 125 |   printf("********************* Starting dmbvs **********************\n");
 126 |   // outfile info
 127 |   printf("***********************************************************\n");
 128 |   printf("*************************** MCMC **************************\n");
 129 |   printf("Total iterations = %i, Thinning = %i, Burnin = %i\n", GG, thin, burn);
 130 |   // random number generator initialization
 131 |   const gsl_rng_type * Tipo;
 132 |   Tipo = gsl_rng_default;
 133 |   rando = gsl_rng_alloc(Tipo);
 134 |   gsl_rng_set(rando, external_seed);
 135 |   printf("External seed = %lu, GSL seed = %lu, Generator type: %s\n",
 136 |          external_seed, gsl_rng_get(rando), gsl_rng_name(rando));
 137 |   printf("Text output deposited in directory:\n %s\n", temp_dir);
 138 |   printf("***********************************************************\n");
 139 |   printf("*************************** Data **************************\n");
 140 |   printf("# of categories = %i, # of observations = %i, # of covariates = %i\n",
 141 |          n_cats, n_obs, n_vars);
 142 |   printf("***********************************************************\n");
 143 |   printf("********************* Hyperparameters *********************\n");
 144 |   printf("Slab variance = %0.1f, Intercept variance = %0.1f\n", slab_variance,
 145 |          alpha_variance);
 146 |   printf("Beta-Bernoulli alpha = %0.2f, Beta-Bernoulli beta = %0.2f\n", aa_hp,
 147 |          bb_hp);
 148 |   printf("***********************************************************\n");
 149 |   printf("***********************************************************\n");
 150 | 
 151 |   ///////////////////////////////////////////////////
 152 |   // Indices (shifted right one)
 153 |   // ii: index of the observations in 1, ..., n_obs
 154 |   // jj: index of the categories in 1, ..., n_cats
 155 |   // kk: index of the covariates in 1, ..., n_vars
 156 |   // hh: index of the beta vector in 1, ..., n_vars * n_cats, is collapsed
 157 |   //     row-wise so that the 2x2 identity matrix becomes (1, 0, 0, 1)
 158 |   int ii, jj, kk, hh;
 159 | 
 160 |   // Larger objects such as data and initializations are read from files
 161 | 
 162 |   // single file pointer for all files
 163 |   FILE *fin;
 164 | 
 165 |   // reading in YY: the sample-by-category count matrix
 166 |   // all other files are read in in the same way
 167 |   char td_YY[200];
 168 |   strcpy(td_YY, temp_dir);
 169 |   strcat(td_YY, "/count_matrix.txt");
 170 |   fin = fopen(td_YY, "r");
 171 |   // dynamic allocation for YY for an array of pointers
 172 |   int **YY;
 173 |   YY = (int **) malloc(n_obs * sizeof(int *));
 174 |   for(ii = 0 ; ii < n_obs ; ii++){
 175 |     // allocate each row of the matrix YY
 176 |     YY[ii] = (int *) malloc(n_cats * sizeof(int));
 177 |     // fill in each row of the matrix YY
 178 |     for(jj = 0 ; jj < n_cats ; jj++){
 179 |       fscanf(fin, "%u", &YY[ii][jj]);
 180 |     }
 181 |   }
 182 | 
 183 |   // reading in XX: the sample-by-covariate matrix
 184 |   char td_XX[200];
 185 |   strcpy(td_XX, temp_dir);
 186 |   strcat(td_XX, "/covariates.txt");
 187 |   fin = fopen(td_XX, "r");
 188 |   double **XX;
 189 |   XX = (double **) malloc(n_obs * sizeof(double *));
 190 |   for(ii = 0 ; ii < n_obs ; ii++){
 191 |     XX[ii] = (double *) malloc(n_vars * sizeof(double));
 192 |     for(jj = 0 ; jj < n_vars ; jj++){
 193 |       fscanf(fin, "%lf", &XX[ii][jj]);
 194 |     }
 195 |   }
 196 | 
 197 |   ///////// Initializations from text files
 198 | 
 199 |   // initialization for beta
 200 |   char td_ib[200];
 201 |   strcpy(td_ib, temp_dir);
 202 |   strcat(td_ib, "/init_beta.txt");
 203 |   fin = fopen(td_ib, "r");
 204 |   double *beta;
 205 |   beta = malloc((n_vars * n_cats) * sizeof(double));
 206 |   for(hh = 0 ; hh < (n_vars * n_cats) ; hh++){
 207 |     fscanf(fin, "%lf", &beta[hh]);
 208 |   }
 209 | 
 210 |   // initialization for alpha
 211 |   char td_ia[200];
 212 |   strcpy(td_ia, temp_dir);
 213 |   strcat(td_ia, "/init_alpha.txt");
 214 |   fin = fopen(td_ia, "r");
 215 |   double *alpha;
 216 |   alpha = malloc(n_cats * sizeof(double));
 217 |   for(jj = 0 ; jj < n_cats ; jj++){
 218 |     fscanf(fin, "%lf", &alpha[jj]);
 219 |   }
 220 | 
 221 |   // for the beta proposal variance
 222 |   char td_pb[200];
 223 |   strcpy(td_pb, temp_dir);
 224 |   strcat(td_pb, "/proposal_beta.txt");
 225 |   fin = fopen(td_pb, "r");
 226 |   double *prop_per_beta;
 227 |   prop_per_beta = malloc((n_vars * n_cats) * sizeof(double));
 228 |   for(hh = 0 ; hh < (n_vars * n_cats) ; hh++){
 229 |     fscanf(fin, "%lf", &prop_per_beta[hh]);
 230 |   }
 231 | 
 232 |   // for the alpha proposal variance
 233 |   char td_pa[200];
 234 |   strcpy(td_pa, temp_dir);
 235 |   strcat(td_pa, "/proposal_alpha.txt");
 236 |   fin = fopen(td_pa, "r");
 237 |   double *prop_per_alpha;
 238 |   prop_per_alpha = malloc(n_cats * sizeof(double));
 239 |   for(jj = 0 ; jj < n_cats ; jj++){
 240 |     fscanf(fin, "%lf", &prop_per_alpha[jj]);
 241 |   }
 242 | 
 243 |   // mean and standard deviation of the independent normal priors on alpha and beta
 244 |   double mu_al[n_cats], sig_al[n_cats];
 245 |   double mu_be[n_cats][n_vars], sig_be[n_cats][n_vars];
 246 |   for(jj = 0 ; jj < n_cats ; jj++){
 247 |     mu_al[jj] = mean_alpha;
 248 |     sig_al[jj] = alpha_variance;
 249 |     for(kk = 0 ; kk < n_vars ; kk++){
 250 |       mu_be[jj][kk] = mean_slab;
 251 |       sig_be[jj][kk] = slab_variance;
 252 |     }
 253 |   }
 254 | 
 255 |   // a flag for functions that can calculate on the log scale
 256 |   int Log = 1;
 257 | 
 258 |   ///////// Initialization finished: commence the MCMC
 259 | 
 260 |   if(selection_type == 0){
 261 |     dmbvs_ss(XX, YY, alpha, beta, mu_al, sig_al, mu_be, sig_be, aa_hp, bb_hp,
 262 |              prop_per_alpha, prop_per_beta, GG, burn, thin, Log, temp_dir);
 263 |   }
 264 |   else if(selection_type == 1){
 265 |     dmbvs_gibbs(XX, YY, alpha, beta, mu_al, sig_al, mu_be, sig_be, aa_hp, bb_hp,
 266 |                 prop_per_alpha, prop_per_beta, GG, burn, thin, Log, temp_dir);
 267 |   }
 268 |   else{
 269 |     printf("Unrecognized variable selection argument\n");
 270 |   }
 271 | 
 272 | 
 273 |   // cleanup (kind of pedantic)
 274 |   fclose(fin);
 275 |   free(YY);
 276 |   free(XX);
 277 |   free(beta);
 278 |   free(alpha);
 279 |   free(prop_per_alpha);
 280 |   free(prop_per_beta);
 281 |   gsl_rng_free(rando);
 282 | 
 283 |   return(0);
 284 | 
 285 | } // end of main()
 286 | 
 287 | //////////////////////////////////////
 288 | // ----- Function definitions ----- //
 289 | //////////////////////////////////////
 290 | 
 291 | // Gibbs Sampler function
 292 | void dmbvs_ss(double **XX, int **YY, double *alpha, double *beta,
 293 |               double mu_al[n_cats], double sig_al[n_cats],
 294 |               double mu_be[n_cats][n_vars], double sig_be[n_cats][n_vars],
 295 |               double aa_hp, double bb_hp, double *prop_per_alpha,
 296 |               double *prop_per_beta, const int GG, const int burn,
 297 |               const int thin, int Log, char *temp_dir){
 298 | 
 299 |   // loop indices
 300 |   int hh, ii, jj, kk, gg;
 301 | 
 302 |   // for timing iterations
 303 |   time_t mytime;
 304 | 
 305 |   // adaptive proposal values
 306 |   double last_mean;
 307 |   double last_var;
 308 | 
 309 |   ///////// Initialize variables that don't require input or can be obtained
 310 |   ///////// from inputs
 311 | 
 312 |   // for MH acceptance/rejection ratios, initialized with zeros
 313 |   int *accepted_beta_flag;
 314 |   accepted_beta_flag = malloc((n_vars * n_cats) * sizeof(int));
 315 |   int *accepted_beta;
 316 |   accepted_beta = malloc((n_vars * n_cats) * sizeof(int));
 317 |   for(hh = 0 ; hh < (n_vars * n_cats) ; hh++){
 318 |     accepted_beta_flag[hh] = 0;
 319 |     accepted_beta[hh] = 0;
 320 |   }
 321 |   int *accepted_alpha_flag;
 322 |   accepted_alpha_flag = malloc(n_cats * sizeof(int));
 323 |   int *accepted_alpha;
 324 |   accepted_alpha = malloc(n_cats * sizeof(int));
 325 |   for(hh = 0 ; hh < n_cats ; hh++){
 326 |     accepted_alpha_flag[hh] = 0;
 327 |     accepted_alpha[hh] = 0;
 328 |   }
 329 |   // for adaptive MH need to keep track of target density mean and variance
 330 |   double *curr_mean;
 331 |   curr_mean = malloc((n_vars * n_cats) * sizeof(double));
 332 |   double *curr_var;
 333 |   curr_var = malloc((n_vars * n_cats) * sizeof(double));
 334 |   for(hh = 0 ; hh < (n_vars * n_cats) ; hh++){
 335 |     curr_mean[hh] = 0.0;
 336 |     curr_var[hh] = 0.5;
 337 |   }
 338 | 
 339 |   // variable inclusion indicator
 340 |   int *inclusion_indicator;
 341 |   inclusion_indicator = malloc((n_vars * n_cats) * sizeof(int));
 342 |   for(hh = 0 ; hh < (n_vars * n_cats) ; hh++){
 343 |     if(beta[hh] == 0){
 344 |       inclusion_indicator[hh] = 0;
 345 |     }else{
 346 |       inclusion_indicator[hh] = 1;
 347 |     }
 348 |   }
 349 | 
 350 |   // row sums of YY
 351 |   int *Ypiu;
 352 |   Ypiu = malloc(n_obs * sizeof(int));
 353 |   for(ii = 0 ; ii < n_obs ; ii++){
 354 |     for(jj = 0 ; jj < n_cats ; jj++){
 355 |       Ypiu[ii] = Ypiu[ii] + YY[ii][jj];
 356 |     }
 357 |   }
 358 | 
 359 |   // the beta_temp vector is an n_vars long vector used for updating each of the
 360 |   // jj-th rows in Beta corresponding the jj-th category in YY
 361 |   double *beta_temp;
 362 |   beta_temp = malloc(n_vars * sizeof(double));
 363 | 
 364 |   // the linear predictor matrix, represented as a vector, is in the log scale
 365 |   double **loggamma;
 366 |   loggamma = malloc(n_obs * sizeof(double *));
 367 |   for(ii = 0 ; ii < n_obs ; ii++){
 368 |     loggamma[ii] = malloc(n_cats * sizeof(double));
 369 |     for(jj = 0 ; jj < n_cats ; jj++){
 370 |       loggamma[ii][jj] = calculate_gamma(XX, alpha, beta, jj, ii, Log);
 371 |     }
 372 |   }
 373 | 
 374 |   // initialize latent variable: JJ ~ gamma()
 375 |   double **JJ;
 376 |   JJ = malloc(n_obs * sizeof(double *));
 377 |   // TT is the vector of normalization constants
 378 |   double *TT;
 379 |   TT = malloc(n_obs * sizeof(double *));
 380 |   for(ii = 0 ; ii < n_obs ; ii++){
 381 |     JJ[ii] = malloc(n_cats * sizeof(double));
 382 |     TT[ii] = 0.0;
 383 |     for(jj = 0 ; jj < n_cats ; jj++){
 384 |       JJ[ii][jj] = (double)YY[ii][jj];
 385 |       // a very small number
 386 |       if(JJ[ii][jj] < pow(10.0, -100.0)){
 387 |         JJ[ii][jj] = pow(10.0, -100.0);
 388 |       }
 389 |       TT[ii] = TT[ii] + JJ[ii][jj];
 390 |     }
 391 |   }
 392 | 
 393 |   // initialize the other clever latent variable: uu
 394 |   // note that this uses the data to initialize
 395 |   double *uu;
 396 |   uu = malloc(n_obs * sizeof(double));
 397 |   for(ii = 0 ; ii < n_obs ; ii++){
 398 |     uu[ii] = gsl_ran_gamma(rando, Ypiu[ii], 1.0/TT[ii]);
 399 |   }
 400 | 
 401 |   // output file pointers
 402 |   FILE *fout_alpha, *fout_beta, *fout_alpha_acceptance, *fout_beta_acceptance, *fout_beta_proposal;
 403 | 
 404 |   // this is stupid, I'm sure there's a better way
 405 |   char tf_alpha[200];
 406 |   strcpy(tf_alpha, temp_dir);
 407 |   strcat(tf_alpha, "/alpha.out");
 408 |   fout_alpha = fopen(tf_alpha, "w");
 409 |   char tf_alpha_acceptance[200];
 410 |   strcpy(tf_alpha_acceptance, temp_dir);
 411 |   strcat(tf_alpha_acceptance, "/alpha_acceptance.out");
 412 |   fout_alpha_acceptance = fopen(tf_alpha_acceptance, "w");
 413 |   char tf_beta[200];
 414 |   strcpy(tf_beta, temp_dir);
 415 |   strcat(tf_beta, "/beta.out");
 416 |   fout_beta = fopen(tf_beta, "w");
 417 |   char tf_beta_acceptance[200];
 418 |   strcpy(tf_beta_acceptance, temp_dir);
 419 |   strcat(tf_beta_acceptance, "/beta_acceptance.out");
 420 |   fout_beta_acceptance = fopen(tf_beta_acceptance, "w");
 421 |   char tf_beta_proposal[200];
 422 |   strcpy(tf_beta_proposal, temp_dir);
 423 |   strcat(tf_beta_proposal, "/beta_proposal.out");
 424 |   fout_beta_proposal = fopen(tf_beta_proposal, "w");
 425 | 
 426 |   // the magic starts here
 427 |   printf("Starting the sampler\n");
 428 | 
 429 |   for(gg = 0 ; gg < GG ; gg++){
 430 | 
 431 |     // timing info to the printed output
 432 |     mytime = time(NULL);
 433 |     if(gg % 1000 == 0){
 434 |       printf("Starting the %uth iteration out of %u at %s", gg, GG, ctime(&mytime));
 435 |     }
 436 | 
 437 |     // first a round of the between-model step for every covariate within every taxa
 438 |     jj = floor(gsl_rng_uniform(rando) * n_cats);//for(jj = 0 ; jj < n_cats ; jj++){
 439 | 
 440 |       // fill in beta_temp
 441 |       for(kk = 0 ; kk < n_vars ; kk++){
 442 |         hh = kk + jj * n_vars;
 443 |         beta_temp[kk] = beta[hh];
 444 |       }
 445 | 
 446 |       between_models_jj(XX, JJ, loggamma, beta_temp, accepted_beta_flag,
 447 |                         inclusion_indicator, mu_be, sig_be, aa_hp, bb_hp, jj);
 448 | 
 449 |       // update beta with beta_temp
 450 |       for(kk = 0 ; kk < n_vars ; kk++){
 451 |         hh = kk + jj * n_vars;
 452 |         beta[hh] = beta_temp[kk];
 453 |       }
 454 |     //}
 455 | 
 456 |     // now an accelerating within-model step
 457 |     for(jj = 0 ; jj < n_cats ; jj++){
 458 | 
 459 |       // fill in beta_temp and update the proposal variance
 460 |       for(kk = 0 ; kk < n_vars ; kk++){
 461 |         hh = kk + jj * n_vars;
 462 |         beta_temp[kk] = beta[hh];
 463 |         // wait until each parameter has had a few iterations
 464 |         if(gg > 2 * n_vars * n_cats){
 465 |           // calculate the online variance
 466 |           last_mean = curr_mean[hh];
 467 |           last_var = curr_var[hh];
 468 |           curr_mean[hh] = online_mean(gg, last_mean, beta[hh]);
 469 |           curr_var[hh] = online_var(gg, last_mean, last_var, curr_mean[hh], beta[hh]);
 470 |           // update proposal variance
 471 |           prop_per_beta[hh] = curr_var[hh];
 472 |         }
 473 |       }
 474 | 
 475 |       update_beta_jj_ss(XX, JJ, loggamma, beta_temp, inclusion_indicator,
 476 |                         prop_per_beta, mu_be, sig_be, aa_hp, bb_hp, jj);
 477 | 
 478 |       // update beta with beta_temp and write to file
 479 |       for(kk = 0 ; kk < n_vars ; kk++){
 480 |         hh = kk + jj * n_vars;
 481 |         beta[hh] = beta_temp[kk];
 482 |         if((gg >= burn) & (gg % thin == 0)){
 483 |           accepted_beta[hh] = accepted_beta[hh] + accepted_beta_flag[hh];
 484 |           accepted_beta_flag[hh] = 0;
 485 |           fprintf(fout_beta, "%e ", beta[hh]); // space delimited output files
 486 |         }
 487 |       }
 488 |     }
 489 |     if((gg >= burn) & (gg % thin == 0)){fprintf(fout_beta, "\n");}
 490 | 
 491 |     // update alpha and write to file
 492 |     for(jj = 0 ; jj < n_cats ; jj++){
 493 |       update_alpha_jj(JJ, loggamma, alpha, prop_per_alpha, accepted_alpha_flag,
 494 |                       mu_al, sig_al, jj);
 495 |       if((gg >= burn) & (gg % thin == 0)){
 496 |         accepted_alpha[jj] = accepted_alpha[jj] + accepted_alpha_flag[jj];
 497 |         accepted_alpha_flag[jj] = 0;
 498 |         fprintf(fout_alpha,"%e ", alpha[jj]);
 499 |       }
 500 |     }
 501 |     if((gg >= burn) & (gg % thin == 0)){fprintf(fout_alpha, "\n ");}
 502 | 
 503 |     // update JJ and consequently TT
 504 |     for(ii = 0 ; ii < n_obs ; ii++){
 505 |       TT[ii] = 0.0;
 506 |       for(jj = 0 ; jj < n_cats ; jj++){
 507 |         JJ[ii][jj] = gsl_ran_gamma(rando, YY[ii][jj] + exp(loggamma[ii][jj]), 1.0/(uu[ii] + 1.0));
 508 |         if(JJ[ii][jj] < pow(10.0, -100.0)){
 509 |           JJ[ii][jj] = pow(10.0, -100.0);
 510 |         }
 511 |         TT[ii] = TT[ii] + JJ[ii][jj];
 512 |       }
 513 |     }
 514 | 
 515 |     // update latent variables uu
 516 |     for(ii = 0 ; ii < n_obs ; ii++){
 517 |       uu[ii] = gsl_ran_gamma(rando, Ypiu[ii], 1.0/TT[ii]);
 518 |     }
 519 | 
 520 |   } // end of iterations
 521 | 
 522 |   // print acceptance ratio files
 523 |   for(jj = 0 ; jj < n_cats ; jj++){
 524 |     fprintf(fout_alpha_acceptance, "%f\n", (float)accepted_alpha[jj]/((GG - burn)/thin));
 525 |     for(kk = 0 ; kk < n_vars ; kk++){
 526 |       fprintf(fout_beta_acceptance, "%f\n", (float)accepted_beta[kk + jj * n_vars]/((GG - burn)/thin));
 527 |       fprintf(fout_beta_proposal, "%f\n", prop_per_beta[kk + jj * n_vars]);
 528 |     }
 529 |   }
 530 | 
 531 |   // cleanup (kind of pedantic)
 532 |   fclose(fout_alpha);
 533 |   fclose(fout_beta);
 534 |   fclose(fout_alpha_acceptance);
 535 |   fclose(fout_beta_acceptance);
 536 |   free(accepted_alpha);
 537 |   free(accepted_beta);
 538 |   free(accepted_alpha_flag);
 539 |   free(accepted_beta_flag);
 540 |   free(inclusion_indicator);
 541 |   free(beta_temp);
 542 |   free(loggamma);
 543 |   free(JJ);
 544 |   free(TT);
 545 |   free(uu);
 546 |   free(Ypiu);
 547 | 
 548 |   printf("Sampling finished!\n");
 549 | 
 550 |   return;
 551 | 
 552 | } // close Gibbs sampler function
 553 | 
 554 | void dmbvs_gibbs(double **XX, int **YY, double *alpha, double *beta,
 555 |                  double mu_al[n_cats], double sig_al[n_cats],
 556 |                  double mu_be[n_cats][n_vars], double sig_be[n_cats][n_vars],
 557 |                  double aa_hp, double bb_hp, double *prop_per_alpha,
 558 |                  double *prop_per_beta, const int GG, const int burn,
 559 |                  const int thin, int Log, char *temp_dir){
 560 | 
 561 |   // loop indices
 562 |   int hh, ii, jj, kk, gg;
 563 | 
 564 |   // for timing iterations
 565 |   time_t mytime;
 566 | 
 567 |   // adaptive proposal values
 568 |   double last_mean;
 569 |   double last_var;
 570 | 
 571 |   ///////// Initialize variables that don't require input or can be obtained
 572 |   ///////// from inputs
 573 | 
 574 |   // for MH acceptance/rejection ratios, initialized with zeros
 575 |   int *accepted_beta_flag;
 576 |   accepted_beta_flag = malloc((n_vars * n_cats) * sizeof(int));
 577 |   int *accepted_beta;
 578 |   accepted_beta = malloc((n_vars * n_cats) * sizeof(int));
 579 |   for(hh = 0 ; hh < (n_vars * n_cats) ; hh++){
 580 |     accepted_beta_flag[hh] = 0;
 581 |     accepted_beta[hh] = 0;
 582 |   }
 583 |   int *accepted_alpha_flag;
 584 |   accepted_alpha_flag = malloc(n_cats * sizeof(int));
 585 |   int *accepted_alpha;
 586 |   accepted_alpha = malloc(n_cats * sizeof(int));
 587 |   for(hh = 0 ; hh < n_cats ; hh++){
 588 |     accepted_alpha_flag[hh] = 0;
 589 |     accepted_alpha[hh] = 0;
 590 |   }
 591 |   // for adaptive MH need to keep track of target density mean and variance
 592 |   double *curr_mean;
 593 |   curr_mean = malloc((n_vars * n_cats) * sizeof(double));
 594 |   double *curr_var;
 595 |   curr_var = malloc((n_vars * n_cats) * sizeof(double));
 596 |   for(hh = 0 ; hh < (n_vars * n_cats) ; hh++){
 597 |     curr_mean[hh] = 0.0;
 598 |     curr_var[hh] = 0.5;
 599 |   }
 600 | 
 601 |   // variable inclusion indicator
 602 |   int *inclusion_indicator;
 603 |   inclusion_indicator = malloc((n_vars * n_cats) * sizeof(int));
 604 |   for(hh = 0 ; hh < (n_vars * n_cats) ; hh++){
 605 |     if(beta[hh] == 0){
 606 |       inclusion_indicator[hh] = 0;
 607 |     }else{
 608 |       inclusion_indicator[hh] = 1;
 609 |     }
 610 |   }
 611 | 
 612 |   // row sums of YY
 613 |   int *Ypiu;
 614 |   Ypiu = malloc(n_obs * sizeof(int));
 615 |   for(ii = 0 ; ii < n_obs ; ii++){
 616 |     for(jj = 0 ; jj < n_cats ; jj++){
 617 |       Ypiu[ii] = Ypiu[ii] + YY[ii][jj];
 618 |     }
 619 |   }
 620 | 
 621 |   // the beta_temp vector is an n_vars long vector used for updating each of the
 622 |   // jj-th rows in Beta corresponding the jj-th category in YY
 623 |   double *beta_temp;
 624 |   beta_temp = malloc(n_vars * sizeof(double));
 625 | 
 626 |   // the linear predictor matrix, represented as a vector, is in the log scale
 627 |   double **loggamma;
 628 |   loggamma = malloc(n_obs * sizeof(double *));
 629 |   for(ii = 0 ; ii < n_obs ; ii++){
 630 |     loggamma[ii] = malloc(n_cats * sizeof(double));
 631 |     for(jj = 0 ; jj < n_cats ; jj++){
 632 |       loggamma[ii][jj] = calculate_gamma(XX, alpha, beta, jj, ii, Log);
 633 |     }
 634 |   }
 635 | 
 636 |   // initialize latent variable: JJ ~ gamma()
 637 |   double **JJ;
 638 |   JJ = malloc(n_obs * sizeof(double *));
 639 |   // TT is the vector of normalization constants
 640 |   double *TT;
 641 |   TT = malloc(n_obs * sizeof(double *));
 642 |   for(ii = 0 ; ii < n_obs ; ii++){
 643 |     JJ[ii] = malloc(n_cats * sizeof(double));
 644 |     TT[ii] = 0.0;
 645 |     for(jj = 0 ; jj < n_cats ; jj++){
 646 |       JJ[ii][jj] = (double)YY[ii][jj];
 647 |       // a very small number
 648 |       if(JJ[ii][jj] < pow(10.0, -100.0)){
 649 |         JJ[ii][jj] = pow(10.0, -100.0);
 650 |       }
 651 |       TT[ii] = TT[ii] + JJ[ii][jj];
 652 |     }
 653 |   }
 654 | 
 655 |   // initialize the other clever latent variable: uu
 656 |   // note that this uses the data to initialize
 657 |   double *uu;
 658 |   uu = malloc(n_obs * sizeof(double));
 659 |   for(ii = 0 ; ii < n_obs ; ii++){
 660 |     uu[ii] = gsl_ran_gamma(rando, Ypiu[ii], 1.0/TT[ii]);
 661 |   }
 662 | 
 663 |   // output file pointers
 664 |   FILE *fout_alpha, *fout_beta, *fout_alpha_acceptance, *fout_beta_acceptance, *fout_beta_proposal;
 665 | 
 666 |   // this is stupid, I'm sure there's a better way
 667 |   char tf_alpha[200];
 668 |   strcpy(tf_alpha, temp_dir);
 669 |   strcat(tf_alpha, "/alpha.out");
 670 |   fout_alpha = fopen(tf_alpha, "w");
 671 |   char tf_alpha_acceptance[200];
 672 |   strcpy(tf_alpha_acceptance, temp_dir);
 673 |   strcat(tf_alpha_acceptance, "/alpha_acceptance.out");
 674 |   fout_alpha_acceptance = fopen(tf_alpha_acceptance, "w");
 675 |   char tf_beta[200];
 676 |   strcpy(tf_beta, temp_dir);
 677 |   strcat(tf_beta, "/beta.out");
 678 |   fout_beta = fopen(tf_beta, "w");
 679 |   char tf_beta_acceptance[200];
 680 |   strcpy(tf_beta_acceptance, temp_dir);
 681 |   strcat(tf_beta_acceptance, "/beta_acceptance.out");
 682 |   fout_beta_acceptance = fopen(tf_beta_acceptance, "w");
 683 |   char tf_beta_proposal[200];
 684 |   strcpy(tf_beta_proposal, temp_dir);
 685 |   strcat(tf_beta_proposal, "/beta_proposal.out");
 686 |   fout_beta_proposal = fopen(tf_beta_proposal, "w");
 687 | 
 688 |   // the magic starts here
 689 |   printf("Starting the sampler\n");
 690 | 
 691 |   for(gg = 0 ; gg < GG ; gg++){
 692 | 
 693 |     // timing info to the printed output
 694 |     mytime = time(NULL);
 695 |     if(gg % 1000 == 0){
 696 |       printf("Starting the %uth iteration out of %u at %s", gg, GG, ctime(&mytime));
 697 |     }
 698 | 
 699 |     // first a round of the between-model step for every covariate within every taxa
 700 |     for(jj = 0 ; jj < n_cats ; jj++){
 701 | 
 702 |       // fill in beta_temp
 703 |       for(kk = 0 ; kk < n_vars ; kk++){
 704 |         hh = kk + jj * n_vars;
 705 |         beta_temp[kk] = beta[hh];
 706 |       }
 707 | 
 708 |       between_models_jj(XX, JJ, loggamma, beta_temp, accepted_beta_flag,
 709 |                         inclusion_indicator, mu_be, sig_be, aa_hp, bb_hp, jj);
 710 | 
 711 |       // update beta with beta_temp
 712 |       for(kk = 0 ; kk < n_vars ; kk++){
 713 |         hh = kk + jj * n_vars;
 714 |         beta[hh] = beta_temp[kk];
 715 |       }
 716 |     }
 717 | 
 718 |     // now an accelerating within-model step
 719 |     for(jj = 0 ; jj < n_cats ; jj++){
 720 | 
 721 |       // fill in beta_temp and update the proposal variance
 722 |       for(kk = 0 ; kk < n_vars ; kk++){
 723 |         hh = kk + jj * n_vars;
 724 |         beta_temp[kk] = beta[hh];
 725 |         // wait until each parameter has had a few iterations
 726 |         if(gg > 2 * n_vars * n_cats){
 727 |           // calculate the online variance
 728 |           last_mean = curr_mean[hh];
 729 |           last_var = curr_var[hh];
 730 |           curr_mean[hh] = online_mean(gg, last_mean, beta[hh]);
 731 |           curr_var[hh] = online_var(gg, last_mean, last_var, curr_mean[hh], beta[hh]);
 732 |           // update proposal variance
 733 |           prop_per_beta[hh] = curr_var[hh];
 734 |         }
 735 |       }
 736 | 
 737 |       update_beta_jj(XX, JJ, loggamma, beta_temp, inclusion_indicator,
 738 |                      prop_per_beta, mu_be, sig_be, aa_hp, bb_hp, jj);
 739 | 
 740 |       // update beta with beta_temp and write to file
 741 |       for(kk = 0 ; kk < n_vars ; kk++){
 742 |         hh = kk + jj * n_vars;
 743 |         beta[hh] = beta_temp[kk];
 744 |         if((gg >= burn) & (gg % thin == 0)){
 745 |           accepted_beta[hh] = accepted_beta[hh] + accepted_beta_flag[hh];
 746 |           accepted_beta_flag[hh] = 0;
 747 |           fprintf(fout_beta, "%e ", beta[hh]); // space delimited output files
 748 |         }
 749 |       }
 750 |     }
 751 |     if((gg >= burn) & (gg % thin == 0)){fprintf(fout_beta, "\n");}
 752 | 
 753 |     // update alpha and write to file
 754 |     for(jj = 0 ; jj < n_cats ; jj++){
 755 |       update_alpha_jj(JJ, loggamma, alpha, prop_per_alpha, accepted_alpha_flag,
 756 |                       mu_al, sig_al, jj);
 757 |       if((gg >= burn) & (gg % thin == 0)){
 758 |         accepted_alpha[jj] = accepted_alpha[jj] + accepted_alpha_flag[jj];
 759 |         accepted_alpha_flag[jj] = 0;
 760 |         fprintf(fout_alpha,"%e ", alpha[jj]);
 761 |       }
 762 |     }
 763 |     if((gg >= burn) & (gg % thin == 0)){fprintf(fout_alpha, "\n ");}
 764 | 
 765 |     // update JJ and consequently TT
 766 |     for(ii = 0 ; ii < n_obs ; ii++){
 767 |       TT[ii] = 0.0;
 768 |       for(jj = 0 ; jj < n_cats ; jj++){
 769 |         JJ[ii][jj] = gsl_ran_gamma(rando, YY[ii][jj] + exp(loggamma[ii][jj]), 1.0/(uu[ii] + 1.0));
 770 |         if(JJ[ii][jj] < pow(10.0, -100.0)){
 771 |           JJ[ii][jj] = pow(10.0, -100.0);
 772 |         }
 773 |         TT[ii] = TT[ii] + JJ[ii][jj];
 774 |       }
 775 |     }
 776 | 
 777 |     // update latent variables uu
 778 |     for(ii = 0 ; ii < n_obs ; ii++){
 779 |       uu[ii] = gsl_ran_gamma(rando, Ypiu[ii], 1.0/TT[ii]);
 780 |     }
 781 | 
 782 |   } // end of iterations
 783 | 
 784 |   // print acceptance ratio files
 785 |   for(jj = 0 ; jj < n_cats ; jj++){
 786 |     fprintf(fout_alpha_acceptance, "%f\n", (float)accepted_alpha[jj]/((GG - burn)/thin));
 787 |     for(kk = 0 ; kk < n_vars ; kk++){
 788 |       fprintf(fout_beta_acceptance, "%f\n", (float)accepted_beta[kk + jj * n_vars]/((GG - burn)/thin));
 789 |       fprintf(fout_beta_proposal, "%f\n", prop_per_beta[kk + jj * n_vars]);
 790 |     }
 791 |   }
 792 | 
 793 |   // cleanup (kind of pedantic)
 794 |   fclose(fout_alpha);
 795 |   fclose(fout_beta);
 796 |   fclose(fout_alpha_acceptance);
 797 |   fclose(fout_beta_acceptance);
 798 |   free(accepted_alpha);
 799 |   free(accepted_beta);
 800 |   free(accepted_alpha_flag);
 801 |   free(accepted_beta_flag);
 802 |   free(inclusion_indicator);
 803 |   free(beta_temp);
 804 |   free(loggamma);
 805 |   free(JJ);
 806 |   free(TT);
 807 |   free(uu);
 808 |   free(Ypiu);
 809 | 
 810 |   printf("Sampling finished!\n");
 811 | 
 812 |   return;
 813 | 
 814 | } // close Gibbs (stochastic search) sampler function
 815 | 
 816 | //
 817 | double calculate_gamma(double **XX, double *alpha, double *beta, int jj, int ii, int Log){
 818 | 
 819 |   // loop indices and out
 820 |   int kk, hh;
 821 |   double out;
 822 | 
 823 |   out = alpha[jj];
 824 |   for(kk = 0 ; kk < n_vars ; kk++){
 825 | 
 826 |     hh = kk + jj * n_vars;
 827 |     out = out + beta[hh] * XX[ii][kk];
 828 | 
 829 |   }
 830 | 
 831 |   if(Log == 1){
 832 |     return(out);
 833 |   }
 834 | 
 835 |   return(exp(out));
 836 | 
 837 | }
 838 | 
 839 | // Gibbs variable selection version
 840 | void update_beta_jj(double **XX, double **JJ, double **loggamma,
 841 |                     double *beta_temp, int *inclusion_indicator,
 842 |                     double *prop_per_beta, double mu_be[n_cats][n_vars],
 843 |                     double sig_be[n_cats][n_vars], double aa_hp,
 844 |                     double bb_hp, int jj){
 845 | 
 846 |   // this function loops through n_vars so it is called for one taxa at a time
 847 | 
 848 |   double sig_prop;
 849 |   double loggamma_p[n_obs];
 850 |   int hh, ii, kk;
 851 | 
 852 |   // define proposal variable and acceptance ratio variables
 853 |   double beta_p;
 854 |   double lnu, ln_acp;
 855 | 
 856 |   double log_full_beta, log_full_beta_p;
 857 | 
 858 |   for(kk = 0 ; kk < n_vars ; kk++){
 859 | 
 860 |     // Stride for full (n_vars * n_cats) vector
 861 |     hh = kk + jj * n_vars;
 862 | 
 863 |     if(inclusion_indicator[hh] == 1){
 864 | 
 865 |       log_full_beta = 0;
 866 | 
 867 |       for(ii = 0 ; ii < n_obs ; ii++){
 868 | 
 869 |         log_full_beta = log_full_beta - gsl_sf_lngamma(exp(loggamma[ii][jj]));
 870 |         log_full_beta = log_full_beta + exp(loggamma[ii][jj]) * log(JJ[ii][jj]);
 871 | 
 872 |       }
 873 | 
 874 |       log_full_beta = log_full_beta + lprior_bbsas(beta_temp[kk], inclusion_indicator[hh],
 875 |                                                    sig_be[jj][kk], mu_be[jj][kk], aa_hp, bb_hp);
 876 |       sig_prop = prop_per_beta[hh];
 877 |       beta_p = beta_temp[kk] + adap_prop(sig_prop);
 878 | 
 879 | 
 880 |       for(ii = 0 ; ii < n_obs ; ii++){
 881 |         loggamma_p[ii] = loggamma[ii][jj] - beta_temp[kk] * XX[ii][kk] + beta_p * XX[ii][kk];
 882 |       }
 883 | 
 884 |       // calculate proposal probability
 885 |       log_full_beta_p = 0;
 886 |       for(ii = 0 ; ii < n_obs ; ii++){
 887 |         log_full_beta_p = log_full_beta_p - gsl_sf_lngamma(exp(loggamma_p[ii]));
 888 |         log_full_beta_p = log_full_beta_p + exp(loggamma_p[ii]) * log(JJ[ii][jj]);
 889 |       }
 890 |       log_full_beta_p = log_full_beta_p + lprior_bbsas(beta_p, inclusion_indicator[hh], sig_be[jj][kk], mu_be[jj][kk], aa_hp, bb_hp);
 891 | 
 892 |       ln_acp = log_full_beta_p - log_full_beta;
 893 |       lnu = log(gsl_rng_uniform(rando));
 894 | 
 895 |       if(lnu < ln_acp){
 896 | 
 897 |         beta_temp[kk] = beta_p;
 898 | 
 899 |         for(ii = 0 ; ii < n_obs ; ii++){
 900 | 
 901 |           loggamma[ii][jj] = loggamma_p[ii];
 902 | 
 903 |         }
 904 | 
 905 |       } // Close MH accept
 906 |     } // Close inclusion_indicator[hh] == 1
 907 |   } // Close for kk
 908 | } // Close function
 909 | 
 910 | // stochastic search variable selection version
 911 | void update_beta_jj_ss(double **XX, double **JJ, double **loggamma,
 912 |                        double *beta_temp, int *inclusion_indicator,
 913 |                        double *prop_per_beta, double mu_be[n_cats][n_vars],
 914 |                        double sig_be[n_cats][n_vars], double aa_hp,
 915 |                        double bb_hp, int jj){
 916 | 
 917 |   // this function loops through n_vars so it is called for one taxa at a time
 918 | 
 919 |   double sig_prop;
 920 |   double loggamma_p[n_obs];
 921 |   int hh, ii, kk;
 922 | 
 923 |   // define proposal variable and acceptance ratio variables
 924 |   double beta_p;
 925 |   double lnu, ln_acp;
 926 | 
 927 |   double log_full_beta, log_full_beta_p;
 928 | 
 929 |   kk = floor(gsl_rng_uniform(rando) * n_vars);//for(kk = 0 ; kk < n_vars ; kk++){
 930 | 
 931 |     // Stride for full (n_vars * n_cats) vector
 932 |     hh = kk + jj * n_vars;
 933 | 
 934 |     if(inclusion_indicator[hh] == 1){
 935 | 
 936 |       log_full_beta = 0;
 937 | 
 938 |       for(ii = 0 ; ii < n_obs ; ii++){
 939 | 
 940 |         log_full_beta = log_full_beta - gsl_sf_lngamma(exp(loggamma[ii][jj]));
 941 |         log_full_beta = log_full_beta + exp(loggamma[ii][jj]) * log(JJ[ii][jj]);
 942 | 
 943 |       }
 944 | 
 945 |       log_full_beta = log_full_beta + lprior_bbsas(beta_temp[kk], inclusion_indicator[hh],
 946 |                                                    sig_be[jj][kk], mu_be[jj][kk], aa_hp, bb_hp);
 947 |       sig_prop = prop_per_beta[hh];
 948 |       beta_p = beta_temp[kk] + adap_prop(sig_prop);
 949 | 
 950 | 
 951 |       for(ii = 0 ; ii < n_obs ; ii++){
 952 |         loggamma_p[ii] = loggamma[ii][jj] - beta_temp[kk] * XX[ii][kk] + beta_p * XX[ii][kk];
 953 |       }
 954 | 
 955 |       // calculate proposal probability
 956 |       log_full_beta_p = 0;
 957 |       for(ii = 0 ; ii < n_obs ; ii++){
 958 |         log_full_beta_p = log_full_beta_p - gsl_sf_lngamma(exp(loggamma_p[ii]));
 959 |         log_full_beta_p = log_full_beta_p + exp(loggamma_p[ii]) * log(JJ[ii][jj]);
 960 |       }
 961 |       log_full_beta_p = log_full_beta_p + lprior_bbsas(beta_p, inclusion_indicator[hh], sig_be[jj][kk], mu_be[jj][kk], aa_hp, bb_hp);
 962 | 
 963 |       ln_acp = log_full_beta_p - log_full_beta;
 964 |       lnu = log(gsl_rng_uniform(rando));
 965 | 
 966 |       if(lnu < ln_acp){
 967 | 
 968 |         beta_temp[kk] = beta_p;
 969 | 
 970 |         for(ii = 0 ; ii < n_obs ; ii++){
 971 | 
 972 |           loggamma[ii][jj] = loggamma_p[ii];
 973 | 
 974 |         }
 975 | 
 976 |       } // Close MH accept
 977 |     } // Close inclusion_indicator[hh] == 1
 978 |   //} // Close for kk
 979 | } // Close function
 980 | 
 981 | void between_models_jj(double **XX, double **JJ, double **loggamma,
 982 |                        double *beta_temp, int *accepted_beta_flag,
 983 |                        int *inclusion_indicator, double mu_be[n_cats][n_vars],
 984 |                        double sig_be[n_cats][n_vars], double aa_hp,
 985 |                        double bb_hp, int jj){
 986 | 
 987 |   // Metropolis-Hastings proposals
 988 |   double sig_prop;
 989 |   double mu_prop;
 990 | 
 991 |   int hh, kk, ii;
 992 | 
 993 |   // proposed value vectors
 994 |   int inclusion_indicator_p;
 995 |   double loggamma_p[n_obs];
 996 |   double beta_p;
 997 | 
 998 |   // acceptance ratio variables
 999 |   double lnu, ln_acp;
1000 | 
1001 |   // must update beta_jk for kk = 1, ..., n_vars
1002 |   double log_full_beta, log_full_beta_p;
1003 | 
1004 |   for(kk = 0 ; kk < n_vars ; kk++){
1005 | 
1006 |     hh = kk + jj * n_vars;
1007 | 
1008 |     // calculate the current log full conditional
1009 |     log_full_beta = 0;
1010 |     for(ii = 0 ; ii < n_obs ; ii++){
1011 |       log_full_beta = log_full_beta - gsl_sf_lngamma(exp(loggamma[ii][jj]));
1012 |       log_full_beta = log_full_beta + exp(loggamma[ii][jj]) * log(JJ[ii][jj]);
1013 |     }
1014 |     log_full_beta = log_full_beta + lprior_bbsas(beta_temp[kk], inclusion_indicator[hh], sig_be[jj][kk], mu_be[jj][kk], aa_hp, bb_hp);
1015 | 
1016 |     // proposing a new value for beta[jj][kk] using the prior mean and standard deviation
1017 |     sig_prop = pow(sig_be[jj][kk], 0.5);
1018 |     mu_prop = mu_be[jj][kk];
1019 | 
1020 |     // swap and sample beta
1021 |     if(inclusion_indicator[hh] == 0){
1022 |       beta_p = mu_prop + gsl_ran_gaussian_ziggurat(rando, sig_prop);
1023 |       inclusion_indicator_p = 1;
1024 |     }
1025 |     else{
1026 |       beta_p = 0.0;
1027 |       inclusion_indicator_p = 0;
1028 |     }
1029 | 
1030 |     // update proposal gamma (linear predictor)
1031 |     for(ii = 0 ; ii < n_obs ; ii++){
1032 |       loggamma_p[ii] = loggamma[ii][jj] - beta_temp[kk] * XX[ii][kk] + beta_p * XX[ii][kk];
1033 |     }
1034 | 
1035 |     // calculate the proposed log full conditional
1036 |     log_full_beta_p = 0;
1037 |     for(ii = 0 ; ii < n_obs ; ii++){
1038 |       log_full_beta_p = log_full_beta_p - gsl_sf_lngamma(exp(loggamma_p[ii]));
1039 |       log_full_beta_p = log_full_beta_p + exp(loggamma_p[ii]) * log(JJ[ii][jj]);
1040 |     }
1041 |     log_full_beta_p = log_full_beta_p + lprior_bbsas(beta_p, inclusion_indicator_p, sig_be[jj][kk], mu_be[jj][kk], aa_hp, bb_hp);
1042 | 
1043 |     ln_acp = log_full_beta_p - log_full_beta;
1044 |     lnu = log(gsl_rng_uniform(rando));
1045 | 
1046 |     // Metropolis-Hastings ratio
1047 |     if(lnu < ln_acp){
1048 | 
1049 |       accepted_beta_flag[hh] = 1;
1050 | 
1051 |       // update accepted beta into beta_temp
1052 |       beta_temp[kk] = beta_p;
1053 |       inclusion_indicator[hh] = inclusion_indicator_p;
1054 | 
1055 |       for(ii = 0 ; ii < n_obs ; ii++){
1056 |         // also update the gamma (linear predictor)
1057 |         loggamma[ii][jj] = loggamma_p[ii];
1058 |       }
1059 |     } // Close MH if
1060 |   } // Close for kk
1061 | } // Close function
1062 | 
1063 | void update_alpha_jj(double **JJ, double **loggamma, double *alpha,
1064 |                      double *prop_per_alpha, int *accepted_alpha_flag,
1065 |                      double mu_al[n_cats], double sig_al[n_cats], int jj){
1066 | 
1067 |   int ii;
1068 | 
1069 |   double sig_prop = prop_per_alpha[jj];
1070 |   double loggamma_p[n_obs];
1071 | 
1072 |   double alpha_p;
1073 |   double lnu, ln_acp;
1074 | 
1075 |   // Prepare the current and proposed full conditional values
1076 |   double log_full_alpha, log_full_alpha_p;
1077 | 
1078 |   // Calculate the full conditional for the current value
1079 |   log_full_alpha=0;
1080 |   for(ii = 0 ; ii < n_obs ; ii++){
1081 | 
1082 |     log_full_alpha = log_full_alpha - gsl_sf_lngamma(exp(loggamma[ii][jj]));
1083 |     log_full_alpha = log_full_alpha + exp(loggamma[ii][jj]) * log(JJ[ii][jj]);
1084 | 
1085 |   }
1086 |   log_full_alpha = log_full_alpha - 1.0/(2.0 * sig_al[jj]) * pow(alpha[jj] - mu_al[jj], 2.0);
1087 | 
1088 |   // Propose a new value for alpha[jj] using a random walk proposal centered on
1089 |   // the current value of alpha[jj]
1090 |   alpha_p = alpha[jj] + gsl_ran_gaussian_ziggurat(rando, sig_prop);
1091 | 
1092 |   // Gamma must be updated too
1093 |   for(ii = 0 ; ii < n_obs ; ii++){
1094 |     loggamma_p[ii] = loggamma[ii][jj] - alpha[jj] + alpha_p;
1095 |   }
1096 | 
1097 |   // Calculate the full conditional for the proposed value
1098 |   log_full_alpha_p = 0;
1099 |   for(ii = 0 ; ii < n_obs ; ii++){
1100 |     log_full_alpha_p = log_full_alpha_p - gsl_sf_lngamma(exp(loggamma_p[ii]));
1101 |     log_full_alpha_p = log_full_alpha_p + exp(loggamma_p[ii]) * log(JJ[ii][jj]);
1102 |   }
1103 | 
1104 |   log_full_alpha_p = log_full_alpha_p - 1.0/(2.0 * sig_al[jj]) * pow(alpha_p - mu_al[jj], 2.0);
1105 |   ln_acp = log_full_alpha_p - log_full_alpha;
1106 |   lnu = log(gsl_rng_uniform(rando));
1107 | 
1108 |   if(lnu < ln_acp){
1109 | 
1110 |     // If accepted, update both alpha[jj] and loggamma[ii][jj], and keep
1111 |     // track of acceptances
1112 |     accepted_alpha_flag[jj] = 1;
1113 |     alpha[jj] = alpha_p;
1114 | 
1115 |     for(ii = 0 ; ii < n_obs ; ii++){
1116 |       loggamma[ii][jj] = loggamma_p[ii];
1117 |     }
1118 |   }
1119 | 
1120 |   return;
1121 | 
1122 | } // Close function
1123 | 
1124 | // the probability of the Bernoulli trial is integrated out
1125 | double lprior_bbsas(double betajk, int sjk, double sig_bejk, double mu_bejk,
1126 |                     double aa_hp, double bb_hp){
1127 | 
1128 |   // calculate additional beta factor
1129 |   double aa = sjk + aa_hp;
1130 |   double bb = 1 - sjk + bb_hp;
1131 |   double lbeta_factor = gsl_sf_lnbeta(aa, bb) - gsl_sf_lnbeta(aa_hp, bb_hp);
1132 | 
1133 |   // piece-wise function
1134 |   if(sjk == 0){
1135 |     return(log(1.0 - exp(lbeta_factor)));
1136 |   }
1137 |   else{
1138 |     return(lbeta_factor - 0.5 * log(2.0 * M_PI * sig_bejk) - 1.0/(2.0 * sig_bejk) * pow(betajk - mu_bejk, 2.0));
1139 |   }
1140 | 
1141 | }
1142 | 
1143 | // simple adaptive MH proposal
1144 | // this uses equation (3) from Roberts & Rosenthal (2009) but without the full
1145 | // covariance of the target which makes it similar to the component-wise update
1146 | // of Haario et al. (2005)
1147 | double adap_prop(double curr_var){
1148 | 
1149 |   // need dimension (???)
1150 |   double dd = (double)n_cats * (double)n_vars;
1151 | 
1152 |   // ensures bounded convergence
1153 |   int safeguard = gsl_ran_bernoulli(rando, 0.05);
1154 |   double usually = pow(2.38, 2.0) * curr_var/dd;
1155 |   double unusually = pow(0.1, 2.0)/dd;
1156 | 
1157 |   double prop_var = (1 - safeguard) * gsl_ran_gaussian_ziggurat(rando, pow(usually, 0.5)) + safeguard * gsl_ran_gaussian_ziggurat(rando, pow(unusually, 0.5));
1158 | 
1159 |   return(prop_var);
1160 | }
1161 | 
// sort of following http://www.johndcook.com/blog/standard_deviation/
//
// One step of the running mean: given the mean of the previous observations
// (last_mean) and the new observation (curr_obs), return the updated mean.
// iteration is the zero-based index of the new observation, so the number of
// observations including the new one is iteration + 1.
double online_mean(int iteration, double last_mean, double curr_obs){

  // don't fall off by one: a post-increment (iteration++) would hand back the
  // un-incremented index and divide by zero on the very first observation
  int n_vals = iteration + 1;

  double curr_mean = last_mean + (curr_obs - last_mean)/(double)n_vals;

  return(curr_mean);
}
1173 | 
// One step of the running variance.
//
// The previous sum of squares is taken to be iteration * last_var; the product
// of the new observation's deviations from the previous and the updated mean
// is added to it, and the total is divided by iteration again.
// The caller must pass iteration > 0.
double online_var(int iteration, double last_mean, double last_var, double curr_mean, double curr_obs){

  const double count = (double)iteration;
  const double dev_before = curr_obs - last_mean;
  const double dev_after = curr_obs - curr_mean;
  const double sum_sq = count * last_var + dev_before * dev_after;

  return sum_sq/count;
}
1182 | 
1183 | 


--------------------------------------------------------------------------------