├── .Rbuildignore ├── .Rprofile ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── R ├── H_bar.R ├── RcppExports.R ├── bayesianlasso.R ├── c_chains_to_dataframe.R ├── c_chains_to_measure_as_list.R ├── coupled_chains.R ├── coupled_pairs01.R ├── gamma_couplings.R ├── get_max_coupling.R ├── get_mh_kernels.R ├── histogram_c_chains.R ├── invgamma_couplings.R ├── invgaussian_couplings.R ├── ising.R ├── logistic_regression.R ├── meetingtime.R ├── mvnorm.R ├── mvnorm_couplings.R ├── rnorm_max_coupling.R ├── rnorm_reflectionmax.R ├── unbiasedestimator.R ├── unbiasedmcmc-package.R ├── utils.R └── variableselection.R ├── README.Rmd ├── README.md ├── README_files └── figure-gfm │ ├── estimators-1.png │ ├── usage-1.png │ └── usage-2.png ├── data ├── diabetes.RData └── germancredit.RData ├── inst ├── README.R ├── check │ ├── check_discretecouplings.R │ ├── check_gamma.R │ ├── check_hbar.R │ ├── check_histograms.R │ ├── check_inversegamma.R │ ├── check_inversegaussian.R │ ├── check_ising.R │ ├── check_lags.R │ ├── check_mvnorm.R │ ├── check_normalcouplings.R │ ├── check_reflmaxcoupling.R │ ├── check_rwmh_bivariate.R │ ├── check_rwmh_doublewell.R │ ├── check_rwmh_minimalcode.R │ ├── check_rwmh_univariate.R │ ├── check_vs_mlikelihood.R │ ├── check_weirdbug.R │ └── to_check_signed_measure.R ├── reproducebaseball │ ├── baseball.mcmc.run.R │ ├── baseball.plots.R │ ├── baseball.run.R │ └── run.all.R ├── reproducebayesianlasso │ ├── bayesianlasso.mcmc.run.R │ ├── bayesianlasso.plots.R │ ├── bayesianlasso.run.R │ └── run.all.R ├── reproducebimodal │ ├── bimodal.mcmc.run.R │ ├── bimodal.plots.R │ ├── bimodal.run.R │ └── run.all.R ├── reproduceepidemiology │ ├── plummer.cut.plots.R │ ├── plummer.cut.run.R │ └── run.all.R ├── reproducegermancredit │ ├── german_credit.csv │ ├── germancredit.mcmc.run.R │ ├── germancredit.plots.R │ ├── germancredit.preparedata.R │ ├── germancredit.run.R │ └── run.all.R ├── reproduceisingmodel │ ├── ising.gibbs.meetings.run.R │ ├── ising.mcmc.run.R │ ├── ising.plots.R │ ├── ising.swap.meetings.run.R │ ├── run.all.R │ ├── run.batch.ising.R │ └── run.odyssey.ising.sh ├── reproducepumpfailures │ ├── pumpfailures.mcmc.run.R │ ├── pumpfailures.mykland.run.R │ ├── pumpfailures.plots.R │ ├── pumpfailures.run.R │ ├── pumpfailures.tuning.run.R │ └── run.all.R ├── reproducescalingdimension │ ├── run.all.R │ ├── scaling.gibbs.R │ ├── scaling.hmc.R │ ├── scaling.plots.R │ ├── scaling.rwmh.maximalcoupling.R │ └── scaling.rwmh.reflectionmaxcoupling.R └── reproducevarselection │ ├── run.all.R │ ├── varselection.clusterscript.kappas.R │ ├── varselection.differentkappas.R │ ├── varselection.differentkappas.plots.R │ ├── varselection.differentp.R │ ├── varselection.differentp.plots.R │ ├── varselection.generatedata.run.R │ ├── varselection.mcmc.run.R │ ├── varselection.script.R │ └── varselection.tables.R ├── man ├── H_bar.Rd ├── c_chains_to_dataframe.Rd ├── c_chains_to_measure_as_list.Rd ├── dinversegamma.Rd ├── dinvgaussian.Rd ├── expit.Rd ├── fast_dmvnorm.Rd ├── fast_dmvnorm_chol_inverse.Rd ├── fast_rmvnorm.Rd ├── fast_rmvnorm_chol.Rd ├── get_blasso.Rd ├── get_max_coupling.Rd ├── get_mh_kernel.Rd ├── get_variableselection.Rd ├── hello.Rd ├── histogram_c_chains.Rd ├── logistic_precomputation.Rd ├── pg_gibbs.Rd ├── rcpp_hello.Rd ├── rgamma_coupled.Rd ├── rinversegamma.Rd ├── rinversegamma_coupled.Rd ├── rinvgaussian.Rd ├── rinvgaussian_coupled.Rd ├── rmvnorm_max.Rd ├── rmvnorm_max_chol.Rd ├── rmvnorm_reflectionmax.Rd ├── rnorm_max_coupling.Rd ├── rnorm_reflectionmax.Rd ├── sample_coupled_chains.Rd ├── sample_meetingtime.Rd ├── sample_unbiasedestimator.Rd ├── setmytheme.Rd └── unbiasedmcmc-package.Rd ├── src ├── PolyaGamma.cpp ├── PolyaGamma.h ├── RNG.cpp ├── RNG.h ├── RRNG.cpp ├── RRNG.h ├── RcppExports.cpp ├── blassoutil.cpp ├── c_chains_to_measure.cpp ├── estimator_bin.cpp ├── inversegaussian.cpp ├── ising.cpp ├── logisticregression.cpp ├── logisticregressioncoupling.cpp ├── mvnorm.cpp ├── mvnorm.h ├── propensityscore.cpp ├── prune.cpp ├── sample_pairs01.cpp └── vs_mlikelihood.cpp ├── unbiasedmcmc.Rproj └── vignettes ├── .gitignore ├── introduction-normaltarget.Rmd └── polyagammagibbs.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^doc$ 4 | ^Meta$ 5 | -------------------------------------------------------------------------------- /.Rprofile: -------------------------------------------------------------------------------- 1 | .First <- function(){ 2 | print("hello") 3 | Sys.setenv(MAKEFLAGS = " -j6") 4 | } 5 | 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.txt 2 | *.pdf 3 | *.tex 4 | .*.swp 5 | .DS_Store 6 | TODO 7 | .Rproj.user 8 | .Rhistory 9 | *.RData 10 | .Ruserdata 11 | .Rprofile 12 | src/*.o 13 | src/*.so 14 | src/*.dll 15 | README_cache/ 16 | inst/tutorials/*Rmd 17 | inst/tutorials/*pdf 18 | inst/tutorials/*cache*/ 19 | inst/tutorials/*files*/ 20 | inst/reproducescalingdimension_old 21 | inst/reproducescalingdimension/backup 22 | inst/doc 23 | vignettes/*cache* 24 | vignettes/*cache*/*html* 25 | *.out 26 | inst/reproducepropensityscores 27 | doc 28 | Meta 29 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: unbiasedmcmc 2 | Type: Package 3 | Title: Unbiased MCMC with couplings 4 | Version: 0.3.0 5 | Author: Pierre E Jacob, John O'Leary, Yves F Atchade 6 | Maintainer: Pierre E Jacob 7 | Description: This package includes the scripts to reproduce the article 8 | "Unbiased Markov chain Monte Carlo with couplings" by Pierre E. Jacob, John 9 | O'Leary, Yves F. Atchade, where it is described how the bias of MCMC can 10 | be exactly removed, using couplings of Markov chains. The technical report 11 | is available at https://arxiv.org/abs/1708.03625. 12 | License: GPL (>= 2) 13 | Encoding: UTF-8 14 | Depends: 15 | Rcpp,RcppEigen,lubridate 16 | Imports: 17 | Rcpp (>= 0.11.6),RcppEigen 18 | LinkingTo: Rcpp,RcppEigen 19 | RoxygenNote: 7.2.1 20 | Suggests: 21 | knitr, 22 | rmarkdown 23 | VignetteBuilder: knitr 24 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(H_bar) 4 | export(beta2e) 5 | export(c_chains_to_dataframe) 6 | export(c_chains_to_measure_as_list) 7 | export(coupled_pairs01) 8 | export(create_mids) 9 | export(cut_in_fifth) 10 | export(dinversegamma) 11 | export(dinvgaussian) 12 | export(estimator_bin) 13 | export(expit) 14 | export(fast_dmvnorm) 15 | export(fast_dmvnorm_chol_inverse) 16 | export(fast_rmvnorm) 17 | export(fast_rmvnorm_chol) 18 | export(find_breaks) 19 | export(get_blasso) 20 | export(get_max_coupling) 21 | export(get_mh_kernels) 22 | export(get_variableselection) 23 | export(histogram_c_chains) 24 | export(ising_coupled_kernel) 25 | export(ising_pt_coupled_kernel) 26 | export(ising_pt_rinit) 27 | export(ising_pt_single_kernel) 28 | export(ising_rinit) 29 | export(ising_single_kernel) 30 | export(logisticregression_m_and_sigma) 31 | export(logisticregression_m_function) 32 | export(logisticregression_precomputation) 33 | export(logisticregression_sigma) 34 | export(logisticregression_sigma_function) 35 | export(logisticregression_xbeta) 36 | export(pg_gibbs) 37 | export(plot_histogram) 38 | export(rgamma_coupled) 39 | export(rinversegamma) 40 | export(rinversegamma_coupled) 41 | export(rinvgaussian) 42 | export(rinvgaussian_coupled) 43 | export(rmvnorm_max) 44 | export(rmvnorm_max_chol) 45 | export(rmvnorm_reflectionmax) 46 | export(rnorm_max_coupling) 47 | export(rnorm_reflectionmax) 48 | export(sample_beta) 49 | export(sample_coupled_chains) 50 | export(sample_meetingtime) 51 | export(sample_unbiasedestimator) 52 | export(sample_w) 53 | export(setmytheme) 54 | export(w_max_coupling_caller) 55 | export(w_rejsampler) 56 | export(w_rejsampler_caller) 57 | importFrom(Rcpp,sourceCpp) 58 | useDynLib(unbiasedmcmc) 59 | -------------------------------------------------------------------------------- /R/H_bar.R: -------------------------------------------------------------------------------- 1 | #'@rdname H_bar 2 | #'@title Compute unbiased estimators from coupled chains 3 | #'@description Compute unbiased estimators based on coupled chains. 4 | #' Presumably generated via \code{\link{sample_coupled_chains}}. 5 | #' 6 | #' The test function h should take a "chain_state" as argument and return a numeric vector. 7 | #' The estimand of interest is \eqn{\int h(x) \pi(x) dx}, where \eqn{\pi} is the invariant distribution 8 | #' of the chains. 9 | #' 10 | #' The lag is inferred from the coupled chains, so there's no argument to specify it. 11 | #'@param c_chains A list containing coupled chains generated by \code{\link{sample_coupled_chains}}. 12 | #'@param h A test function of interest, which should take a chain state ("chain_state" entry of the output of "rinit", for instance) 13 | #'and return a numeric vector 14 | #'@param k An integer at which to start computing the unbiased estimator; should be less than m 15 | #'@param m A time horizon: should be less than the length of the chains; typically the same 16 | #'m that was used in the call to \code{\link{sample_coupled_chains}}, or a smaller value 17 | #'@return A value (or vector of values) of an unbiased estimator of \eqn{\int h(x) \pi(x) dx} 18 | #'@export 19 | H_bar <- function(c_chains, h = function(x) x, k = 0, m = 1){ 20 | maxiter <- c_chains$iteration 21 | if (k > maxiter){ 22 | print("error: k has to be less than the horizon of the coupled chains") 23 | return(NULL) 24 | } 25 | if (m > maxiter){ 26 | print("error: m has to be less than the horizon of the coupled chains") 27 | return(NULL) 28 | } 29 | # infer the lag from the number of rows in samples1 and samples2 30 | lag <- dim(c_chains$samples1)[1] - dim(c_chains$samples2)[1] 31 | # test the dimension of h(X) 32 | # p <- length(h(c_chains$samples1[1,])) 33 | h_of_chain <- apply(X = c_chains$samples1[(k+1):(m+1),,drop=F], MARGIN = 1, FUN = h) 34 | if (is.null(dim(h_of_chain))){ 35 | h_of_chain <- matrix(h_of_chain, ncol = 1) 36 | } else { 37 | h_of_chain <- t(h_of_chain) 38 | } 39 | H_bar <- apply(X = h_of_chain, MARGIN = 2, sum) 40 | # next, add bias correction terms 41 | # Delta_t refers to h(X_t) - h(Y_{t-lag}) 42 | if (c_chains$meetingtime <= k + lag){ 43 | # nothing else to add, because Delta_t = 0 for all t >= meeting time 44 | } else { 45 | for (time in (k+lag):(c_chains$meetingtime-1)){ 46 | # time is the index t of X_{t} where the chain start from X_{0} 47 | coefficient_t <- (floor((time-k) / lag) - ceiling(max(lag, time-m)/lag) + 1) 48 | Delta_t <- h(c_chains$samples1[time+1,]) - h(c_chains$samples2[time-lag+1,]) 49 | H_bar <- H_bar + coefficient_t * Delta_t 50 | } 51 | } 52 | # return result divided by number of terms i.e. m - k + 1 53 | return(H_bar / (m - k + 1)) 54 | } 55 | -------------------------------------------------------------------------------- /R/bayesianlasso.R: -------------------------------------------------------------------------------- 1 | #' Y and X need to be matrices, and lambda non-negative 2 | #'@export 3 | get_blasso <- function(Y, X, lambda){ 4 | p <- ncol(X) 5 | n <- nrow(X) 6 | XtX <- t(X) %*% X 7 | XtY <- t(X) %*% Y 8 | alpha1 <- (n-1)/2 + p/2 9 | lambda2 <- lambda^2 10 | # naive initialization 11 | rinit <- function(){ 12 | return(list(chain_state = c(rep(0, p), rep(1, p), 1))) 13 | } 14 | # 15 | gibbs_kernel <- function(state){ 16 | beta <- state$chain_state[1:p] 17 | tau2 <- state$chain_state[(p+1):(2*p)] 18 | sigma2 <- state$chain_state[2*p+1] 19 | res_ <- unbiasedmcmc:::blassoconditional(Y, X, XtY, XtX, tau2, sigma2) 20 | beta <- res_$beta 21 | norm <- res_$norm 22 | betaDbeta <- res_$betaDbeta 23 | sigma2 <- rinversegamma(1, alpha1, 0.5 * (norm + betaDbeta)) 24 | # update tau 25 | sqrtlambda2sigma2 <- sqrt(lambda2 * sigma2) 26 | for (component in 1:p){ 27 | tau2[component] <- 1 / rinvgaussian(1, sqrtlambda2sigma2 / abs(beta[component]), lambda2) 28 | } 29 | return(list(chain_state = c(beta, tau2, sigma2))) 30 | } 31 | 32 | coupled_gibbs_kernel <- function(state1, state2){ 33 | beta1 <- state1$chain_state[1:p] 34 | tau21 <- state1$chain_state[(p+1):(2*p)] 35 | sigma21 <- state1$chain_state[2*p+1] 36 | beta2 <- state2$chain_state[1:p] 37 | tau22 <- state2$chain_state[(p+1):(2*p)] 38 | sigma22 <- state2$chain_state[2*p+1] 39 | # 40 | if (all(tau21 == tau22) && all(sigma21 == sigma22)){ 41 | res_ <- unbiasedmcmc:::blassoconditional(Y, X, XtY, XtX, tau21, sigma21) 42 | beta1 <- res_$beta 43 | norm1 <- res_$norm 44 | betaDbeta1 <- res_$betaDbeta 45 | beta2 <- beta1 46 | norm2 <- norm1 47 | betaDbeta2 <- betaDbeta1 48 | } else { 49 | res_ <- unbiasedmcmc:::blassoconditional_coupled(Y, X, XtY, XtX, tau21, tau22, sigma21, sigma22) 50 | beta1 <- res_$beta1 51 | norm1 <- res_$norm1 52 | betaDbeta1 <- res_$betaDbeta1 53 | beta2 <- res_$beta2 54 | norm2 <- res_$norm2 55 | betaDbeta2 <- res_$betaDbeta2 56 | } 57 | sigma2s <- rinversegamma_coupled(alpha1, alpha1, 58 | 0.5 * (norm1 + betaDbeta1), 59 | 0.5 * (norm2 + betaDbeta2)) 60 | sigma21 <- sigma2s$xy[1] 61 | sigma22 <- sigma2s$xy[2] 62 | # update tau 63 | sqrtlambda2sigma21 <- sqrt(lambda2 * sigma21) 64 | sqrtlambda2sigma22 <- sqrt(lambda2 * sigma22) 65 | for (component in 1:p){ 66 | if (sigma21 == sigma22 && beta1[component] == beta2[component]){ 67 | invtau2 <- rinvgaussian(1, sqrtlambda2sigma21/ abs(beta1[component]), lambda2) 68 | tau21[component] <- 1 / invtau2 69 | tau22[component] <- tau21[component] 70 | } else { 71 | invtau2s <- rinvgaussian_coupled(sqrtlambda2sigma21 / abs(beta1[component]), 72 | sqrtlambda2sigma22 / abs(beta2[component]), 73 | lambda2, lambda2) 74 | tau21[component] <- 1 / invtau2s[1] 75 | tau22[component] <- 1 / invtau2s[2] 76 | } 77 | } 78 | chain_state1 <- c(beta1, tau21, sigma21) 79 | chain_state2 <- c(beta2, tau22, sigma22) 80 | identical_ <- all(chain_state1 == chain_state2) 81 | return(list(state1 = list(chain_state = chain_state1), 82 | state2 = list(chain_state = chain_state2), 83 | identical = identical_)) 84 | } 85 | return(list(rinit = rinit, gibbs_kernel = gibbs_kernel, coupled_gibbs_kernel = coupled_gibbs_kernel)) 86 | } 87 | -------------------------------------------------------------------------------- /R/c_chains_to_dataframe.R: -------------------------------------------------------------------------------- 1 | #'@rdname c_chains_to_dataframe 2 | #'@title Obtain data frame representation of measure from list of coupled chains 3 | #'@description From coupled chains, 4 | #' presumably generated via \code{\link{sample_coupled_chains}}, 5 | #' and a choice of integers k and m, the function constructs 6 | #' a data frame representation of an empirical signed measure, i.e. 7 | #' 8 | #' \deqn{\hat{\pi}(dx) = \sum_{n=1}^N \omega_n \delta_{Z_n}(dx)} 9 | #' 10 | #' The function returns a data frame with first column "rep" indicating index of coupled chain, 11 | #' second column "MCMC" indicating whether atom is part of the "MCMC" part of the signed measure (1) or the bias correction part (0) 12 | #' third column "weight" indicating weights, 13 | #' remaining columns "atom.1", "atom.2", etc containing components of the atoms 14 | #' 15 | #'@param c_chains A list containing coupled chains generated by \code{\link{sample_coupled_chains}}. 16 | #'@param k An integer at which to start computing the signed measure; should be less than m 17 | #'@param m A time horizon: should be less than the length of the chains; typically the same 18 | #'m that was used in the call to \code{\link{sample_coupled_chains}}, or a smaller value 19 | #'@param dopar Boolean (default to FALSE) indicating whether to perform calculation using registered parallel cores 20 | #'@return A data frame 21 | #'@export 22 | c_chains_to_dataframe <- function(c_chains, k, m, dopar = FALSE, prune = TRUE){ 23 | if ("meetingtime" %in% names(c_chains)){ 24 | # the user supplied a coupled chain, rather than a list of coupled chain 25 | c_chains <- list(c_chains) 26 | } 27 | # number of coupled chains 28 | nsamples <- length(c_chains) 29 | approximation <- NULL 30 | if (dopar){ 31 | # get signed measure representation of each coupled chain 32 | approximation <- foreach(irep = 1:nsamples, .combine = rbind) %dopar% { 33 | ms_irep <- c_chains_to_measure_as_list(c_chains[[irep]], k, m) 34 | data.frame(rep = rep(irep, length(ms_irep$weights)), MCMC = ms_irep$MCMC, weight = ms_irep$weights, atom = ms_irep$atoms) 35 | } 36 | } else { 37 | approximation <- data.frame() 38 | for (irep in 1:nsamples){ 39 | ms_irep <- c_chains_to_measure_as_list(c_chains[[irep]], k, m) 40 | approximation <- rbind(approximation, 41 | data.frame(rep = rep(irep, length(ms_irep$weights)), MCMC = ms_irep$MCMC, weight = ms_irep$weights, atom = ms_irep$atoms)) 42 | } 43 | } 44 | # normalize the weights 45 | approximation$weight <- approximation$weight / nsamples 46 | # prune identical atoms 47 | if (prune){ 48 | approximation <- data.frame(prune_measure_(as.matrix(approximation[do.call(order, as.list(approximation[,c(1,2,4:ncol(approximation)),drop=F])),]))) 49 | } 50 | names(approximation) <- c("rep", "MCMC", "weight", paste0("atom.", 1:(ncol(approximation)-3))) 51 | approximation <- approximation[do.call(order, as.list(approximation[,1:2])),] 52 | return(approximation) 53 | } 54 | -------------------------------------------------------------------------------- /R/c_chains_to_measure_as_list.R: -------------------------------------------------------------------------------- 1 | #' @rdname c_chains_to_measure_as_list 2 | #' @title Obtain empirical measure (as list) from coupled chains 3 | #' @description From coupled chains, 4 | #' presumably generated via \code{\link{sample_coupled_chains}}, 5 | #' and a choice of integers k and m, the function constructs 6 | #' a representation of an empirical signed measure, i.e. 7 | #' 8 | #' \deqn{\hat{\pi}(dx) = \sum_{n=1}^N \omega_n \delta_{Z_n}(dx)} 9 | #' 10 | #' The function returns the weights \eqn{\omega} and the atoms \eqn{Z}, 11 | #' in a list with two entries, "weights" and "atoms". 12 | #' 13 | #' @param c_chains A list containing coupled chains generated by \code{\link{sample_coupled_chains}}. 14 | #' @param k An integer at which to start computing the signed measure; should be less than m 15 | #' @param m A time horizon: should be less than the length of the chains; typically the same 16 | #' m that was used in the call to \code{\link{sample_coupled_chains}}, or a smaller value 17 | #' @return A list with "weights" and "atoms" 18 | #'@export 19 | c_chains_to_measure_as_list <- function(c_chains, k, m){ 20 | if (k > m){ 21 | stop("k must be <= than m") 22 | } 23 | if (m > c_chains$iteration){ 24 | stop("m has to be less than the time horizon of the coupled chains") 25 | } 26 | return(c_chains_to_measure_as_list_(c_chains, k, m)) 27 | } 28 | -------------------------------------------------------------------------------- /R/coupled_chains.R: -------------------------------------------------------------------------------- 1 | #'@rdname sample_coupled_chains 2 | #'@title Sample coupled Markov chains 3 | #'@description Sample two Markov chains, each following 'single_kernel' marginally, 4 | #' and 'coupled_kernel' jointly, until min(max(tau, m), max_iterations), where tau 5 | #' is the first time the two chains meet (the "meeting time"). 6 | #' 7 | #' Or more precisely, they meet with a delay of lag, i.e. X_t = Y_{t-lag}, and lag is one by default. 8 | #' 9 | #' Once the coupled chains are obtained, unbiased estimators can be computed for arbitrary test 10 | #' functions via the function \code{\link{H_bar}}. 11 | #' 12 | #' If you're only interested in sampling meeting times, see \code{\link{sample_meetingtime}}. 13 | #' 14 | #' 15 | #'@param single_kernel A list taking a state and returning a state, performing one step of a Markov kernel 16 | #'@param coupled_kernel A list taking two states and returning two states, performing one step of a coupled Markov kernel; 17 | #'it also returns a boolean "identical" indicating whether the two states are identical. 18 | #'@param rinit A list representing the initial state of the chain, that can be given to 'single_kernel' 19 | #'@param m A time horizon: the chains are sampled until the maximum between m and the meeting time 20 | #'@param lag A time lag, equal to one by default 21 | #'@param max_iterations A maximum number of iterations, at which to interrup the while loop; Inf by default 22 | #'@param preallocate A number of anticipated iterations, used to pre-allocate memory; 10 by default 23 | #'@return A list with 24 | #'\itemize{ 25 | #' 26 | #'\item samples1: the first chain, of length max(m, tau) 27 | #' 28 | #'\item samples2: the second chain, of length max(m, tau) - lag 29 | #' 30 | #'\item meetingtime: the meeting time; equal to Inf if while loop was interrupted 31 | #' 32 | #'\item iteration: final iteration; could be equal to m, to meetingtime, or to max_iterations 33 | #' 34 | #'\item elapsedtime: elapsed wall-clock time, in seconds 35 | #' 36 | #'\item cost: computing cost in terms of calls to Markov kernels (counting coupled kernel as twice the cost) 37 | #'} 38 | #'@export 39 | sample_coupled_chains <- function(single_kernel, coupled_kernel, rinit, m = 1, lag = 1, max_iterations = Inf, preallocate = 10){ 40 | starttime <- Sys.time() 41 | state1 <- rinit(); state2 <- rinit() 42 | dimstate <- length(state1$chain_state) 43 | nrowsamples1 <- m+preallocate+lag 44 | samples1 <- matrix(nrow = nrowsamples1, ncol = dimstate) 45 | samples2 <- matrix(nrow = nrowsamples1-lag, ncol = dimstate) 46 | samples1[1,] <- state1$chain_state 47 | samples2[1,] <- state2$chain_state 48 | # current_nsamples1 <- 1 49 | time <- 0 50 | for (t in 1:lag){ 51 | time <- time + 1 52 | state1 <- single_kernel(state1) 53 | samples1[time+1,] <- state1$chain_state 54 | } 55 | # current_nsamples1 <- current_nsamples1 + 1 56 | # iter <- 1 57 | meetingtime <- Inf 58 | while ((time < max(meetingtime, m)) && (time < max_iterations)){ 59 | time <- time + 1 # time is lag+1,lag+2,... 60 | if (is.finite(meetingtime)){ 61 | state1 <- single_kernel(state1) 62 | state2 <- state1 63 | } else { 64 | res_coupled_kernel <- coupled_kernel(state1, state2) 65 | state1 <- res_coupled_kernel$state1 66 | state2 <- res_coupled_kernel$state2 67 | if (res_coupled_kernel$identical){ 68 | meetingtime <- time 69 | } 70 | } 71 | if ((time+1) > nrowsamples1){ 72 | new_rows <- nrowsamples1 73 | nrowsamples1 <- nrowsamples1 + new_rows 74 | samples1 <- rbind(samples1, matrix(NA, nrow = new_rows, ncol = dimstate)) 75 | samples2 <- rbind(samples2, matrix(NA, nrow = new_rows, ncol = dimstate)) 76 | } 77 | samples1[time+1,] <- state1$chain_state 78 | samples2[time-lag+1,] <- state2$chain_state 79 | } 80 | samples1 <- samples1[1:(time+1),,drop=F] 81 | samples2 <- samples2[1:(time-lag+1),,drop=F] 82 | cost <- lag + 2*(meetingtime - lag) + max(0, time - meetingtime) 83 | currenttime <- Sys.time() 84 | elapsedtime <- as.numeric(lubridate::as.duration(lubridate::ymd_hms(currenttime) - lubridate::ymd_hms(starttime)), "seconds") 85 | return(list(samples1 = samples1, samples2 = samples2, 86 | meetingtime = meetingtime, iteration = time, elapsedtime = elapsedtime, cost = cost)) 87 | } 88 | 89 | -------------------------------------------------------------------------------- /R/coupled_pairs01.R: -------------------------------------------------------------------------------- 1 | # jointly sample ones and zeros 2 | #'@export 3 | coupled_pairs01 <- function(selection1, selection2){ 4 | p <- length(selection1) 5 | # jointly samples ones 6 | s1 <- sum(selection1) 7 | s2 <- sum(selection2) 8 | w11 <- selection1 / s1 9 | w12 <- selection2 / s2 10 | pmin1 <- pmin(w11, w12) 11 | alpha1 <- sum(pmin1) 12 | minor1 <- pmin1 / alpha1 13 | residual1 <- (w11 - pmin1) / (1 - alpha1) 14 | residual2 <- (w12 - pmin1) / (1 - alpha1) 15 | ind1 <- c(NA, NA) 16 | if (runif(1) < alpha1){ 17 | x <- sample(x = 1:p, size = 1, prob = minor1) 18 | ind1 <- c(x,x) 19 | } else { 20 | ind1 <- c(sample(x = 1:p, size = 1, prob = residual1), 21 | sample(x = 1:p, size = 1, prob = residual2)) 22 | } 23 | # jointly samples zeros 24 | w01 <- (1 - selection1) / (p - s1) 25 | w02 <- (1 - selection2) / (p - s2) 26 | pmin0 <- pmin(w01, w02) 27 | alpha0 <- sum(pmin0) 28 | minor0 <- pmin0 / alpha0 29 | residual1 <- (w01 - pmin0) / (1 - alpha0) 30 | residual2 <- (w02 - pmin0) / (1 - alpha0) 31 | ind0 <- c(NA, NA) 32 | if (runif(1) < alpha0){ 33 | x <- sample(x = 1:p, size = 1, prob = minor0) 34 | ind0 <- c(x,x) 35 | } else { 36 | ind0 <- c(sample(x = 1:p, size = 1, prob = residual1), 37 | sample(x = 1:p, size = 1, prob = residual2)) 38 | } 39 | return(c(ind0, ind1)) 40 | } 41 | -------------------------------------------------------------------------------- /R/gamma_couplings.R: -------------------------------------------------------------------------------- 1 | #'@rdname rgamma_coupled 2 | #'@title Sample from maximally coupled Gamma variables 3 | #'@description Draws a pair of variables, respectively Gamma(alpha1, beta1) and Gamma(alpha2, beta2) 4 | #'where the parametrization is that beta is the rate, i.e. the log-pdf of Gamma(alpha,beta) evaluated at x is 5 | #' \deqn{\alpha * log(\beta) - lgamma(\alpha) + (\alpha-1) * log(x) - \beta x} 6 | #' where \eqn{lgamma} stands for the logarithm of the Gamma function. 7 | #'@param alpha1 First shape 8 | #'@param alpha2 Second shape 9 | #'@param beta1 First rate 10 | #'@param beta2 Second rate 11 | #'@return A list with entry 'xy' for the pair of values, and boolean 'identical' indicating whether the two values 12 | #'are identical. 13 | #'@export 14 | rgamma_coupled <- function(alpha1, alpha2, beta1, beta2){ 15 | f <- get_max_coupling(function(n) rgamma(n, alpha1, beta1), 16 | function(x) dgamma(x, alpha1, beta1, log = TRUE), 17 | function(n) rgamma(n, alpha2, beta2), 18 | function(x) dgamma(x, alpha2, beta2, log = TRUE)) 19 | return(f()) 20 | } 21 | -------------------------------------------------------------------------------- /R/get_max_coupling.R: -------------------------------------------------------------------------------- 1 | #'@rdname get_max_coupling 2 | #'@title Sample from maximal coupling of two distributions p and q 3 | #'@description Takes two univariate continuous distributions (specified by random number generator and log-pdf function), 4 | #' and returns a function to sample from a maximal coupling of these two distributions. 5 | #'@param rp A function taking n as an argument and returning n samples from the distribution p 6 | #'@param dp A function taking x as an argument and returning log-pdf of p evaluated at x 7 | #'@param rq A function taking n as an argument and returning n samples from the distribution q 8 | #'@param dq A function taking x as an argument and returning log-pdf of q evaluated at x 9 | #'@return Returns a list with 10 | #' 11 | #' \itemize{ 12 | #' \item "xy": the pair of samples \eqn{(x,y)} 13 | #' 14 | #' \item "identical": TRUE if \eqn{x = y}, FALSE otherwise 15 | #' } 16 | #'@examples 17 | #' mu1 <- 0; mu2 <- 1; sigma1 <- 0.5; sigma2 <- 1.2 18 | #' f <- get_max_coupling(function(n) rnorm(n, mu1, sigma1), 19 | #' function(x) dnorm(x, mu1, sigma1, log = TRUE), 20 | #' function(n) rnorm(n, mu2, sigma2), 21 | #' function(x) dnorm(x, mu2, sigma2, log = TRUE)) 22 | #' f() 23 | #'@export 24 | get_max_coupling <- function(rp, dp, rq, dq){ 25 | function(){ 26 | x <- rp(1) 27 | if (dp(x) + log(runif(1)) < dq(x)){ 28 | return(list(xy = c(x,x), identical = TRUE)) 29 | } else { 30 | reject <- TRUE 31 | y <- NA 32 | while (reject){ 33 | y <- rq(1) 34 | reject <- (dq(y) + log(runif(1)) < dp(y)) 35 | } 36 | return(list(xy = c(x,y), identical = FALSE)) 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /R/get_mh_kernels.R: -------------------------------------------------------------------------------- 1 | #'@rdname get_mh_kernel 2 | #'@title Get random walk Metropolis-Hastings kernels 3 | #'@description This function takes a target (specified through its log-pdf) 4 | #' and a covariance matrix for a Normal random walk proposal, and returns a list containing the keys 5 | #' \code{single_kernel}, \code{coupled_kernel} corresponding to marginal 6 | #' and coupled MH kernels. 7 | #' 8 | #' The coupling is done by reflection-maximal coupling of the proposals, 9 | #' and common uniform variable for the accept/reject step. For reflection-maximal 10 | #' couplings, see \code{\link{rnorm_reflectionmax}} and \code{\link{rmvnorm_reflectionmax}}. 11 | #' 12 | #' The returned kernels can then be used in the functions \code{\link{sample_meetingtime}} or 13 | #' \code{\link{sample_coupled_chains}} or \code{\link{sample_unbiasedestimator}}. 14 | #' 15 | #'@param target function taking a vector as input and returning target log-density evaluation 16 | #'@param Sigma_proposal covariance of the Normal random walk proposal 17 | #'@return A list containing the keys 18 | #' \code{single_kernel}, \code{coupled_kernel}. 19 | #'@export 20 | get_mh_kernels <- function(target, Sigma_proposal){ 21 | if (is.null(dim(Sigma_proposal))){ 22 | Sigma_proposal <- matrix(Sigma_proposal) 23 | } 24 | dimension <- dim(Sigma_proposal)[1] 25 | Sigma_proposal_chol <- chol(Sigma_proposal) 26 | Sigma_proposal_chol_inv <- solve(chol(Sigma_proposal)) 27 | zeromean <- rep(0, dimension) 28 | # single kernel 29 | single_kernel <- function(state){ 30 | chain_state <- state$chain_state 31 | current_pdf <- state$current_pdf 32 | proposal_value <- fast_rmvnorm_chol(1, chain_state, Sigma_proposal_chol) 33 | proposal_pdf <- target(proposal_value) 34 | accept <- (log(runif(1)) < (proposal_pdf - current_pdf)) 35 | if (accept){ 36 | return(list(chain_state = proposal_value, current_pdf = proposal_pdf)) 37 | } else { 38 | return(list(chain_state = chain_state, current_pdf = current_pdf)) 39 | } 40 | } 41 | # coupled kernel 42 | coupled_kernel <- function(state1, state2){ 43 | chain_state1 <- state1$chain_state; current_pdf1 <- state1$current_pdf 44 | chain_state2 <- state2$chain_state; current_pdf2 <- state2$current_pdf 45 | if (dimension == 1){ 46 | proposal_value <- rnorm_reflectionmax(chain_state1, chain_state2, Sigma_proposal_chol[1,1]) 47 | proposal_value$xy <- matrix(proposal_value$xy, nrow = 1) 48 | } else { 49 | proposal_value <- rmvnorm_reflectionmax(chain_state1, chain_state2, Sigma_proposal_chol, Sigma_proposal_chol_inv) 50 | } 51 | proposal1 <- proposal_value$xy[,1]; proposal_pdf1 <- target(proposal1) 52 | if (proposal_value$identical){ 53 | proposal2 <- proposal1; proposal_pdf2 <- proposal_pdf1 54 | } else { 55 | proposal2 <- proposal_value$xy[,2]; proposal_pdf2 <- target(proposal2) 56 | } 57 | logu <- log(runif(1)) 58 | accept1 <- FALSE; accept2 <- FALSE 59 | if (is.finite(proposal_pdf1)){ 60 | accept1 <- (logu < (proposal_pdf1 - current_pdf1)) 61 | } 62 | if (is.finite(proposal_pdf2)){ 63 | accept2 <- (logu < (proposal_pdf2 - current_pdf2)) 64 | } 65 | if (accept1){ 66 | chain_state1 <- proposal1 67 | current_pdf1 <- proposal_pdf1 68 | } 69 | if (accept2){ 70 | chain_state2 <- proposal2 71 | current_pdf2 <- proposal_pdf2 72 | } 73 | identical_ <- proposal_value$identical && accept1 && accept2 74 | return(list(state1 = list(chain_state = chain_state1, current_pdf = current_pdf1), 75 | state2 = list(chain_state = chain_state2, current_pdf = current_pdf2), 76 | identical = identical_)) 77 | } 78 | return(list(single_kernel = single_kernel, coupled_kernel = coupled_kernel)) 79 | } 80 | -------------------------------------------------------------------------------- /R/histogram_c_chains.R: -------------------------------------------------------------------------------- 1 | ## create histogram 2 | #'@rdname histogram_c_chains 3 | #'@title histogram_c_chains 4 | #'@description Compute histogram approximations of marginal distributions based on coupled Markov chains 5 | #'@param c_chains A list of coupled chains, each as produced by \code{\link{sample_coupled_chains}} 6 | #'@param component An integer specifying which marginal to approximate 7 | #'@param k An integer (see \code{\link{H_bar}}) 8 | #'@param m Another integer (see \code{\link{H_bar}}) 9 | #'@param breaks A vector indicating how to bin the space (optional) 10 | #'@param nclass An integer specifying the number of bins to aim for, if "breaks" is not specified 11 | #'@param dopar A boolean indicating whether to parallelize the computation (requires doParallel and having registed parallel cores) 12 | #'@export 13 | histogram_c_chains <- function(c_chains, component, k, m, breaks = NULL, nclass = 30, dopar = FALSE){ 14 | nsamples <- length(c_chains) 15 | lag <- dim(c_chains[[1]]$samples1)[1] - dim(c_chains[[1]]$samples2)[1] 16 | if (is.null(breaks)){ 17 | breaks <- find_breaks(c_chains, component, nclass, k = k, m = m, lag = lag) 18 | } 19 | mids <- create_mids(breaks) 20 | width <- diff(breaks)[1] 21 | ### compute histogram 22 | res_ <- NULL 23 | if (dopar){ 24 | res_ <- foreach (ibreak = 2:length(breaks), .combine = rbind) %dopar% { 25 | estimators <- rep(0, nsamples) 26 | for (irep in 1:nsamples){ 27 | estimators[irep] <- estimator_bin(c_chains[[irep]], component, breaks[ibreak-1], breaks[ibreak], k, m, lag) 28 | } 29 | prop <- mean(estimators) 30 | sd_prop <- sd(estimators) / sqrt(nsamples) 31 | c(prop, sd_prop) 32 | } 33 | } else { 34 | res_ <- matrix(0, nrow = length(breaks)-1, ncol = 2) 35 | for (ibreak in 2:length(breaks)){ 36 | estimators <- rep(0, nsamples) 37 | for (irep in 1:nsamples){ 38 | estimators[irep] <- estimator_bin(c_chains[[irep]], component, breaks[ibreak-1], breaks[ibreak], k, m, lag) 39 | } 40 | prop <- mean(estimators) 41 | sd_prop <- sd(estimators) / sqrt(nsamples) 42 | res_[ibreak-1,] <- c(prop, sd_prop) 43 | } 44 | } 45 | prop <- res_[,1] 46 | sd_prop <- res_[,2] 47 | return(list(mids = mids, breaks = breaks, proportions = prop, sd = sd_prop, width = width)) 48 | } 49 | 50 | ## plot the result of the histogram_c_chains function 51 | #'@export 52 | plot_histogram <- function(histogram, with_bar = TRUE){ 53 | df_ <- data.frame(xmin = histogram$mids - histogram$width/2, 54 | xmax = histogram$mids + histogram$width/2, 55 | x = histogram$mids, 56 | y = histogram$proportions / histogram$width, 57 | ymin = (histogram$proportions - 2 * histogram$sd) / histogram$width, 58 | ymax = (histogram$proportions + 2 * histogram$sd) / histogram$width) 59 | 60 | g <- ggplot(df_, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax)) + geom_rect(alpha = 0.5) 61 | if (with_bar){ 62 | g <- g + geom_segment(aes(x = x, xend = x, y = 0, yend = y)) + ylab("density") 63 | } 64 | return(g) 65 | } 66 | 67 | ## create breaks based on list of c_chains 68 | #'@export 69 | find_breaks <- function(c_chains, component, nclass, k, m, lag){ 70 | all_samples <- unlist(sapply(c_chains, function(x) x$samples1[(k+1):(m+1),component])) 71 | all_samples <- c(all_samples, unlist(sapply(c_chains, function(x) x$samples2[(k):(m+1-lag),component]))) 72 | br <- hist(all_samples, plot=F, nclass = nclass)$breaks 73 | return(br) 74 | } 75 | 76 | #'@export 77 | create_mids <- function(breaks){ 78 | mids <- c() 79 | for (i in 2:length(breaks)){ 80 | mids <- c(mids, breaks[i-1] + (breaks[i] - breaks[i-1])/2) 81 | } 82 | return(mids) 83 | } 84 | ## compute estimator of component being in [lower, upper] 85 | #'@export 86 | estimator_bin <- function(c_chains, component, lower, upper, k, m, lag){ 87 | return(estimator_bin_(c_chains, component, lower, upper, k, m, lag)) 88 | } 89 | -------------------------------------------------------------------------------- /R/invgamma_couplings.R: -------------------------------------------------------------------------------- 1 | #'@rdname dinversegamma 2 | #'@title compute log-density of inverse gamma 3 | #'@description at x > 0 and with given parameters alpha, beta, given by 4 | #' alpha * log(beta) - lgamma(alpha) - (alpha+1) * log(x) - beta / x 5 | #'@export 6 | dinversegamma <- function(x, alpha, beta){ 7 | return(alpha * log(beta) - lgamma(alpha) - (alpha+1) * log(x) - beta / x) 8 | } 9 | 10 | #'@rdname rinversegamma 11 | #'@title Sample from inverse gamma 12 | #'@description with given parameters alpha, beta, with log-density given by 13 | #' alpha * log(beta) - lgamma(alpha) - (alpha+1) * log(x) - beta / x 14 | #'@export 15 | rinversegamma <- function(n, alpha, beta){ 16 | return(1/rgamma(n = n, shape = alpha, rate = beta)) 17 | } 18 | 19 | #'@rdname rinversegamma_coupled 20 | #'@title Sample from maximally coupled inverse gamma 21 | #'@description with given parameters alpha1, alpha2, beta1, beta2, 22 | #'where the parametrization is that the log-density of IG(alpha, beta) is 23 | #' alpha * log(beta) - lgamma(alpha) - (alpha+1) * log(x) - beta / x 24 | #'@export 25 | rinversegamma_coupled <- function(alpha1, alpha2, beta1, beta2){ 26 | f <- get_max_coupling(function(n) rinversegamma(n, alpha1, beta1), 27 | function(x) dinversegamma(x, alpha1, beta1), 28 | function(n) rinversegamma(n, alpha2, beta2), 29 | function(x) dinversegamma(x, alpha2, beta2)) 30 | return(f()) 31 | } 32 | -------------------------------------------------------------------------------- /R/invgaussian_couplings.R: -------------------------------------------------------------------------------- 1 | #'@rdname dinvgaussian 2 | #'@title Log-density of inverse Gaussian 3 | #'@description Computes log-pdf at x > 0 of inverse Gaussian with given parameters mu, lambda, given by 4 | #' \deqn{0.5 * log(\lambda/(2*\pi)) - 1.5 * log(x) - \lambda * (x-\mu)^2 / (2 * \mu^2 * x)} 5 | #'@return A vector of n log-pdf values, one for each element in the first argument 'x'. 6 | #'@export 7 | dinvgaussian <- function(x, mu, lambda){ 8 | return(0.5 * log(lambda/(2*pi)) - 1.5 * log(x) - lambda * (x-mu)^2 / (2 * mu^2 * x)) 9 | } 10 | 11 | #'@rdname rinvgaussian 12 | #'@title Sample from inverse Gaussian 13 | #'@description Parametrized by mu, lambda, with log-density given by 14 | #' \deqn{0.5 * log(\lambda/(2*\pi)) - 1.5 * log(x) - \lambda * (x-\mu)^2 / (2 * \mu^2 * x)} 15 | #' 16 | #' The procedure goes as follows. 17 | #' 18 | #' \itemize{ 19 | #' \item Generate nu ~ Normal(0,1). 20 | #' \item Define y = nu^2. 21 | #' \item Define x = mu + mu^2 * y / (2 * lambda) - mu / (2 * lambda) * sqrt(4 * mu * lambda * y + mu^2 * y^2). 22 | #' \item Generate Z ~ Uniform(0,1). 23 | #' \item If z <= mu / (mu + x), output x, otherwise output mu^2 / x. 24 | #' } 25 | #'@return A vector of n draws, where n is the first argument. 26 | #'@export 27 | rinvgaussian <- function(n, mu, lambda){ 28 | return(rinvgaussian_c(n, mu, lambda)) 29 | } 30 | 31 | #'@rdname rinvgaussian_coupled 32 | #'@title Sample from maximally coupled inverse Gaussian 33 | #'@description with parameters mu1, mu2, lambda1, lambda2; see \code{\link{rinvgaussian}}. 34 | #'@return A pair of values in a vector of size two. 35 | #'@export 36 | rinvgaussian_coupled <- function(mu1, mu2, lambda1, lambda2){ 37 | rinvgaussian_coupled_c(mu1, mu2, lambda1, lambda2) 38 | } 39 | -------------------------------------------------------------------------------- /R/meetingtime.R: -------------------------------------------------------------------------------- 1 | #'@rdname sample_meetingtime 2 | #'@title Sample coupled Markov chains until meeting 3 | #'@description Sample two Markov chains, each following 'single_kernel' marginally, 4 | #'until they meet, and report the meeting time, as well as the elapsed wall-clock time in seconds. 5 | #' 6 | #' This function does not record the trajectories of the chains, with the goal of being memory-light. 7 | #' To record these trajectories, see \code{\link{sample_coupled_chains}}. To directly 8 | #' compute unbiased estimators on the fly, see \code{\link{sample_unbiasedestimator}}. 9 | #' 10 | #'@param single_kernel A list taking a state and returning a state, performing one step of a Markov kernel 11 | #'@param coupled_kernel A list taking two states and returning two states, performing one step of a coupled Markov kernel; 12 | #'it also returns a boolean "identical" indicating whether the two states are identical. 13 | #'@param rinit A list representing the initial state of the chain, that can be given to 'single_kernel' 14 | #'@param lag A time lag, equal to one by default 15 | #'@param max_iterations A maximum number of iterations, at which to interrup the while loop; Inf by default 16 | #'@return A list with 17 | #'\itemize{ 18 | #' 19 | #'\item meetingtime: the meeting time; equal to Inf if while loop was interrupted 20 | #' 21 | #'\item elapsedtime: elapsed wall-clock time, in seconds 22 | #'} 23 | #'@export 24 | sample_meetingtime <- function(single_kernel, coupled_kernel, rinit, lag = 1, max_iterations = Inf){ 25 | starttime <- Sys.time() 26 | # initialize 27 | state1 <- rinit(); state2 <- rinit() 28 | # move first chain 29 | time <- 0 30 | for (t in 1:lag){ 31 | time <- time + 1 32 | state1 <- single_kernel(state1) 33 | } 34 | # move two chains until meeting (or until max_iterations) 35 | meetingtime <- Inf 36 | while (is.infinite(meetingtime) && (time < max_iterations)){ 37 | time <- time + 1 38 | # use coupled kernel 39 | coupledstates <- coupled_kernel(state1, state2) 40 | state1 <- coupledstates$state1 41 | state2 <- coupledstates$state2 42 | # check if meeting happens 43 | if (coupledstates$identical) meetingtime <- time 44 | } 45 | currentime <- Sys.time() 46 | elapsedtime <- as.numeric(lubridate::as.duration(lubridate::ymd_hms(currentime) - lubridate::ymd_hms(starttime)), "seconds") 47 | return(list(meetingtime = meetingtime, elapsedtime = elapsedtime)) 48 | } 49 | -------------------------------------------------------------------------------- /R/mvnorm.R: -------------------------------------------------------------------------------- 1 | #'@rdname fast_rmvnorm 2 | #'@title fast_rmvnorm 3 | #'@description Generate multivariate Normal draws. The function does not check 4 | #' the arguments, use at your own risk. 5 | #'@param n An integer >= 1 specifying the desired number of draws 6 | #'@param mean A vector of size d specifying the mean vector of the multivariate Normal 7 | #'@param covariance A matrix of size d x d specifying the covariance matrix of the multivariate Normal 8 | #'@return A matrix of size n x d containing n d-dimensional multivariate Normal draws (one per row) 9 | #'@examples 10 | #' fast_rmvnorm(2, rep(0, 5), diag(1, 5, 5)) 11 | #'@export 12 | fast_rmvnorm <- function(n, mean, covariance){ 13 | return(fast_rmvnorm_(n, mean, covariance)) 14 | } 15 | 16 | #'@rdname fast_rmvnorm_chol 17 | #'@title fast_rmvnorm_chol 18 | #'@description Generate multivariate Normal draws. The function does not check 19 | #' the arguments, use at your own risk. 20 | #'@param n An integer >= 1 specifying the desired number of draws 21 | #'@param mean A vector of size d specifying the mean vector of the multivariate Normal 22 | #'@param chol A matrix of size d x d specifying the upper triangular Cholesky factor 23 | #'of the covariance matrix of the multivariate Normal target, 24 | #'for instance obtained using the \code{\link[base]{chol}} 25 | #'function of R. 26 | #'@return A matrix of size n x d containing n d-dimensional multivariate Normal draws (one per row) 27 | #'@examples 28 | #' Sigma <- diag(1, 5, 5) 29 | #' Sigma[1,2] <- Sigma[2,1] <- 0.3 30 | #' fast_rmvnorm_chol(2, rep(0, 5), chol(Sigma)) 31 | #'@export 32 | fast_rmvnorm_chol <- function(nparticles, mean, chol){ 33 | return(fast_rmvnorm_cholesky_(nparticles, mean, chol)) 34 | } 35 | 36 | #'@rdname fast_dmvnorm 37 | #'@title fast_dmvnorm 38 | #'@description Compute multivariate Normal density (log-value) evaluated at each row of a given matrix. The function does not check 39 | #' the arguments, use at your own risk. 40 | #'@param x A matrix of size n times d 41 | #'@param mean A vector of size d specifying the mean vector of the multivariate Normal 42 | #'@param covariance A matrix of size d x d specifying the covariance matrix of the multivariate Normal 43 | #'@return A vector of n evaluations of the multivariate Normal log-pdf, one for each row of \code{x} 44 | #'@examples 45 | #'x <- fast_rmvnorm(2, rep(0, 5), diag(1,5,5)) 46 | #'fast_dmvnorm(x, rep(0, 5), diag(1,5,5)) 47 | #'@export 48 | fast_dmvnorm <- function(x, mean, covariance){ 49 | return(fast_dmvnorm_(x, mean, covariance)) 50 | } 51 | 52 | #'@rdname fast_dmvnorm_chol_inverse 53 | #'@title fast_dmvnorm_chol_inverse 54 | #'@description Compute multivariate Normal density (log-value) evaluated at each row of a given matrix. The function does not check 55 | #' the arguments, use at your own risk. 56 | #'@param x A matrix of size n times d 57 | #'@param mean A vector of size d specifying the mean vector of the multivariate Normal 58 | #'@param chol_inverse A matrix of size d x d specifying the inverse of the upper-triangular Cholesky 59 | #'factor of the covariance matrix of the multivariate Normal, 60 | #'for instance obtained using \code{solve(chol(Sigma))} 61 | #'@return A vector of n evaluations of the multivariate Normal log-pdf, one for each row of \code{x} 62 | #'@examples 63 | #' Sigma <- diag(1, 5, 5) 64 | #' Sigma[1,2] <- Sigma[2,1] <- 0.3 65 | #' Sigma_chol <- chol(Sigma) 66 | #' x <- fast_rmvnorm_chol(2, rep(0, 5), Sigma_chol) 67 | #' fast_dmvnorm_chol_inverse(x, rep(0, 5), solve(Sigma_chol)) 68 | #'@export 69 | fast_dmvnorm_chol_inverse <- function(x, mean, chol_inverse){ 70 | return(fast_dmvnorm_cholesky_inverse_(x, mean, chol_inverse)) 71 | } 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /R/rnorm_max_coupling.R: -------------------------------------------------------------------------------- 1 | #'@rdname rnorm_max_coupling 2 | #'@title Maximal coupling of two univariate Normal distributions 3 | #'@description Sample from maximal coupling of two univariate Normal distributions, 4 | #'specified through their means and standard deviations. See \code{\link{rmvnorm_max}} for a multivariate version. 5 | #'@param mu1 First mean 6 | #'@param mu2 Second mean 7 | #'@param sigma1 First mean 8 | #'@param sigma2 Second mean 9 | #'@return Returns a list with 10 | #' 11 | #' \itemize{ 12 | #' \item "xy": the pair of samples \eqn{(x,y)} 13 | #' 14 | #' \item "identical": TRUE if \eqn{x = y}, FALSE otherwise 15 | #' } 16 | #'@export 17 | rnorm_max_coupling <- function(mu1, mu2, sigma1, sigma2){ 18 | f <- get_max_coupling(function(n) rnorm(n, mu1, sigma1), 19 | function(x) dnorm(x, mu1, sigma1, log = TRUE), 20 | function(n) rnorm(n, mu2, sigma2), 21 | function(x) dnorm(x, mu2, sigma2, log = TRUE)) 22 | return(f()) 23 | } 24 | -------------------------------------------------------------------------------- /R/rnorm_reflectionmax.R: -------------------------------------------------------------------------------- 1 | # reflection-maximal coupling in one dimension 2 | #'@rdname rnorm_reflectionmax 3 | #'@title Reflection-maximal coupling of two univariate Normal distributions 4 | #'@description Sample from reflection-maximal coupling of two univariate Normal distributions, 5 | #'specified through their means, with common standard deviation. 6 | #'See \code{\link{rmvnorm_reflectionmax}} for a multivariate version. 7 | #'@param mu1 First mean 8 | #'@param mu2 Second mean 9 | #'@param sigma Common standard deviation 10 | #'@return Returns a list with 11 | #' \itemize{ 12 | #' 13 | #' \item "xy": the pair of samples \eqn{(x,y)} 14 | #' 15 | #' \item "identical": TRUE if \eqn{x = y}, FALSE otherwise 16 | #' } 17 | #'@export 18 | rnorm_reflectionmax <- function(mu1, mu2, sigma){ 19 | # number of samples 20 | reflmax_xy <- c(0,0) 21 | # draw std normal first 22 | xdot <- rnorm(1) 23 | # this follows the notation of Bou Rabee et al, 2018, roughly 24 | z <- (mu1 - mu2) / sigma 25 | normz <- sqrt(sum(z^2)) 26 | e <- z / normz 27 | utilde <- runif(1, 0, 1) 28 | accept <- (log(utilde) < (dnorm(xdot + z, 0, 1, log = TRUE) - dnorm(xdot, log = TRUE))) 29 | ident_ <- FALSE 30 | if (accept){ 31 | ydot <- xdot + z 32 | ident_ <- TRUE 33 | } else { 34 | ydot <- xdot - 2 * (e * xdot) * e 35 | } 36 | reflmax_xy[1] <- mu1 + sigma * xdot 37 | reflmax_xy[2] <- mu2 + sigma * ydot 38 | return(list(xy = reflmax_xy, identical = ident_)) 39 | } 40 | -------------------------------------------------------------------------------- /R/unbiasedmcmc-package.R: -------------------------------------------------------------------------------- 1 | #'@name unbiasedmcmc-package 2 | #'@aliases unbiasedmcmc 3 | #'@docType package 4 | #'@title unbiasedmcmc 5 | #'@author Pierre E. Jacob , John O'Leary, Yves F. Atchade 6 | #'@description Unbiased MCMC estimators with couplings 7 | #'@details This package contains scripts to reproduce the figures of the 8 | #' paper "Unbiased Markov chain Monte Carlo with couplings" by 9 | #' Pierre E. Jacob, John O'Leary, Yves F Atchade, available on arXiv at 10 | #' https://arxiv.org/abs/1708.03625 11 | #'@keywords package 12 | #'@useDynLib unbiasedmcmc 13 | #'@importFrom Rcpp sourceCpp 14 | NULL 15 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | 2 | # This file consists of short utility functions for working with 3 | # propensity scores, as well as the ggplot2-related setmytheme. 4 | 5 | # from util_expit --------------------------------------------------------- 6 | #'@rdname expit 7 | #'@title expit 8 | #'@description expit function 9 | #'@export 10 | expit <- function(z) 1 / (1 + exp(-z)) 11 | 12 | 13 | # from propensity --------------------------------------------------------- 14 | 15 | #'@export 16 | beta2e <- function(beta, C){ 17 | return(beta2e_(beta, C)) 18 | } 19 | 20 | #'@export 21 | cut_in_fifth <- function(x){ 22 | return(cut_in_fifth_(x)) 23 | } 24 | 25 | 26 | # from util_setmytheme ---------------------------------------------------- 27 | 28 | #'@rdname setmytheme 29 | #'@title Customize graphical settings 30 | #'@description This function customizes the theme used by ggplot2. Loads the packages ggplot2 and 31 | #'ggthemes. 32 | #'@export 33 | setmytheme <- function(){ 34 | library(ggplot2) 35 | library(ggthemes) 36 | theme_set(theme_bw()) 37 | theme_update(axis.text.x = element_text(size = 20), 38 | axis.text.y = element_text(size = 20), 39 | axis.title.x = element_text(size = 25, margin=margin(20,0,0,0)), 40 | axis.title.y = element_text(size = 25, angle = 90, margin = margin(0,20,0,0)), 41 | legend.text = element_text(size = 20), 42 | legend.title = element_text(size = 20), 43 | title = element_text(size = 30), 44 | strip.text = element_text(size = 25), 45 | strip.background = element_rect(fill="white"), 46 | legend.position = "bottom") 47 | } 48 | -------------------------------------------------------------------------------- /README_files/figure-gfm/estimators-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierrejacob/unbiasedmcmc/a1eea04eef08710463ff9ff2228f5e58565fe78b/README_files/figure-gfm/estimators-1.png -------------------------------------------------------------------------------- /README_files/figure-gfm/usage-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierrejacob/unbiasedmcmc/a1eea04eef08710463ff9ff2228f5e58565fe78b/README_files/figure-gfm/usage-1.png -------------------------------------------------------------------------------- /README_files/figure-gfm/usage-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierrejacob/unbiasedmcmc/a1eea04eef08710463ff9ff2228f5e58565fe78b/README_files/figure-gfm/usage-2.png -------------------------------------------------------------------------------- /data/diabetes.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierrejacob/unbiasedmcmc/a1eea04eef08710463ff9ff2228f5e58565fe78b/data/diabetes.RData -------------------------------------------------------------------------------- /data/germancredit.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pierrejacob/unbiasedmcmc/a1eea04eef08710463ff9ff2228f5e58565fe78b/data/germancredit.RData -------------------------------------------------------------------------------- /inst/README.R: -------------------------------------------------------------------------------- 1 | # The files of the unbiasedmcmc/inst/reproduce* 2 | # contain the scripts to reproduce the figures, model by model. 3 | 4 | # Each folder has a "run.all.R" script that can be run to execute each script in the correct order 5 | # and to produce tables/figures. 6 | 7 | # The files finishing in .plots.R create the plots and tables. 8 | 9 | # The Ising model and variable selection examples contain further details on how 10 | # to execute batch scripts, in parallel, e.g. on a cluster or via GNU parallel. 11 | 12 | # There might be hardcoded paths to be modified before executing the scripts, 13 | # although efforts have been made to remove them. 14 | -------------------------------------------------------------------------------- /inst/check/check_discretecouplings.R: -------------------------------------------------------------------------------- 1 | # This script plays with coupling of distributions defined on discrete spaces 2 | # load packages 3 | library(unbiasedmcmc) 4 | rm(list = ls()) 5 | set.seed(21) 6 | library(doParallel) 7 | library(doRNG) 8 | registerDoParallel(cores = detectCores()-2) 9 | # 10 | p <- 40 11 | s <- 20 12 | selection <- rep(0, p) 13 | selection[sample(1:p, s, replace = F)] <- (runif(s) < 0.5) 14 | selection 15 | unbiasedmcmc:::sample_pair01(selection) 16 | # this is meant to sample one zero and one one uniformly 17 | nrep <- 10000 18 | 19 | test <- foreach(irep = 1:nrep, .combine = rbind) %dorng% { 20 | unbiasedmcmc:::sample_pair01(selection) 21 | } 22 | # 23 | table(test[,1]) / nrep 24 | which(selection == 0) 25 | 1/sum(selection==0) 26 | # 27 | 28 | selection1 <- rep(0, p) 29 | selection1[sample(1:p, s, replace = F)] <- (runif(s) < 0.5) 30 | selection2 <- rep(0, p) 31 | selection2[sample(1:p, s, replace = F)] <- (runif(s) < 0.5) 32 | 33 | 34 | test <- foreach(irep = 1:10000, .combine = rbind) %dorng% { 35 | coupled_pairs01(selection1, selection2) 36 | } 37 | 38 | # 39 | table(test[,1]) / nrow(test) 40 | 1/(p - sum(selection1)) 41 | table(test[,2]) / nrow(test) 42 | 1/(p - sum(selection2)) 43 | table(test[,3]) / nrow(test) 44 | 1/sum(selection1) 45 | table(test[,4]) / nrow(test) 46 | 1/sum(selection2) 47 | # 48 | 49 | -------------------------------------------------------------------------------- /inst/check/check_gamma.R: -------------------------------------------------------------------------------- 1 | library(unbiasedmcmc) 2 | library(doParallel) 3 | library(doRNG) 4 | # register parallel cores 5 | registerDoParallel(cores = detectCores()-2) 6 | setmytheme() 7 | rm(list = ls()) 8 | set.seed(21) 9 | 10 | # check Gamma sampler 11 | alpha <- 3.1 12 | beta <- 2 13 | x <- rgamma(100000, alpha, beta) 14 | summary(x) 15 | quantile(x, probs = c(0.99)) 16 | hist(x[x < 5], nclass = 200, prob = TRUE) 17 | curve(dgamma(x, alpha, rate = beta), col = "red", add = TRUE) 18 | 19 | ### maximal coupling of Gamma 20 | alpha1 <- 3.1 21 | beta1 <- 2.03 22 | alpha2 <- 7.4 23 | beta2 <- 1.7 24 | # sample 25 | rgamma_coupled(alpha1, alpha2, beta1, beta2) 26 | 27 | xy <- foreach(i = 1:10000) %dorng% { 28 | rgamma_coupled(alpha1, alpha2, beta1, beta2) 29 | } 30 | 31 | x1 <- sapply(xy, function(x) x$xy[1]) 32 | x2 <- sapply(xy, function(x) x$xy[2]) 33 | quantile(x1, probs = c(0.99)) 34 | hist(x1[x1 < 5], prob = TRUE, nclass = 100) 35 | curve(dgamma(x, alpha1, rate = beta1), col = "red", add = TRUE) 36 | 37 | quantile(x2, probs = c(0.99)) 38 | hist(x2, prob = TRUE, nclass = 100) 39 | curve(dgamma(x, alpha2, rate = beta2), col = "red", add = TRUE) 40 | 41 | 42 | -------------------------------------------------------------------------------- /inst/check/check_histograms.R: -------------------------------------------------------------------------------- 1 | library(unbiasedmcmc) 2 | library(doParallel) 3 | library(doRNG) 4 | # register parallel cores 5 | registerDoParallel(cores = detectCores()-2) 6 | setmytheme() 7 | rm(list = ls()) 8 | set.seed(21) 9 | 10 | target <- function(x){ 11 | evals <- log(0.5) + dnorm(x, mean = c(-2, 2), sd = 0.2, log = TRUE) 12 | return(max(evals) + log(sum(exp(evals - max(evals))))) 13 | } 14 | 15 | kernels <- get_mh_kernels(target, 2^2) 16 | single_kernel <- kernels$single_kernel 17 | coupled_kernel <- kernels$coupled_kernel 18 | rinit <- function(){ 19 | x <- rnorm(1) 20 | list(chain_state = x, current_pdf = target(x)) 21 | } 22 | c_chains <- sample_coupled_chains(single_kernel, coupled_kernel, rinit) 23 | 24 | 25 | nsamples <- 1000 26 | meetingtime <- foreach(irep = 1:nsamples, .combine = c) %dorng% { 27 | sample_meetingtime(single_kernel, coupled_kernel, rinit)$meetingtime 28 | } 29 | summary(meetingtime) 30 | hist(meetingtime) 31 | ## 32 | m <- 100 33 | c_chains_ <- foreach(irep = 1:nsamples) %dorng% { 34 | sample_coupled_chains(single_kernel, coupled_kernel, rinit, m = m, lag = 50) 35 | } 36 | k <- 20 37 | index_ <- which(sapply(c_chains_, function(x) x$meetingtime) > 30)[1] 38 | c_chains_[[index_]] 39 | H_bar(c_chains_[[index_]], h = function(x) x, k = k, m = m) 40 | # 41 | summary(sapply(c_chains_, function(x) x$meetingtime)) 42 | # histogram 43 | component <- 1 44 | hist1 <- histogram_c_chains(c_chains_, component, k, m) 45 | barplot(height = hist1$proportions) 46 | plot_histogram(hist1) 47 | library(ggplot2) 48 | df_ <- data.frame(x = hist1$mids, y = hist1$proportions / hist1$width) 49 | g <- ggplot(df_, aes(x = x, xend = x, y = 0, yend = y)) 50 | g <- g + geom_segment(aes(x = x, xend = x, y = 0, yend = y)) + ylab("density") 51 | g 52 | -------------------------------------------------------------------------------- /inst/check/check_inversegamma.R: -------------------------------------------------------------------------------- 1 | library(unbiasedmcmc) 2 | library(doParallel) 3 | library(doRNG) 4 | # register parallel cores 5 | registerDoParallel(cores = detectCores()-2) 6 | setmytheme() 7 | rm(list = ls()) 8 | set.seed(21) 9 | 10 | # check inverse Gamma sampler 11 | alpha <- 3.1 12 | beta <- 2 13 | x <- rinversegamma(100000, alpha, beta) 14 | summary(x) 15 | quantile(x, probs = c(0.99)) 16 | hist(x[x < 5], nclass = 200, prob = TRUE) 17 | curve(exp(dinversegamma(x, alpha, beta)), col = "red", add = TRUE) 18 | 19 | ### maximal coupling of inverse Gamma 20 | alpha1 <- 3.1 21 | beta1 <- 2.03 22 | alpha2 <- 7.4 23 | beta2 <- 1.7 24 | # sample 25 | rinversegamma_coupled(alpha1, alpha2, beta1, beta2) 26 | xy <- foreach(i = 1:10000) %dorng% { 27 | rinversegamma_coupled(alpha1, alpha2, beta1, beta2) 28 | } 29 | 30 | x1 <- sapply(xy, function(x) x$xy[1]) 31 | x2 <- sapply(xy, function(x) x$xy[2]) 32 | quantile(x1, probs = c(0.99)) 33 | 34 | hist(x1[x1 < 5], prob = TRUE, nclass = 100) 35 | curve(exp(dinversegamma(x, alpha1, beta1)), col = "red", add = TRUE) 36 | 37 | hist(x2, prob = TRUE, nclass = 100) 38 | curve(exp(dinversegamma(x, alpha2, beta2)), col = "red", add = TRUE) 39 | -------------------------------------------------------------------------------- /inst/check/check_inversegaussian.R: -------------------------------------------------------------------------------- 1 | library(unbiasedmcmc) 2 | library(doParallel) 3 | library(doRNG) 4 | # register parallel cores 5 | registerDoParallel(cores = detectCores()-2) 6 | setmytheme() 7 | rm(list = ls()) 8 | set.seed(21) 9 | 10 | # check inverse Gamma sampler 11 | mu <- 3.1 12 | lambda <- 2 13 | x <- rinvgaussian(100000, mu, lambda) 14 | summary(x) 15 | quantile(x, probs = c(0.99)) 16 | hist(x, nclass = 200, prob = TRUE) 17 | curve(exp(dinvgaussian(x, mu, lambda)), col = "red", add = TRUE) 18 | 19 | ### maximal coupling of inverse Gamma 20 | mu1 <- 3.1 21 | lambda1 <- 2 22 | mu2 <- 1.7 23 | lambda2 <- 1.3 24 | # sample 25 | rinvgaussian_coupled(mu1, mu2, lambda1, lambda2) 26 | 27 | xy <- foreach(i = 1:10000) %dorng% { 28 | rinvgaussian_coupled(mu1, mu2, lambda1, lambda2) 29 | } 30 | 31 | x1 <- sapply(xy, function(x) x[1]) 32 | x2 <- sapply(xy, function(x) x[2]) 33 | quantile(x1, probs = c(0.99)) 34 | hist(x1, prob = TRUE, nclass = 100) 35 | curve(exp(dinvgaussian(x, mu1, lambda1)), col = "red", add = TRUE) 36 | quantile(x2, probs = c(0.99)) 37 | 38 | hist(x2, prob = TRUE, nclass = 100) 39 | curve(exp(dinvgaussian(x, mu2, lambda2)), col = "red", add = TRUE) 40 | -------------------------------------------------------------------------------- /inst/check/check_lags.R: -------------------------------------------------------------------------------- 1 | library(unbiasedmcmc) 2 | library(doParallel) 3 | library(doRNG) 4 | # register parallel cores 5 | registerDoParallel(cores = detectCores()-2) 6 | # remove all 7 | rm(list = ls()) 8 | # set RNG seed 9 | set.seed(11) 10 | 11 | logtarget <- function(x) dnorm(x, mean = 0, sd = 1, log = TRUE) 12 | rinit <- function(){ 13 | chain_state <- rnorm(1, 1, 1) 14 | current_pdf <- logtarget(chain_state) 15 | return(list(chain_state = chain_state, current_pdf = current_pdf)) 16 | } 17 | sd_proposal <- 1 18 | MH_kernel <- function(state){ 19 | chain_state <- state$chain_state 20 | current_pdf <- state$current_pdf 21 | proposal <- rnorm(1, chain_state, sd_proposal) 22 | proposal_pdf <- logtarget(proposal) 23 | if (log(runif(1)) < (proposal_pdf - current_pdf)){ 24 | return(list(chain_state = proposal, current_pdf = proposal_pdf)) 25 | } else { 26 | return(list(chain_state = chain_state, current_pdf = current_pdf)) 27 | } 28 | } 29 | 30 | niterations <- 10000 31 | chain <- rep(0, niterations) 32 | state <- rinit() 33 | for (i in 1:niterations){ 34 | state <- MH_kernel(state) 35 | chain[i] <- state$chain_state 36 | } 37 | hist(chain, prob = TRUE, nclass = 40, main = "") 38 | curve(exp(logtarget(x)), add = TRUE, col = "red") 39 | 40 | coupledMH_kernel <- function(state1, state2){ 41 | chain_state1 <- state1$chain_state; current_pdf1 <- state1$current_pdf 42 | chain_state2 <- state2$chain_state; current_pdf2 <- state2$current_pdf 43 | # proposal from a maximal coupling 44 | proposal <- rnorm_max_coupling(chain_state1, chain_state2, sd_proposal, sd_proposal) 45 | proposal_pdf1 <- logtarget(proposal$xy[1]) 46 | # only compute target pdf on 2nd proposal if it is not identical to 1st proposal 47 | proposal_pdf2 <- proposal_pdf1 48 | if (!proposal$identical){ 49 | proposal_pdf2 <- logtarget(proposal$xy[2]) 50 | } 51 | logu <- log(runif(1)) 52 | accept1 <- FALSE; accept2 <- FALSE 53 | if (is.finite(proposal_pdf1)){ 54 | if (logu < (proposal_pdf1 - current_pdf1)){ 55 | accept1 <- TRUE 56 | chain_state1 <- proposal$xy[1]; current_pdf1 <- proposal_pdf1 57 | } 58 | } 59 | if (is.finite(proposal_pdf2)){ 60 | if(logu < (proposal_pdf2 - current_pdf2)){ 61 | accept2 <- TRUE 62 | chain_state2 <- proposal$xy[2]; current_pdf2 <- proposal_pdf2 63 | } 64 | } 65 | identical_ <- (proposal$identical) && (accept1) && accept2 66 | return(list(state1 = list(chain_state = chain_state1, current_pdf = current_pdf1), 67 | state2 = list(chain_state = chain_state2, current_pdf = current_pdf2), 68 | identical = identical_)) 69 | } 70 | 71 | nsamples <- 10000 72 | lag <- 10 73 | k <- 10 74 | m <- 50 75 | c_chains_ <- foreach(irep = 1:nsamples) %dorng% { 76 | sample_coupled_chains(MH_kernel, coupledMH_kernel, rinit, lag = lag, m = m) 77 | } 78 | names(c_chains_[[1]]) 79 | meetingtime <- sapply(c_chains_, function(x) x$meetingtime) 80 | hist(meetingtime, breaks = 1:max(meetingtime), prob = TRUE, main = "", xlab = "meeting times") 81 | mean_cost <- mean(sapply(c_chains_, function(x) x$cost)) 82 | uestimators <- sapply(c_chains_, function(x) H_bar(x, h = function(x) x^2, k = k, m = m)) 83 | 84 | uestimators2 <- foreach(irep = 1:nsamples) %dorng% { 85 | sample_unbiasedestimator(MH_kernel, coupledMH_kernel, rinit, h = function(x) x^2, k = k, m = m, lag = lag) 86 | } 87 | 88 | mean(uestimators) 89 | mean(sapply(uestimators2, function(x) x$uestimator)) 90 | var(uestimators) 91 | var(sapply(uestimators2, function(x) x$uestimator)) 92 | 93 | hist(uestimators) 94 | hist(sapply(uestimators2, function(x) x$uestimator)) 95 | 96 | mean_cost 97 | mean(sapply(uestimators2, function(x) x$cost)) 98 | 99 | # ## cost: lag calls to kernel, tau - lag calls to coupled kernel, then max(0, m - tau) calls to kernel again (if meeting < m) 100 | # cost_of_coupled_chain <- function(c_chains){ 101 | # lag <- dim(c_chains$samples1)[1] - dim(c_chains$samples2)[1] 102 | # cost <- lag + 2*(c_chains$meetingtime - lag) + max(0, c_chains$iteration - c_chains$meetingtime) 103 | # return(cost) 104 | # } 105 | # 106 | # costs_ <- sapply(c_chains_, function(x) cost_of_coupled_chain(x)) 107 | 108 | inefficiency <- mean_cost * var(uestimators) 109 | cat("inefficiency:", inefficiency, ", cost:", mean_cost) 110 | 111 | 112 | -------------------------------------------------------------------------------- /inst/check/check_mvnorm.R: -------------------------------------------------------------------------------- 1 | # This script tests the functions implementing different 2 | # random number generators and probability density functions for 3 | # multivariate Normal distributions. 4 | 5 | # load packages 6 | library(unbiasedmcmc) 7 | library(doParallel) 8 | library(doRNG) 9 | # register parallel cores 10 | registerDoParallel(cores = detectCores()-2) 11 | setmytheme() 12 | rm(list = ls()) 13 | set.seed(21) 14 | 15 | ### sampling trivariate Gaussians 16 | p <- 3 17 | mu1 <- rep(0, p) 18 | mu2 <- rep(1, p) 19 | Sigma1 <- diag(0.4, p, p) 20 | Sigma1[1,2] <- Sigma1[2,1] <- 0.2 21 | Sigma2 <- diag(1.4, p, p) 22 | Sigma2[1,2] <- Sigma2[2,1] <- -0.5 23 | 24 | # Compute Cholesky factors and inverses 25 | Sigma1_chol <- chol(Sigma1) 26 | Sigma2_chol <- chol(Sigma2) 27 | Sigma1_chol_inv <- solve(Sigma1_chol) 28 | Sigma2_chol_inv <- solve(Sigma2_chol) 29 | ## Note on Cholesky factorization: 30 | ## we have 31 | # t(Sigma1_chol) %*% Sigma1_chol 32 | # and Sigma1_chol is upper triangular 33 | 34 | # sample 35 | x <- fast_rmvnorm(2, mu2, Sigma2) 36 | 37 | # equivalent calculations of pdf evaluations 38 | fast_dmvnorm(x, mu1, Sigma1) 39 | mvtnorm::dmvnorm(x, mu1, Sigma1, log = T) 40 | fast_dmvnorm_chol_inverse(x, mu1, Sigma1_chol_inv) 41 | 42 | # and again 43 | x <- fast_rmvnorm(10, mu1, Sigma1) 44 | fast_dmvnorm(x, mu2, Sigma2) 45 | mvtnorm::dmvnorm(x, mu2, Sigma2, log = T) 46 | fast_dmvnorm_chol_inverse(x, mu2, Sigma2_chol_inv) 47 | 48 | # benchmark 49 | microbenchmark::microbenchmark( 50 | fast_dmvnorm(x, mu1, Sigma1), 51 | mvtnorm::dmvnorm(x, mu1, Sigma1, log = T), 52 | fast_dmvnorm_chol_inverse(x, mu1, Sigma1_chol_inv)) 53 | 54 | # now check random number generator 55 | n <- 1e4 56 | x2_1 <- fast_rmvnorm(n, mu2, Sigma2) 57 | x2_2 <- mvtnorm::rmvnorm(n, mu2, Sigma2) 58 | 59 | # compare means 60 | cat(colMeans(x2_1), "\n", colMeans(x2_2), "\n", mu2, "\n") 61 | 62 | # compare covariances 63 | cat(cov(x2_1)[1:2,1:2], "\n", cov(x2_2)[1:2,1:2], "\n", Sigma2[1:2,1:2], "\n") 64 | 65 | 66 | # and using cholesky 67 | x1_1 <- fast_rmvnorm(n, mu1, Sigma1) 68 | x1_3 <- fast_rmvnorm_chol(n, mu1, Sigma1_chol) 69 | 70 | # compare means 71 | cat(colMeans(x1_1), "\n", colMeans(x1_3), "\n", mu1, "\n") 72 | 73 | # compare covariances 74 | cat(cov(x1_1)[1:3,1:3], "\n", cov(x1_3)[1:3,1:3], "\n", Sigma1[1:3,1:3], "\n") 75 | -------------------------------------------------------------------------------- /inst/check/check_reflmaxcoupling.R: -------------------------------------------------------------------------------- 1 | library(unbiasedmcmc) 2 | library(doParallel) 3 | library(doRNG) 4 | # register parallel cores 5 | registerDoParallel(cores = detectCores()-2) 6 | setmytheme() 7 | rm(list = ls()) 8 | set.seed(21) 9 | 10 | ### maximal coupling of bivariate Gaussians with identical covariance matrices 11 | mu1 <- c(0.2, 0.3) 12 | mu2 <- c(0.0, 0.8) 13 | Sigma <- diag(1, 2, 2) 14 | Sigma[1,2] <- Sigma[2,1] <- 0.8 15 | Sigma[2,2] <- 2 16 | Sigma_chol <- chol(Sigma) 17 | Sigma_inv_chol <- solve(Sigma_chol) 18 | 19 | # sample once 20 | rmvnorm_reflectionmax(mu1, mu2, Sigma_chol, Sigma_inv_chol) 21 | # function output samples (column-bound) in $xy, and a boolean indicator of identity in $identical 22 | 23 | nsamples <- 5e4 24 | xy <- foreach(i = 1:nsamples) %dorng% { 25 | rmvnorm_reflectionmax(mu1, mu2, Sigma_chol, Sigma_inv_chol) 26 | } 27 | 28 | identicals <- sapply(xy, function(x) x$identical) 29 | equalvalues <- sapply(xy, function(x) all(x$xy[,1] == x$xy[,2])) 30 | all(identicals == equalvalues) 31 | 32 | # collect samples from both distributions 33 | sample1 <- t(sapply(xy, function(x) x$xy[,1])) 34 | sample2 <- t(sapply(xy, function(x) x$xy[,2])) 35 | 36 | # and check that they follow the correct distribution 37 | colMeans(sample1) 38 | mu1 39 | colMeans(sample2) 40 | mu2 41 | 42 | cov(sample1) 43 | cov(sample2) 44 | Sigma 45 | 46 | # estimate of 1-TVD 47 | mean(sapply(xy, function(x) x$identical)) 48 | 49 | # visualize marginals 50 | hist(sample1[,1], prob = TRUE, nclass = 100) 51 | curve(dnorm(x, mu1[1], sqrt(Sigma[1,1])), add = TRUE, col = "red") 52 | 53 | hist(sample1[,2], prob = TRUE, nclass = 100) 54 | curve(dnorm(x, mu1[2], sqrt(Sigma[2,2])), add = TRUE, col = "red") 55 | 56 | hist(sample2[,1], prob = TRUE, nclass = 100) 57 | curve(dnorm(x, mu2[1], sqrt(Sigma[1,1])), add = TRUE, col = "red") 58 | 59 | hist(sample2[,2], prob = TRUE, nclass = 100) 60 | curve(dnorm(x, mu2[2], sqrt(Sigma[2,2])), add = TRUE, col = "red") 61 | 62 | 63 | mu1 <- 1 64 | mu2 <- 2 65 | Sigma_proposal <- diag(2, 1, 1) 66 | Sigma_chol <- chol(Sigma_proposal) 67 | Sigma_chol_inv <- solve(Sigma_chol) 68 | nsamples <- 5e4 69 | xy <- foreach(i = 1:nsamples) %dorng% { 70 | rnorm_reflectionmax(mu1, mu2, Sigma_chol[1,1]) 71 | } 72 | hist(sapply(xy, function(x) x$xy[1]), prob = TRUE, nclass = 100) 73 | curve(dnorm(x, mu1, Sigma_chol[1]), add = T) 74 | 75 | hist(sapply(xy, function(x) x$xy[2]), prob = TRUE, nclass = 100) 76 | curve(dnorm(x, mu2, Sigma_chol[1]), add = T) 77 | 78 | rmvnorm_reflectionmax(mu1, mu2, Sigma_chol, Sigma_chol_inv) 79 | 80 | mean(sapply(xy, function(xy) xy$identical)) 81 | 82 | xymax <- foreach(i = 1:nsamples) %dorng% { 83 | rnorm_max_coupling(mu1, mu2, Sigma_chol[1,1], Sigma_chol[1,1]) 84 | } 85 | mean(sapply(xymax, function(xy) xy$identical)) 86 | 87 | -------------------------------------------------------------------------------- /inst/check/check_rwmh_doublewell.R: -------------------------------------------------------------------------------- 1 | library(unbiasedmcmc) 2 | rm(list = ls()) 3 | # double well potential log-pdf 4 | target <- function(x) -0.1 * ( ((x[1]-1)^2-x[2]^2 )^2 + 10*(x[1]^2-5)^2+ (x[1]+x[2])^4 + (x[1]-x[2])^4) 5 | # compute marginals pdfs and nornalizing constant 6 | targetmarginal1 <- function(x) integrate(function(a) sapply(a, function(z) exp(target(c(x, z)))), lower = -6, upper = 6, subdivisions = 1e4)$value 7 | targetmarginal2 <- function(x) integrate(function(a) sapply(a, function(z) exp(target(c(z, x)))), lower = -6, upper = 6, subdivisions = 1e4)$value 8 | Z <- integrate(function(x) sapply(x, function(z) targetmarginal1(z)), lower = -6, upper = 6, subdivisions = 1e4)$value 9 | 10 | # xgrid <- seq(from = -4, to = 4, length.out = 1e2) 11 | # ygrid <- seq(from = -4, to = 4, length.out = 1e2) 12 | # df_ <- expand.grid(xgrid, ygrid) 13 | # df_$z <- apply(df_, 1, target) 14 | # library(ggplot2) 15 | # ggplot(df_, aes(x = Var1, y = Var2, z = z)) + geom_contour(binwidth = 5) 16 | 17 | # define random walk Metropolis-Hastings kernels 18 | kernels <- get_mh_kernels(target, Sigma_proposal = diag(1, 2, 2)) 19 | # define initial distribution 20 | rinit <- function(){ 21 | x <- rnorm(n = 2, mean = 2, sd = 25) 22 | return(list(chain_state = x, current_pdf = target(x))) 23 | } 24 | ## register parallel cores 25 | library(doParallel) 26 | library(doRNG) 27 | registerDoParallel(cores = detectCores()-2) 28 | nrepeats <- 5e2 29 | # sample meeting times "tau" 30 | meetings_ <- foreach(rep = 1:nrepeats) %dorng% { 31 | sample_meetingtime(kernels$single_kernel, kernels$coupled_kernel, rinit) 32 | } 33 | hist(sapply(meetings_, function(x) x$meetingtime), nclass = 50, xlab = "meeting time", main = "") 34 | 35 | ## now run coupled chains for max(m, tau) steps 36 | coupledchains_ <- foreach(rep = 1:nrepeats) %dorng% { 37 | sample_coupled_chains(kernels$single_kernel, kernels$coupled_kernel, rinit, m = 10000, lag = 1000) 38 | } 39 | names(coupledchains_[[1]]) 40 | hist(sapply(coupledchains_, function(x) x$meetingtime)) 41 | 42 | # let's see what happened to the chains that took longest to meet 43 | index_max <- which.max(sapply(coupledchains_, function(x) x$meetingtime)) 44 | plot(coupledchains_[[index_max]]$samples1[,1], coupledchains_[[index_max]]$samples1[,2], type = "l", xlim = c(-5, 5), ylim = c(-5, 5)) 45 | lines(coupledchains_[[index_max]]$samples2[,1], coupledchains_[[index_max]]$samples2[,2], col = "red") 46 | matplot(coupledchains_[[index_max]]$samples1, type = "l", col = "black") 47 | matplot(coupledchains_[[index_max]]$samples2, type = "l", col = "red", add = T) 48 | 49 | library(ggplot2) 50 | hist1 <- histogram_c_chains(coupledchains_, component = 1, k = 2000, m = 5000, dopar = FALSE) 51 | plot_histogram(hist1) + stat_function(fun = function(x) sapply(x, function(v) targetmarginal1(v)/Z)) + xlim(-4,4) 52 | 53 | hist2 <- histogram_c_chains(coupledchains_, component = 2, k = 2000, m = 10000, dopar = TRUE) 54 | plot_histogram(hist2) + stat_function(fun = function(x) sapply(x, function(v) targetmarginal2(v)/Z)) + xlim(-4,4) 55 | 56 | 57 | -------------------------------------------------------------------------------- /inst/check/check_rwmh_minimalcode.R: -------------------------------------------------------------------------------- 1 | library(unbiasedmcmc) 2 | rm(list = ls()) 3 | # double well potential log-pdf 4 | target <- function(x) -0.1 * ( ((x[1]-1)^2-x[2]^2 )^2 + 10*(x[1]^2-5)^2+ (x[1]+x[2])^4 + (x[1]-x[2])^4) 5 | # compute marginals pdfs and nornalizing constant 6 | targetmarginal1 <- function(x) integrate(function(a) sapply(a, function(z) exp(target(c(x, z)))), lower = -6, upper = 6, subdivisions = 1e4)$value 7 | targetmarginal2 <- function(x) integrate(function(a) sapply(a, function(z) exp(target(c(z, x)))), lower = -6, upper = 6, subdivisions = 1e4)$value 8 | Z <- integrate(function(x) sapply(x, function(z) targetmarginal1(z)), lower = -6, upper = 6, subdivisions = 1e4)$value 9 | 10 | # xgrid <- seq(from = -4, to = 4, length.out = 1e2) 11 | # ygrid <- seq(from = -4, to = 4, length.out = 1e2) 12 | # df_ <- expand.grid(xgrid, ygrid) 13 | # df_$z <- apply(df_, 1, target) 14 | # library(ggplot2) 15 | # ggplot(df_, aes(x = Var1, y = Var2, z = z)) + geom_contour(binwidth = 5) 16 | 17 | # define random walk Metropolis-Hastings kernels 18 | kernels <- get_mh_kernels(target, Sigma_proposal = diag(1, 2, 2)) 19 | # define initial distribution 20 | rinit <- function(){ 21 | x <- rnorm(n = 2, mean = 2, sd = 25) 22 | return(list(chain_state = x, current_pdf = target(x))) 23 | } 24 | ## register parallel cores 25 | library(doParallel) 26 | library(doRNG) 27 | registerDoParallel(cores = detectCores()-2) 28 | nrepeats <- 5e2 29 | # sample meeting times "tau" 30 | meetings_ <- foreach(rep = 1:nrepeats) %dorng% { 31 | sample_meetingtime(kernels$single_kernel, kernels$coupled_kernel, rinit) 32 | } 33 | hist(sapply(meetings_, function(x) x$meetingtime), nclass = 50, xlab = "meeting time", main = "") 34 | 35 | ## now run coupled chains for max(m, tau) steps 36 | coupledchains_ <- foreach(rep = 1:nrepeats) %dorng% { 37 | sample_coupled_chains(kernels$single_kernel, kernels$coupled_kernel, rinit, m = 10000, lag = 1000) 38 | } 39 | names(coupledchains_[[1]]) 40 | hist(sapply(coupledchains_, function(x) x$meetingtime)) 41 | 42 | # let's see what happened to the chains that took longest to meet 43 | index_max <- which.max(sapply(coupledchains_, function(x) x$meetingtime)) 44 | plot(coupledchains_[[index_max]]$samples1[,1], coupledchains_[[index_max]]$samples1[,2], type = "l", xlim = c(-5, 5), ylim = c(-5, 5)) 45 | lines(coupledchains_[[index_max]]$samples2[,1], coupledchains_[[index_max]]$samples2[,2], col = "red") 46 | matplot(coupledchains_[[index_max]]$samples1, type = "l", col = "black") 47 | matplot(coupledchains_[[index_max]]$samples2, type = "l", col = "red", add = T) 48 | 49 | library(ggplot2) 50 | hist1 <- histogram_c_chains(coupledchains_, component = 1, k = 2000, m = 5000, dopar = FALSE) 51 | plot_histogram(hist1) + stat_function(fun = function(x) sapply(x, function(v) targetmarginal1(v)/Z)) + xlim(-4,4) 52 | 53 | hist2 <- histogram_c_chains(coupledchains_, component = 2, k = 2000, m = 10000, dopar = TRUE) 54 | plot_histogram(hist2) + stat_function(fun = function(x) sapply(x, function(v) targetmarginal2(v)/Z)) + xlim(-4,4) 55 | 56 | 57 | -------------------------------------------------------------------------------- /inst/check/check_rwmh_univariate.R: -------------------------------------------------------------------------------- 1 | library(unbiasedmcmc) 2 | library(doParallel) 3 | library(doRNG) 4 | # register parallel cores 5 | registerDoParallel(cores = detectCores()-2) 6 | setmytheme() 7 | rm(list = ls()) 8 | set.seed(21) 9 | 10 | ## test MH kernels on a univariate target 11 | ## define target distribution 12 | target <- function(x){ 13 | evals <- log(0.5) + dnorm(x, mean = c(-2, 2), sd = 0.2, log = TRUE) 14 | return(max(evals) + log(sum(exp(evals - max(evals))))) 15 | } 16 | 17 | Sigma_proposal <- diag(2, 1, 1) 18 | Sigma_proposal_chol <- chol(Sigma_proposal) 19 | Sigma_proposal_chol_inv <- solve(chol(Sigma_proposal)) 20 | 21 | kernels <- get_mh_kernels(target, Sigma_proposal) 22 | single_kernel <- kernels$single_kernel 23 | coupled_kernel <- kernels$coupled_kernel 24 | rinit <- function(){ 25 | x <- rnorm(1, 5, 5) 26 | return(list(chain_state = x, current_pdf = target(x))) 27 | } 28 | 29 | ### Test of the single kernel 30 | niterations <- 25000 31 | state <- rinit() 32 | chain <- matrix(ncol=1, nrow=niterations) 33 | chain[1,] <- state$chain_state 34 | for (t in 2:niterations){ 35 | state <- single_kernel(state) 36 | chain[t,] <- state$chain_state 37 | } 38 | it <- floor(seq(from = 1, to = niterations, length.out = 1000)) 39 | qplot(x=it, y=chain[it], geom = "line") + ylab("X") + xlab("iteration") 40 | hist(chain[1000:niterations], nclass = 100, prob = TRUE) 41 | curve(sapply(x, function(y) exp(target(y))), add = T) 42 | mean(chain > 1) 43 | 44 | # Markov kernel of the coupled chain 45 | niterations <- 50000 46 | state1 <- rinit() 47 | state2 <- rinit() 48 | chain1 <- matrix(ncol=1, nrow=niterations) 49 | chain2 <- matrix(ncol=1, nrow=niterations) 50 | chain1[1,] <- state1$chain_state 51 | chain2[1,] <- state2$chain_state 52 | meetingtime <- Inf 53 | for (t in 2:niterations){ 54 | coupled_result <- coupled_kernel(state1, state2) 55 | state1 <- coupled_result$state1 56 | state2 <- coupled_result$state2 57 | if (coupled_result$identical && is.infinite(meetingtime)){ 58 | meetingtime <- t 59 | } 60 | chain1[t,] <- state1$chain_state 61 | chain2[t,] <- state2$chain_state 62 | } 63 | print(meetingtime) 64 | it <- floor(seq(from = 1, to = niterations, length.out = 1000)) 65 | qplot(x=it, y=chain1[it], geom = "line") + ylab("X") + xlab("iteration") + geom_line(aes(y = chain2[it]), colour = "red") 66 | 67 | hist(chain1[1000:niterations], nclass = 100, prob = TRUE) 68 | hist(chain2[1000:niterations], nclass = 100, prob = TRUE, add = TRUE, col = rgb(1,0,0,0.5)) 69 | curve(sapply(x, function(y) exp(target(y))), add = T) 70 | 71 | ## a meeting 72 | res <- sample_coupled_chains(single_kernel, coupled_kernel, rinit, m = 3) 73 | res$meetingtime 74 | res$iteration 75 | H_bar(res, h = function(x) x, k = 3, m = 3) 76 | # # 77 | qplot(x = 0:res$iteration, y = res$samples1[,1], geom = "line") + 78 | geom_line(aes(x = 0:(res$iteration-1), y = res$samples2[,1]), col = rgb(1,0,0,0.5)) + 79 | geom_line(aes(x = 1:res$iteration, y = res$samples2[,1]), col = rgb(1,0,0,1), linetype = 2) 80 | 81 | ### distribution of meeting times 82 | nsamples <- 500 83 | meetingtime <- foreach(irep = 1:nsamples, .combine = c) %dorng% { 84 | sample_meetingtime(single_kernel, coupled_kernel, rinit)$meetingtime 85 | } 86 | summary(meetingtime) 87 | hist(meetingtime) 88 | 89 | 90 | -------------------------------------------------------------------------------- /inst/check/check_vs_mlikelihood.R: -------------------------------------------------------------------------------- 1 | # This script attempts at optimizing the code that computes the marginal likelihood 2 | # in the variable selection example, and compares different implementations 3 | library(unbiasedmcmc) 4 | rm(list = ls()) 5 | set.seed(1) 6 | # 7 | # simulate data 8 | n <- 100 9 | p <- 5000 10 | SNR <- 3 11 | s_star <- 10 12 | s0 <- 100 13 | sigma0 <- 1 14 | beta_star <- SNR * sqrt(sigma0^2 * log(p) / n) * c(2,-3,2,2,-3,3,-2,3,-2,3, rep(0, p-10)) 15 | # independent design 16 | X <- matrix(rnorm(n * p), nrow = n, ncol = p) # fast_rmvnorm_chol(n, rep(0, p), diag(1, p, p)) 17 | X <- scale(X) 18 | Y <- X %*% matrix(beta_star, ncol = 1) + rnorm(n, 0, sigma0) 19 | Y <- scale(Y) 20 | Y2 <- (t(Y) %*% Y)[1,1] 21 | g <- p^3 22 | kappa <- 1 23 | 24 | rinit <- function(){ 25 | x <- rep(0, p) 26 | x[sample(1:p, min(s0, p), replace = F)] <- (runif(min(s0, p)) < 0.5) 27 | return(x) 28 | } 29 | 30 | marginal_likelihood <- function(selection){ 31 | Xselected <- as.matrix(X[, selection==1,drop=F]) 32 | if (sum(selection) != 0){ 33 | P <- Xselected %*% tcrossprod(x = solve(crossprod(x = Xselected, y = Xselected)), y = Xselected) 34 | } else { 35 | P <- matrix(0, n, n) 36 | } 37 | R2_gamma <- t(Y) %*% P %*% Y/ Y2 38 | return((-sum(selection)/2 * log(1+g) - (n/2) * log(1 + g*(1-R2_gamma)))[1,1]) 39 | } 40 | 41 | marginal_likelihood_alt <- function(selection){ 42 | Xselected <- as.matrix(X[, selection==1,drop=F]) 43 | if (sum(selection) != 0){ 44 | #P1 <- Xselected %*% solve(t(Xselected) %*% Xselected) %*% t(Xselected) 45 | # using crossprod, it's a bit faster 46 | P <- Xselected %*% tcrossprod(x = solve(crossprod(x = Xselected, y = Xselected)), y = Xselected) 47 | } else { 48 | P <- matrix(0, n, n) 49 | } 50 | posteriorvalue <- -(sum(selection) + 1) / 2 * log(g + 1) 51 | posteriorvalue <- posteriorvalue - n / 2 * log(Y2 - g / (g + 1) * t(Y) %*% P %*% Y) 52 | return(posteriorvalue[1,1]) 53 | } 54 | 55 | cppFunction(' 56 | double marginal_likelihood_c_(Eigen::VectorXd selection, const Eigen::MatrixXd & X, const Eigen::VectorXd & Y, double Y2, double g){ 57 | double l = 0.; 58 | int n = X.rows(); 59 | int p = X.cols(); 60 | int s = selection.sum(); 61 | if (s > 0){ 62 | Eigen::MatrixXd Xselected(n,s); 63 | int counter = 0; 64 | for (int column = 0; column < p; column++){ 65 | if (selection(column)){ 66 | Xselected.col(counter) = X.col(column); 67 | counter++; 68 | } 69 | } 70 | l = Y.transpose() * Xselected * ((Xselected.transpose() * Xselected).inverse()) * Xselected.transpose() * Y; 71 | } else { 72 | l = 0.; 73 | } 74 | l = -((double) s + 1.) / 2. * log(g + 1.) - (double) n / 2. * log(Y2 - g / (g + 1.) * l); 75 | return l; 76 | } 77 | ', depends="RcppEigen") 78 | 79 | sel <- rinit() 80 | marginal_likelihood_c_(sel, X, Y, Y2, g) 81 | marginal_likelihood_alt(sel) 82 | 83 | marginal_likelihood_c_(rep(0, p), X, Y, Y2, g) 84 | marginal_likelihood_alt(rep(0, p)) 85 | 86 | selection1 <- rinit() 87 | selection2 <- rinit() 88 | 89 | marginal_likelihood(selection1) 90 | marginal_likelihood(selection2) 91 | marginal_likelihood(selection1) - marginal_likelihood(selection2) 92 | 93 | marginal_likelihood_alt(selection1) 94 | marginal_likelihood_alt(selection2) 95 | marginal_likelihood_alt(selection1) - marginal_likelihood_alt(selection2) 96 | 97 | 98 | library(microbenchmark) 99 | microbenchmark(marginal_likelihood(rinit()), 100 | marginal_likelihood_alt(rinit()), 101 | marginal_likelihood_c_(rinit(), X, Y, Y2, g)) 102 | -------------------------------------------------------------------------------- /inst/check/check_weirdbug.R: -------------------------------------------------------------------------------- 1 | # This script illustrates a weird phenomenon when using 2 | # nrow/ncol to get a matrix' dimension instead of dim. 3 | 4 | # The mystery has been elucidated thanks to Louis Aslett, 5 | # and explanations are given in the blog post 6 | # https://statisfaction.wordpress.com/2017/12/10/nrow-references-and-copies/ 7 | 8 | # The question is: why is the following code so slow? 9 | dimstate = 100 10 | nmcmc = 1e4 11 | chain = matrix(0, nrow = nmcmc, ncol = dimstate) 12 | for (imcmc in 1:nmcmc){ 13 | if (imcmc == nrow(chain)){ 14 | } 15 | x = rnorm(dimstate, mean = 0, sd = 1) 16 | chain[imcmc,] = x 17 | } 18 | 19 | # Attempts at finding the reason, identifying "nrow" as the problem 20 | # in combination to changing the matrix chain. 21 | dimstate = 100 22 | nmcmc = 1e4 23 | chain = matrix(0, nrow = nmcmc, ncol = dimstate) 24 | for (imcmc in 1:nmcmc){ 25 | if (imcmc == nrow(chain)){ 26 | } 27 | x = rnorm(dimstate, mean = 0, sd = 1) 28 | # chain[imcmc,] = x 29 | } 30 | 31 | dimstate = 100 32 | nmcmc = 1e4 33 | chain = matrix(0, nrow = nmcmc, ncol = dimstate) 34 | for (imcmc in 1:nmcmc){ 35 | if (imcmc == nmcmc){ 36 | } 37 | x = rnorm(dimstate, mean = 0, sd = 1) 38 | chain[imcmc,] = x 39 | } 40 | 41 | # illustration that dim behaves very differently compared to nrow 42 | dimstate = 100 43 | nmcmc = 1e4 44 | chain = matrix(0, nrow = nmcmc, ncol = dimstate) 45 | for (imcmc in 1:nmcmc){ 46 | if (imcmc == dim(chain)[1]){ 47 | } 48 | x = rnorm(dimstate, mean = 0, sd = 1) 49 | chain[imcmc,] = x 50 | } 51 | 52 | # 53 | x <- matrix(0, nrow=1e5, ncol=100) # matrix has ref count 1 54 | x[1,1] <- 1 # ref count is 1, so write with no copy 55 | nrow(x) # ref count is 2 even though nothing was touched 56 | x[1,1] <- 1 # ref count still 2, so R copies before writing first element. Now the ref count drops to 1 again 57 | x[2,2] <- 1 # this writes without a copy as ref count got reset on last line 58 | nrow(x) # ref count jumps 59 | x[3,3] <- 1 # copy invoked again! Aaaargh! 60 | -------------------------------------------------------------------------------- /inst/check/to_check_signed_measure.R: -------------------------------------------------------------------------------- 1 | # This script plays with the computation of estimators 2 | # using signed measure representations 3 | 4 | library(unbiasedmcmc) 5 | library(doParallel) 6 | library(doRNG) 7 | library(dplyr) 8 | registerDoParallel(cores = detectCores()-2) 9 | rm(list = ls()) 10 | set.seed(1) 11 | 12 | #### Bivariate target 13 | target <- function(x) fast_dmvnorm(matrix(x, nrow = 1), mean = c(0,0.5), covariance = diag(c(0.5, 0.2))) 14 | Sigma_proposal <- diag(1, 2) 15 | rinit <- function(){ 16 | x <- fast_rmvnorm(1, c(10, 5), diag(3, 2, 2)) 17 | return(list(chain_state = x, current_pdf = target(x))) 18 | } 19 | kernels <- get_mh_kernels(target, Sigma_proposal) 20 | 21 | cx <- sample_coupled_chains(kernels$single_kernel, kernels$coupled_kernel, rinit) 22 | cx$samples1 %>% head 23 | cx$meetingtime 24 | dim(cx$samples1)[1] 25 | dim(cx$samples1)[2] 26 | 27 | ms <- c_chains_to_measure_as_list(cx, 0, 0) 28 | tail(ms$atoms) 29 | cat(sum(ms$weights * ms$atoms[,1]), sum(ms$weights * ms$atoms[,ncol(ms$atoms)]), "\n") 30 | H_bar(cx, h = function(x) x, k = 0, m = 0) 31 | 32 | ms$MCMC 33 | sum(ms$weights[ms$MCMC==1]) 34 | sum(ms$weights[ms$MCMC==0]) 35 | 36 | cx <- sample_coupled_chains(kernels$single_kernel, kernels$coupled_kernel, rinit, m = 100) 37 | cx$meetingtime 38 | ms <- c_chains_to_measure_as_list(cx, 30, 100) 39 | tail(ms$atoms) 40 | cat(sum(ms$weights * ms$atoms[,1]), sum(ms$weights * ms$atoms[,ncol(ms$atoms)]), "\n") 41 | H_bar(cx, h = function(x) x, k = 30, m = 100) 42 | ms$MCMC 43 | sum(ms$weights[ms$MCMC]) 44 | sum(ms$weights[!ms$MCMC]) 45 | 46 | cx <- sample_coupled_chains(kernels$single_kernel, kernels$coupled_kernel, rinit, m = 100, lag = 5) 47 | cx$meetingtime 48 | ms <- c_chains_to_measure_as_list(cx, 3, 100) 49 | ms$MCMC 50 | sum(ms$weights[ms$MCMC]) 51 | sum(ms$weights[!ms$MCMC]) 52 | 53 | cat(sum(ms$weights * ms$atoms[,1]), sum(ms$weights * ms$atoms[,ncol(ms$atoms)]), "\n") 54 | H_bar(cx, h = function(x) x, k = 3, m = 100) 55 | 56 | c_chains_to_dataframe(cx, k = 3, m = 100) 57 | 58 | nrepeats <- 10 59 | coupledchains_ <- foreach(rep = 1:nrepeats) %dorng% { 60 | sample_coupled_chains(kernels$single_kernel, kernels$coupled_kernel, rinit, m = 100, lag = 1) 61 | } 62 | summary(sapply(coupledchains_, function(l) l$meetingtime)) 63 | 64 | k <- 20 65 | m <- 100 66 | df_ <- c_chains_to_dataframe(coupledchains_, k, m, dopar = T) 67 | head(df_) 68 | colSums(df_$weight * df_[,4:ncol(df_)]) 69 | rowMeans(sapply(X = coupledchains_, FUN = function(x) H_bar(x, h = function(v) v, k = k, m = m))) 70 | 71 | cat(sum(df_$weight * df_$atom.1 * cos(df_$atom.2)), "\n") 72 | mean(sapply(X = coupledchains_, FUN = function(x) H_bar(x, h = function(v) v[1] * cos(v[2]), k = k, m = m))) 73 | 74 | 75 | 76 | 77 | ##### Univariate target 78 | target <- function(x){ 79 | evals <- log(0.5) + dnorm(x, mean = c(-2, 2), sd = 0.5, log = TRUE) 80 | return(max(evals) + log(sum(exp(evals - max(evals))))) 81 | } 82 | Sigma_proposal <- diag(2, 1, 1) 83 | rinit <- function(){ 84 | x <- rnorm(1, 5, 5) 85 | return(list(chain_state = x, current_pdf = target(x))) 86 | } 87 | kernels <- get_mh_kernels(target, Sigma_proposal) 88 | 89 | nrepeats <- 100 90 | m <- 200 91 | coupledchains_ <- foreach(rep = 1:nrepeats) %dorng% { 92 | sample_coupled_chains(kernels$single_kernel, kernels$coupled_kernel, rinit, m = m, lag = 50) 93 | } 94 | summary(sapply(coupledchains_, function(l) l$meetingtime)) 95 | k <- 20 96 | df_unpruned <- c_chains_to_dataframe(coupledchains_, k, m, dopar = F, prune = FALSE) 97 | df_pruned <- c_chains_to_dataframe(coupledchains_, k, m, dopar = F, prune = TRUE) 98 | head(df_pruned) 99 | dim(df_pruned) 100 | dim(df_unpruned) 101 | 102 | 103 | sum(df_pruned$weight * df_pruned$atom.1) 104 | colSums(df_pruned$weight * df_pruned[,4:ncol(df_pruned),drop=F]) 105 | mean(sapply(X = coupledchains_, FUN = function(x) H_bar(x, h = function(v) v, k = k, m = m))) 106 | 107 | df_pruned %>% summarise(mean1 = sum(weight * atom.1)) 108 | df_pruned %>% group_by(MCMC) %>% summarise(mean1 = sum(weight * atom.1), sumweight = sum(weight), len = n(), lenuniqueweight = length(unique(weight))) %>% ungroup() %>% as.data.frame 109 | MCMC_traj <- sapply(coupledchains_, function(x) x$samples1[(k+1):(m+1),]) 110 | length(as.numeric(MCMC_traj)) 111 | mean(MCMC_traj) 112 | 113 | 114 | hist1 <- histogram_c_chains(coupledchains_, component = 1, k = 150, m = m) 115 | setmytheme() 116 | plot_histogram(hist1) 117 | 118 | -------------------------------------------------------------------------------- /inst/reproducebaseball/baseball.mcmc.run.R: -------------------------------------------------------------------------------- 1 | # load packages 2 | library(unbiasedmcmc) 3 | setmytheme() 4 | rm(list = ls()) 5 | set.seed(21) 6 | library(doParallel) 7 | library(doRNG) 8 | registerDoParallel(cores = detectCores()) 9 | 10 | # 11 | # this example is taken from Rosenthal "Parallel Computing and Monte Carlo algorithms", 2000. 12 | # The data are baseball player's batting averages, as in Efron and Morris 1975 or Morris 1983 13 | # In particular, the data are listed in Morris 1983, and available as part of Rosenthal's online code: 14 | # http://probability.ca/jeff/comp/james.c 15 | 16 | Y <- c(0.395, 0.375, 0.355, 0.334, 0.313, 0.313, 0.291, 0.269, 0.247, 0.247, 0.224, 0.224, 17 | 0.224, 0.224, 0.224, 0.200, 0.175, 0.148) 18 | ndata <- length(Y) 19 | 20 | # The data are Y_i for i in {1,...,K}, with here K = 18. 21 | # The model specifies Y_i ~ Normal(theta_i, V), with V known and fixed to 0.00434 22 | V <- 0.00434 23 | # The prior is theta_i ~ Normal(mu, A), with mu ~ flat prior, and A ~ InverseGamma(a,b) 24 | # with density proportional to exp(-b/x) x^{-a-1}; Rosenthal chooses a = -1, b = 2 25 | a <- -1 26 | b <- 2 27 | # To target the posterior distribution, we follow Rosenthal and consider the following Gibbs sampler. 28 | # A given rest: IG(a + (K-1)/2, b + (sum_i=1^K (theta_i - theta_bar)^2)/2) 29 | # mu given rest: Normal(theta_bar, A / K) 30 | # theta_i given rest: Normal( (mu * V + Y_i * A) / (V + A), A * V / (V + A)) 31 | # ... where theta_bar is the average of the theta_i, for i in {1,...,K} 32 | 33 | 34 | # we store the parameters as (mu, A, theta_1, ..., theta_K) so the parameter space is of dimension 20 35 | # the initialization comes from Rosenthal (except the initialization of mu and A which is irrelevant) 36 | rinit <- function(){ 37 | return(list(chain_state = c(0,1,rep(mean(Y), ndata)))) 38 | } 39 | # here is the code for the Gibbs sampler 40 | single_kernel <- function(state){ 41 | theta <- state$chain_state[3:(ndata+2)] 42 | theta_bar <- mean(theta) 43 | # update of A given rest 44 | A <- rinversegamma(1, a + 0.5 * (ndata-1), b + 0.5 * sum((theta - theta_bar)^2)) 45 | # update of mu given rest 46 | mu <- rnorm(1, theta_bar, sqrt(A/ndata)) 47 | # update of each theta_i 48 | theta <- rnorm(ndata, (mu * V + Y * A) / (V + A), sqrt(A * V / (V + A))) 49 | return(list(chain_state = c(mu, A, theta))) 50 | } 51 | 52 | ## Modify niterations 53 | niterations <- 5e5 54 | chain <- matrix(nrow = niterations, ncol = ndata+2) 55 | state <- rinit() 56 | chain[1,] <- state$chain_state 57 | for (iteration in 2:niterations){ 58 | state <- single_kernel(state) 59 | chain[iteration,] <- state$chain_state 60 | } 61 | save(niterations, chain, file = "baseball.mcmc.RData") 62 | load(file = "baseball.mcmc.RData") 63 | -------------------------------------------------------------------------------- /inst/reproducebaseball/run.all.R: -------------------------------------------------------------------------------- 1 | ## set working directory to the directory containing this script 2 | 3 | print("run long MCMC") 4 | source("baseball.mcmc.run.R") 5 | 6 | print("run coupled chains with m=100") 7 | source("baseball.run.R") 8 | 9 | print("produce plots") 10 | source("baseball.plots.R") 11 | -------------------------------------------------------------------------------- /inst/reproducebayesianlasso/bayesianlasso.mcmc.run.R: -------------------------------------------------------------------------------- 1 | # load packages 2 | library(unbiasedmcmc) 3 | library(coda) 4 | # 5 | rm(list = ls()) 6 | set.seed(1) 7 | library(doParallel) 8 | library(doRNG) 9 | registerDoParallel(cores = detectCores() - 1) 10 | 11 | data(diabetes) 12 | X <- scale(diabetes$x2) 13 | Y <- matrix(scale(diabetes$y), ncol = 1) 14 | p <- ncol(X) 15 | 16 | 17 | mcmc_blasso <- function(nmcmc, burnin, lambda){ 18 | pb <- get_blasso(Y, X, lambda) 19 | state <- pb$rinit() 20 | chain <- matrix(nrow = nmcmc, ncol = length(state$chain_state)) 21 | for (imcmc in 1:nmcmc){ 22 | state <- pb$gibbs_kernel(state) 23 | chain[imcmc,] <- state$chain_state 24 | } 25 | return(chain) 26 | } 27 | 28 | 29 | ## Modify nmcmc 30 | nmcmc <- 50000 31 | burnin <- floor(nmcmc / 10) 32 | 33 | # result <- mcmc_blasso(nmcmc, burnin, .1) 34 | # # matplot(result[,1:10], type = "l") 35 | # postmeans <- colMeans(result[burnin:nmcmc,]) 36 | # ess <- effectiveSize(result[burnin:nmcmc,1:p]) 37 | 38 | lambdas <- 10^(seq(from = -2, to = 3, length.out = 25)) 39 | df <- foreach (ilambda = 1:length(lambdas), .combine = rbind) %dorng% { 40 | lambda <- lambdas[ilambda] 41 | print(lambda) 42 | result <- mcmc_blasso(nmcmc, burnin, lambda) 43 | postmeans <- colMeans(result[burnin:nmcmc,1:p]) 44 | ess <- effectiveSize(result[burnin:nmcmc,1:p]) 45 | 46 | data.frame(ilambda = rep(ilambda, p), lambda = rep(lambda, p), component = 1:p, 47 | ess = ess, postmeans = postmeans) 48 | } 49 | save(df, nmcmc, burnin, lambdas, file = "bayesianlasso.mcmc.RData") 50 | 51 | 52 | -------------------------------------------------------------------------------- /inst/reproducebayesianlasso/bayesianlasso.plots.R: -------------------------------------------------------------------------------- 1 | # load packages 2 | library(unbiasedmcmc) 3 | library(latex2exp) 4 | library(dplyr) 5 | setmytheme() 6 | rm(list = ls()) 7 | set.seed(21) 8 | 9 | data(diabetes) 10 | X <- scale(diabetes$x2) 11 | Y <- matrix(scale(diabetes$y), ncol = 1) 12 | p <- ncol(X) 13 | 14 | # lambdas <- 10^(seq(from = -2, to = 3, length.out = 25)) 15 | 16 | ## meeting times as a function of lambda 17 | load("bayesianlasso.meetings.RData") 18 | head(df) 19 | meetingquantile <- df %>% group_by(ilambda) %>% summarise(lambda = mean(lambda), mean = mean(meetingtime), q99 = quantile(meetingtime, probs = 0.99)) 20 | meetingquantile %>% head 21 | g <- ggplot(meetingquantile, aes(x = lambda, y = mean)) + geom_point() 22 | g <- g + scale_x_log10(breaks = c(1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4)) 23 | g <- g + scale_y_log10(breaks = c(10, 100, 1000, 5500)) 24 | g <- g + xlab(expression(lambda)) + ylab("average meeting time") 25 | g 26 | ggsave(filename = "bayesianlasso.meetings.pdf", plot = g, width = 8, height = 6) 27 | 28 | ## effective sample sizes as a function of lambda 29 | load("bayesianlasso.mcmc.RData") 30 | g <- ggplot(df, aes(x = lambda, y = ess / (nmcmc - burnin + 1))) + geom_point() 31 | g <- g + scale_x_log10(breaks = c(1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4)) 32 | g <- g + xlab(expression(lambda)) + ylab("effective sample sizes") 33 | g 34 | ggsave(filename = "bayesianlasso.ess.pdf", plot = g, width = 8, height = 6) 35 | 36 | ## posterior mean of beta as function of lambda 37 | load("bayesianlasso.estimators.RData") 38 | summary.df <- df %>% group_by(ilambda, lambda, component) %>% summarise(m = mean(estimator), sd = sd(estimator), nsamples = n()) 39 | summary.df$component %>% unique 40 | summary.df$nsamples %>% unique 41 | 42 | load("bayesianlasso.extraestimators.RData") 43 | summary.extra.df <- rbind(df, extra.df) %>% group_by(ilambda, lambda, component) %>% summarise(m = mean(estimator), sd = sd(estimator), nsamples = n()) 44 | 45 | # g <- ggplot(summary.df %>% filter(component <= 64), aes(x = lambda, y = sd/sqrt(nsamples), group = component)) + geom_line() + geom_point() 46 | # g <- g + scale_x_log10(breaks = c(1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4)) + theme(legend.position = "none") 47 | # g <- g + ylim(0,0.4) 48 | # g 49 | # 50 | # g <- ggplot(summary.extra.df %>% filter(component <= 64), aes(x = lambda, y = sd/sqrt(nsamples), group = component)) + geom_line() + geom_point() 51 | # g <- g + scale_x_log10(breaks = c(1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4)) + theme(legend.position = "none") 52 | # g <- g + ylim(0,0.4) 53 | # g 54 | 55 | g <- ggplot(summary.df %>% filter(component <= 64), aes(x = lambda, y = m, group = component)) + geom_line() + geom_point() 56 | g <- g + scale_x_log10(breaks = c(1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4)) + theme(legend.position = "none") 57 | g <- g + geom_segment(aes(y = m - 2*sd / sqrt(nsamples), yend = m + 2*sd / sqrt(nsamples), xend = lambda)) 58 | g <- g + xlab(expression(lambda)) + ylab(TeX("$E_{\\lambda} \\lbrack \\beta | Y,X \\rbrack $")) 59 | g <- g + ylim(-5,4) 60 | g 61 | ggsave(filename = "bayesianlasso.paths.pdf", plot = g, width = 8, height = 6) 62 | 63 | g <- ggplot(summary.extra.df %>% filter(component <= 64), aes(x = lambda, y = m, group = component)) + geom_line() + geom_point() 64 | g <- g + scale_x_log10(breaks = c(1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4)) + theme(legend.position = "none") 65 | g <- g + geom_segment(aes(y = m - 2*sd / sqrt(nsamples), yend = m + 2*sd / sqrt(nsamples), xend = lambda)) 66 | g <- g + xlab(expression(lambda)) + ylab(TeX("$E_{\\lambda} \\lbrack \\beta | Y,X \\rbrack $")) 67 | g <- g + ylim(-5,4) 68 | g 69 | ggsave(filename = "bayesianlasso.refinedpaths.pdf", plot = g, width = 8, height = 6) 70 | 71 | -------------------------------------------------------------------------------- /inst/reproducebayesianlasso/bayesianlasso.run.R: -------------------------------------------------------------------------------- 1 | # load packages 2 | library(unbiasedmcmc) 3 | setmytheme() 4 | rm(list = ls()) 5 | set.seed(21) 6 | library(doParallel) 7 | library(doRNG) 8 | library(dplyr) 9 | 10 | registerDoParallel(cores = detectCores()-2) 11 | 12 | data(diabetes) 13 | X <- scale(diabetes$x2) 14 | Y <- matrix(scale(diabetes$y), ncol = 1) 15 | p <- ncol(X) 16 | 17 | ## Modify lambdas 18 | # lambdas <- 10^(seq(from = -2, to = 3, length.out = 25)) 19 | lambdas <- 10^(seq(from = -2, to = 1, length.out = 15)) 20 | 21 | 22 | nrep <- 100 23 | df <- data.frame() 24 | for (ilambda in 1:length(lambdas)){ 25 | print(ilambda) 26 | lambda <- lambdas[ilambda] 27 | pb <- get_blasso(Y, X, lambda) 28 | meetings <- foreach(irep = 1:nrep) %dorng% { 29 | sample_meetingtime(pb$gibbs_kernel, pb$coupled_gibbs_kernel, pb$rinit) 30 | } 31 | meetingtimes <- sapply(meetings, function(x) x$meetingtime) 32 | df <- rbind(df, data.frame(ilambda = ilambda, lambda = lambda, rep = 1:nrep, meetingtime = meetingtimes)) 33 | save(df, lambdas, nrep, file = "bayesianlasso.meetings.RData") 34 | } 35 | save(df, lambdas, nrep, file = "bayesianlasso.meetings.RData") 36 | 37 | load("bayesianlasso.meetings.RData") 38 | head(df) 39 | meetingquantile <- df %>% group_by(ilambda) %>% summarise(mean = mean(meetingtime), q99 = quantile(meetingtime, probs = 0.99)) 40 | meetingquantile$q99 41 | 42 | 43 | df <- data.frame() 44 | for (ilambda in 1:length(lambdas)){ 45 | print(ilambda) 46 | lambda <- lambdas[ilambda] 47 | k <- meetingquantile$q99[ilambda] 48 | m <- 10 * k 49 | pb <- get_blasso(Y, X, lambda) 50 | ues <- foreach(irep = 1:nrep) %dorng% { 51 | sample_unbiasedestimator(pb$gibbs_kernel, pb$coupled_gibbs_kernel, pb$rinit, h = function(x) x, k = k, m = m) 52 | } 53 | for (irep in 1:nrep){ 54 | df <- rbind(df, data.frame(ilambda = ilambda, lambda = lambda, rep = rep(irep, (2*p+1)), 55 | component = 1:(2*p+1), estimator = ues[[irep]]$uestimator)) 56 | } 57 | save(df, lambdas, nrep, file = "bayesianlasso.estimators.RData") 58 | } 59 | save(df, lambdas, nrep, file = "bayesianlasso.estimators.RData") 60 | 61 | load("bayesianlasso.estimators.RData") 62 | summary.df <- df %>% group_by(ilambda, lambda, component) %>% summarise(m = mean(estimator), sd = sd(estimator), nsamples = n()) 63 | summary.df$component %>% unique 64 | summary.df$nsamples %>% unique 65 | 66 | g <- ggplot(summary.df %>% filter(component <= 64), aes(x = lambda, y = m, group = component)) + geom_line() + scale_x_log10() 67 | g + geom_segment(aes(y = m - 2*sd / sqrt(nrep), yend = m + 2*sd / sqrt(nrep), xend = lambda)) 68 | 69 | 70 | # compute extra estimators for first 10 lambdas 71 | nrep <- 1000 72 | extra.df <- data.frame() 73 | for (ilambda in 1:10){ 74 | print(ilambda) 75 | lambda <- lambdas[ilambda] 76 | k <- meetingquantile$q99[ilambda] 77 | m <- 10 * k 78 | pb <- get_blasso(Y, X, lambda) 79 | ues <- foreach(irep = 1:nrep) %dorng% { 80 | sample_unbiasedestimator(pb$gibbs_kernel, pb$coupled_gibbs_kernel, pb$rinit, h = function(x) x, k = k, m = m) 81 | } 82 | # meetingtimes <- sapply(ues, function(x) x$meetingtime) 83 | for (irep in 1:nrep){ 84 | extra.df <- rbind(extra.df, data.frame(ilambda = ilambda, lambda = lambda, rep = rep(irep, (2*p+1)), 85 | component = 1:(2*p+1), estimator = ues[[irep]]$uestimator)) 86 | } 87 | save(extra.df, file = "bayesianlasso.extraestimators.RData") 88 | } 89 | save(extra.df, file = "bayesianlasso.extraestimators.RData") 90 | 91 | load("bayesianlasso.extraestimators.RData") 92 | 93 | summary.extra.df <- rbind(df, extra.df) %>% group_by(ilambda, lambda, component) %>% summarise(m = mean(estimator), sd = sd(estimator), nsamples = n()) 94 | summary.extra.df$nsamples %>% unique 95 | 96 | g <- ggplot(summary.extra.df %>% filter(component <= 64), aes(x = lambda, y = m, group = component)) + geom_line() + scale_x_log10() 97 | g + geom_segment(aes(y = m - 2*sd / sqrt(nsamples), yend = m + 2*sd / sqrt(nsamples), xend = lambda)) 98 | 99 | # g <- ggplot(summary.extra.df %>% filter(component <= 64), aes(x = ilambda, y = m, group = component)) + geom_line() + scale_x_log10() 100 | # g + geom_segment(aes(y = m - 2*sd / sqrt(nsamples), yend = m + 2*sd / sqrt(nsamples), xend = ilambda)) 101 | -------------------------------------------------------------------------------- /inst/reproducebayesianlasso/run.all.R: -------------------------------------------------------------------------------- 1 | # set working directory to folder containing this file 2 | 3 | print("run various coupled MCMC") 4 | source("bayesianlasso.run.R") 5 | 6 | print("run long MCMC") 7 | source("bayesianlasso.mcmc.run.R") 8 | # this is used to get effective sample size as a function of lambda 9 | 10 | print("produce plots") 11 | source("bayesianlasso.plots.R") 12 | 13 | -------------------------------------------------------------------------------- /inst/reproducebimodal/bimodal.mcmc.run.R: -------------------------------------------------------------------------------- 1 | # load packages 2 | library(unbiasedmcmc) 3 | rm(list = ls()) 4 | set.seed(21) 5 | 6 | # 7 | target <- function(x){ 8 | evals <- log(0.5) + dnorm(x, mean = c(-4, 4), sd = 1, log = TRUE) 9 | return(max(evals) + log(sum(exp(evals - max(evals))))) 10 | } 11 | # 12 | get_pb <- function(sd_proposal, initmean, initsd){ 13 | single_kernel <- function(state){ 14 | chain_state <- state$chain_state 15 | current_pdf <- state$current_pdf 16 | proposal_value <- rnorm(1, mean=chain_state, sd=sd_proposal) 17 | proposal_pdf <- target(proposal_value) 18 | accept <- (log(runif(1)) < (proposal_pdf - current_pdf)) 19 | if (accept){ 20 | return(list(chain_state = proposal_value, current_pdf = proposal_pdf)) 21 | } else { 22 | return(list(chain_state = chain_state, current_pdf = current_pdf)) 23 | } 24 | } 25 | rinit <- function(){ 26 | chain_state <- rnorm(1, initmean, initsd) 27 | current_pdf <- target(chain_state) 28 | return(list(chain_state = chain_state, current_pdf = current_pdf)) 29 | } 30 | 31 | return(list(rinit = rinit, single_kernel = single_kernel)) 32 | } 33 | # 34 | 35 | # test function 36 | testfunction <- function(x) (x > 3) 37 | ## Modify nmcmc 38 | nmcmc <- 100000 39 | burnin <- 10000 40 | # easy setting: good proposal, but bad init 41 | pb <- get_pb(3, initmean = 10, initsd = 10) 42 | current <- pb$rinit() 43 | chain <- rep(0, nmcmc) 44 | for (imcmc in 1:nmcmc){ 45 | current <- pb$single_kernel(current) 46 | chain[imcmc] <- current$chain_state 47 | } 48 | 49 | 50 | library(coda) 51 | mcmcvar.easy <- spectrum0(sapply(chain[burnin:nmcmc], testfunction))$spec 52 | mcmcvar.easy 53 | mean(sapply(chain[burnin:nmcmc], testfunction)) 54 | save(nmcmc, mcmcvar.easy, file = "bimodal.mcmc.RData") 55 | 56 | 57 | # intermediate setting: bad proposal, good init 58 | pb <- get_pb(1, initmean = 10, initsd = 10) 59 | current <- pb$rinit() 60 | chain <- rep(0, nmcmc) 61 | for (imcmc in 1:nmcmc){ 62 | current <- pb$single_kernel(current) 63 | chain[imcmc] <- current$chain_state 64 | } 65 | library(coda) 66 | mcmcvar.intermediate <- spectrum0(sapply(chain[burnin:nmcmc], testfunction))$spec 67 | save(nmcmc, mcmcvar.easy, mcmcvar.intermediate, file = "bimodal.mcmc.RData") 68 | 69 | 70 | -------------------------------------------------------------------------------- /inst/reproducebimodal/run.all.R: -------------------------------------------------------------------------------- 1 | ## set working directory to the directory containing this script 2 | 3 | print("runs on bimodal target") 4 | 5 | print("generate meeting times for different MCMC proposals / different initial distribution") 6 | source("bimodal.run.R") 7 | 8 | print("generate long MCMC run") 9 | source("bimodal.mcmc.run.R") 10 | 11 | print("produce plots and tables") 12 | source("bimodal.plots.R") 13 | -------------------------------------------------------------------------------- /inst/reproduceepidemiology/plummer.cut.plots.R: -------------------------------------------------------------------------------- 1 | # load packages 2 | library(unbiasedmcmc) 3 | library(latex2exp) 4 | library(dplyr) 5 | setmytheme() 6 | rm(list = ls()) 7 | set.seed(21) 8 | library(doParallel) 9 | library(doRNG) 10 | registerDoParallel(cores = detectCores()) 11 | 12 | # 13 | # Plummer's example 14 | dimension <- 2 15 | 16 | filename <- "plummer.tuning.RData" 17 | load(file = filename) 18 | meetingtime <- sapply(c_chains_1, function(x) x$meetingtime) 19 | summary(meetingtime) 20 | hist(meetingtime) 21 | # from which we have found 22 | k 23 | m 24 | 25 | # ## 26 | load(file = "plummer.results.RData") 27 | nsamples 28 | k 29 | m 30 | 31 | meetingtime <- sapply(c_chains_2, function(x) x$meetingtime) 32 | summary(meetingtime) 33 | hist(meetingtime) 34 | # 35 | 36 | mean_estimators <- foreach(irep = 1:nsamples) %dorng% { 37 | H_bar(c_chains_2[[irep]], k = k, m = m) 38 | } 39 | 40 | square_estimators <- foreach(irep = 1:nsamples) %dorng% { 41 | H_bar(c_chains_2[[irep]], h = function(x) x^2, k = k, m = m) 42 | } 43 | 44 | est_mean <- rep(0, dimension) 45 | est_var <- rep(0, dimension) 46 | for (component in 1:dimension){ 47 | estimators <- sapply(mean_estimators, function(x) x[component]) 48 | est_mean[component] <- mean(estimators) 49 | cat("estimated mean: ", est_mean[component], "+/- ", 2*sd(estimators)/sqrt(nsamples), "\n") 50 | s_estimators <- sapply(square_estimators, function(x) x[component]) 51 | cat("estimated second moment: ", mean(s_estimators), "+/- ", 2*sd(s_estimators)/sqrt(nsamples), "\n") 52 | est_var[component] <- mean(s_estimators) - est_mean[component]^2 53 | cat("estimated variance: ", est_var[component], "\n") 54 | } 55 | 56 | nsamples <- length(c_chains_2) 57 | meetingtime <- sapply(c_chains_2, function(x) x$meetingtime) 58 | summary(meetingtime) 59 | 60 | # ggsave(filename = "plummer.meetingtimes.pdf", plot = g, width = 7, height = 7) 61 | gmeetingtime <- qplot(x = meetingtime, geom = "blank") + geom_histogram(aes(y = ..density..)) + xlab("meeting time") + ylab("density") 62 | gmeetingtime 63 | ggsave(filename = "plummer.meetingtimes.pdf", plot = gmeetingtime, width = 5, height = 5) 64 | 65 | sum(sapply(c_chains_2, function(x) x$iteration)) / nsamples 66 | 67 | ### cut distribution from tedious parallel MCMC 68 | histogram1 <- histogram_c_chains(c_chains_2, 1, k, m, nclass = 35) 69 | histogram2 <- histogram_c_chains(c_chains_2, 2, k, m, nclass = 30) 70 | load(file = "plummer.mcmc.RData") 71 | 72 | hist_mcmc <- hist(theta2s[,1], breaks = histogram1$breaks, plot = F) 73 | # hist_mcmc <- hist(theta2s[,1], plot = F) 74 | g1 <- plot_histogram(histogram1, with_bar = T) + xlab(TeX("$\\theta_{2,1}$")) + ylab("density") 75 | g1 <- g1 + geom_line(data=data.frame(x = hist_mcmc$mids, y = hist_mcmc$density), aes(x = x, y = y, xmin = NULL, xmax = NULL, ymin = NULL, ymax = NULL), colour = "red") 76 | g1 <- g1 + scale_x_continuous(breaks = c(-2.5, -2, -1.5)) 77 | g1 78 | ggsave("plummer.histogram1.pdf", plot = g1, width = 5, height = 5) 79 | 80 | hist_mcmc <- hist(theta2s[,2], breaks = histogram2$breaks, plot = F) 81 | g2 <- plot_histogram(histogram2, with_bar = T) + xlab(TeX("$\\theta_{2,2}$")) + ylab("density") 82 | g2 <- g2 + geom_line(aes(x = hist_mcmc$mids, y = hist_mcmc$density), colour = "red") 83 | g2 <- g2 + scale_x_continuous(breaks = c(10, 15, 20, 25)) 84 | g2 85 | ggsave("plummer.histogram2.pdf", plot = g2, width = 5, height = 5) 86 | -------------------------------------------------------------------------------- /inst/reproduceepidemiology/run.all.R: -------------------------------------------------------------------------------- 1 | ## set working directory to the directory containing this script 2 | 3 | print("runs on cut distribution") 4 | 5 | print("producing all results") 6 | source("plummer.cut.run.R") 7 | 8 | print("producing plots") 9 | source("plummer.cut.plots.R") 10 | -------------------------------------------------------------------------------- /inst/reproducegermancredit/germancredit.mcmc.run.R: -------------------------------------------------------------------------------- 1 | 2 | # load packages 3 | library(unbiasedmcmc) 4 | rm(list = ls()) 5 | set.seed(21) 6 | 7 | # 8 | ## This example is about the Polya Gamma Gibbs sampler for logistic regression models, as applied to the German credit data of Lichman 2013. 9 | 10 | data(germancredit) 11 | n <- nrow(X) 12 | p <- ncol(X) 13 | 14 | # prior 15 | b <- matrix(0, nrow = p, ncol = 1) 16 | B <- diag(10, p, p) 17 | logistic_setting <- logisticregression_precomputation(Y, X, b, B) 18 | # define MCMC transition kernel 19 | single_kernel <- function(chain_state, logistic_setting){ 20 | zs <- abs(logisticregression_xbeta(logistic_setting$X, t(chain_state))) 21 | w <- BayesLogit::rpg(logistic_setting$n, h=1, z=zs) 22 | res <- logisticregression_m_and_sigma(w, X, logistic_setting$invB, logistic_setting$KTkappaplusinvBtimesb) 23 | chain_state <- t(fast_rmvnorm_chol(1, res$m, res$Cholesky)) 24 | return(chain_state) 25 | } 26 | 27 | rinit <- function(){ 28 | t(fast_rmvnorm(1, mean = b, covariance = B)) 29 | } 30 | 31 | chain_state <- rinit() 32 | zs <- abs(logisticregression_xbeta(logistic_setting$X, t(chain_state))) 33 | w <- BayesLogit::rpg(logistic_setting$n, h=1, z=zs) 34 | res <- logisticregression_m_and_sigma(w, X, logistic_setting$invB, logistic_setting$KTkappaplusinvBtimesb) 35 | Sigma_ <- solve(res$Sigma_inverse) 36 | Sigma_alt <- t(res$Cholesky) %*% res$Cholesky 37 | 38 | Sigma_[1:5,1:5] 39 | Sigma_alt[1:5,1:5] 40 | 41 | mean_ <- res$m 42 | n <- 1e5 43 | xx_2 <- fast_rmvnorm_chol(n, mean_, res$Cholesky) 44 | colMeans(xx_2[,1:5]) 45 | mean_[1:5] 46 | cov(xx_2[,1:5]) 47 | Sigma_[1:5,1:5] 48 | # xx_3 <- fast_rmvnorm_chol(n, mean_, chol(Sigma_)) 49 | # colMeans(xx_3[,1:5]) 50 | # cov(xx_3[,11:15]) 51 | # Sigma_[11:15,11:15] 52 | # modify niterations 53 | 54 | niterations <- 100000 55 | 56 | chain <- matrix(nrow = niterations, ncol = p) 57 | chain[1,] <- rinit() 58 | for (iteration in 2:niterations){ 59 | chain[iteration,] <- single_kernel(chain[iteration-1,], logistic_setting) 60 | } 61 | save(niterations, chain, file = "germancredit.mcmc.RData") 62 | load("germancredit.mcmc.RData") 63 | matplot(chain[100:niterations,1:5], type = "l") 64 | 65 | idx <- which('Instalment.per.cent' == colnames(X)) 66 | hist(chain[100:niterations,idx]) 67 | -------------------------------------------------------------------------------- /inst/reproducegermancredit/germancredit.plots.R: -------------------------------------------------------------------------------- 1 | 2 | # load packages 3 | library(unbiasedmcmc) 4 | library(dplyr) 5 | setmytheme() 6 | rm(list = ls()) 7 | set.seed(21) 8 | library(doParallel) 9 | library(doRNG) 10 | registerDoParallel(cores = detectCores()) 11 | data(germancredit) 12 | load("germancredit.tuning.RData") 13 | 14 | #ndata <- 10 15 | 16 | 17 | # Plot histogram of meetingtimes 18 | nsamples <- meetingtime.list$nsamples 19 | meetingtimes <- sapply(X = meetingtime.list$meetings_1, FUN = function(x) x$meetingtime) 20 | #upper quantiles: 90% = 81, 95% = 96, 99% = 135. In the paper we consider 110. 21 | 22 | mt_df = data.frame(meetingtime=meetingtimes) 23 | head(mt_df) 24 | #mt_df <- data.frame(meetingtime=sapply(c_chains, function(x) x$meetingtime)) 25 | g <- ggplot(data=mt_df,aes(x=meetingtime))+geom_histogram(aes(y = ..density..), binwidth=10) 26 | g <- g + xlab("meeting time") 27 | g 28 | ggsave(filename="pgg.meetingtime.pdf", plot=g, width=7, height=7) 29 | 30 | 31 | 32 | # Plot distances and number met for a particular run 33 | g3 <- ggplot(df_w,aes(x=iter,y=nmet_w))+geom_line() + xlab('iteration') + ylab("count") 34 | g3 35 | ggsave(filename = "pgg.nmetw.pdf", plot = g3, width = 7, height = 7) 36 | 37 | g2 <- ggplot(df_w,aes(x=iter,y=dist_w))+geom_line() + xlab('iteration') + ylab('distance') #+ ylab(TeX("$||w_1-w_2||_2$")) 38 | g2 39 | ggsave(filename = "pgg.distw.pdf", plot = g2, width = 7, height = 7) 40 | 41 | g1 <- ggplot(df_beta,aes(x=iter,y=dist_beta))+geom_line() + xlab('iteration') + ylab('distance') #ylab(TeX("$||\\beta_1-\\beta_2||_2$")) 42 | g1 43 | ggsave(filename = "pgg.distbeta.pdf", plot = g1, width = 7, height = 7) 44 | 45 | 46 | 47 | 48 | # Plot efficiency as a function of k 49 | tuning.k.list$nsamples 50 | g <- qplot(x = tuning.k.list$ks, y = 1/(tuning.k.list$cost * tuning.k.list$v), geom = "line") #+ scale_y_continuous(breaks = c(0.2, 0.3, 0.4)) 51 | g <- g + geom_point() + ylab('asymptotic efficiency') + xlab("k") 52 | g 53 | ggsave(filename = "pgg.asympt_eff.pdf", plot = g, width = 7, height = 7) 54 | 55 | 56 | 57 | 58 | 59 | 60 | # Plot histogram 61 | 62 | load(file = "germancredit.c_chain.RData") 63 | load(file = "germancredit.mcmc.RData") 64 | 65 | idx1 <- which('Instalment.per.cent' == colnames(X)) 66 | idx2 <- which('Duration.in.Current.address' == colnames(X)) 67 | idx <- idx1 68 | 69 | nclass <- 27 #27 for idx1, 24 for idx2 70 | rng <- range(find_breaks(c_chains_, idx, nclass, k, m, lag = 1)) 71 | breaks = seq(rng[1],rng[2],length=nclass) 72 | 73 | histogram1 <- histogram_c_chains(c_chains_, idx, k, m, nclass = nclass) 74 | 75 | niterations <- nrow(chain) 76 | hist_mcmc <- hist(chain[1000:niterations, idx], breaks = histogram1$breaks, plot = FALSE) 77 | 78 | g1 <- plot_histogram(histogram1, with_bar = TRUE) + xlab(expression(beta)) + ylab("density") 79 | g1 <- g1 + geom_line(aes(x = hist_mcmc$mids, y = hist_mcmc$density), colour = "red") 80 | g1 81 | ggsave(filename = "pgg.histogram1.pdf", plot = g1, width = 7, height = 7) 82 | 83 | library(coda) 84 | testfunction <- function(x) x[idx1] 85 | mcmc_ <- apply(X = chain[1000:nrow(chain),,drop=F], MARGIN = 1, FUN = testfunction) 86 | mcmcvar <- spectrum0(mcmc_)$spec 87 | mcmcvar 88 | 89 | nsamples <- length(c_chains_) 90 | k <- 110 91 | m <- 1100 92 | taus <- sapply(c_chains_, FUN = function(x) x$meetingtime) 93 | estimators <- foreach(irep = 1:nsamples) %dorng% { 94 | H_bar(c_chains_[[irep]], h = testfunction, k = k, m = m) 95 | } 96 | v <- var(unlist(estimators)) 97 | c <- mean(2 * taus + pmax(1, m + 1 - taus)) 98 | inef <- c * v 99 | # loss of efficiency for this choice of k and m 100 | inef / mcmcvar 101 | # 102 | 103 | -------------------------------------------------------------------------------- /inst/reproducegermancredit/germancredit.preparedata.R: -------------------------------------------------------------------------------- 1 | # German Credit Data 2 | data <- read.csv('german_credit.csv') 3 | Y <- data[,"Creditability"] 4 | x.categorical <- c('Account.Balance', 'Payment.Status.of.Previous.Credit', 'Purpose', 'Value.Savings.Stocks', 5 | 'Length.of.current.employment', 'Sex...Marital.Status', 'Guarantors', 'Most.valuable.available.asset', 6 | 'Concurrent.Credits', 'Type.of.apartment', 'Occupation', 'Telephone', 'Foreign.Worker') 7 | x.quant <- c('Duration.of.Credit..month.', 'Credit.Amount', 'Instalment.per.cent', 'Duration.in.Current.address', 8 | 'Age..years.', 'No.of.Credits.at.this.Bank', 'No.of.dependents') 9 | for(x in x.categorical){ 10 | data[,x] = as.factor(data[,x]) 11 | } 12 | 13 | 14 | fmla <- paste('~',paste(c(x.quant,x.categorical),collapse ='+')) 15 | X <- model.matrix(formula(fmla), data=data) 16 | 17 | save(Y, X, file = "germancredit.RData") 18 | -------------------------------------------------------------------------------- /inst/reproducegermancredit/run.all.R: -------------------------------------------------------------------------------- 1 | # set working directory to folder containing this file 2 | 3 | 4 | print("run various coupled MCMC") 5 | source("germancredit.run.R") 6 | 7 | print("run long MCMC") 8 | source("germancredit.mcmc.run.R") 9 | 10 | print("produce plots") 11 | source("germancredit.plots.R") 12 | 13 | -------------------------------------------------------------------------------- /inst/reproduceisingmodel/ising.gibbs.meetings.run.R: -------------------------------------------------------------------------------- 1 | ### This script plays with a coupled Gibbs sampler (i.e. single site updates) 2 | ### for a basic Ising model, with different values of the temperatures 3 | library(unbiasedmcmc) 4 | rm(list = ls()) 5 | set.seed(21) 6 | # 7 | library(doRNG) 8 | library(doParallel) 9 | registerDoParallel(cores = detectCores()-2) 10 | # library(dplyr) 11 | # library(abind) 12 | 13 | # size of the grid 14 | size <- 32 15 | # possible values of sum of neighbors 16 | ss_ <- c(-4,-2,0,2,4) 17 | # inverse temperature 18 | # beta <- 0.4 19 | # precomputed probability for single-site flips 20 | # proba_ <- exp(ss_*beta) / (exp(ss_*beta) + exp(-ss_*beta)) 21 | # # initialization 22 | # chain_state1 <- ising_rinit() 23 | # chain_state2 <- ising_rinit() 24 | # # MCMC loop 25 | # niterations <- 1000 26 | # # store history of sum of states 27 | # sumstates1 <- rep(0, niterations) 28 | # sumstates1[1] <- unbiasedmcmc:::ising_sum_(chain_state1) 29 | # sumstates2 <- rep(0, niterations) 30 | # sumstates2[1] <- unbiasedmcmc:::ising_sum_(chain_state2) 31 | # 32 | # for (iter in 2:niterations){ 33 | # res_ <- ising_coupled_kernel(chain_state1, chain_state2, proba_) 34 | # chain_state1 <- res_$chain_state1 35 | # chain_state2 <- res_$chain_state2 36 | # sumstates1[iter] <- unbiasedmcmc:::ising_sum_(chain_state1) 37 | # sumstates2[iter] <- unbiasedmcmc:::ising_sum_(chain_state2) 38 | # } 39 | 40 | # matplot(cbind(sumstates1, sumstates2), type = "l") 41 | 42 | ising_meeting <- function(beta, m = 1, max_iterations = Inf){ 43 | ss_ <- c(-4,-2,0,2,4) 44 | proba_ <- exp(ss_*beta) / (exp(ss_*beta) + exp(-ss_*beta)) 45 | chain_state1 <- ising_rinit() 46 | chain_state2 <- ising_rinit() 47 | current_nsamples1 <- 1 48 | chain_state1 <- ising_single_kernel(chain_state1, proba_) 49 | current_nsamples1 <- current_nsamples1 + 1 50 | iter <- 1 51 | meet <- FALSE 52 | finished <- FALSE 53 | meetingtime <- Inf 54 | while (!finished && iter < max_iterations){ 55 | iter <- iter + 1 56 | if (meet){ 57 | chain_state1 <- ising_single_kernel(chain_state1, proba_) 58 | chain_state2 <- chain_state1 59 | } else { 60 | res_coupled_kernel <- ising_coupled_kernel(chain_state1, chain_state2, proba_) 61 | chain_state1 <- res_coupled_kernel$chain_state1 62 | chain_state2 <- res_coupled_kernel$chain_state2 63 | if (all(chain_state1 == chain_state2) && !meet){ 64 | # recording meeting time tau 65 | meet <- TRUE 66 | meetingtime <- iter 67 | } 68 | } 69 | current_nsamples1 <- current_nsamples1 + 1 70 | # stop after max(m, tau) steps 71 | if (iter >= max(meetingtime, m)){ 72 | finished <- TRUE 73 | } 74 | } 75 | return(list(meetingtime = meetingtime, iteration = iter, finished = finished)) 76 | } 77 | 78 | 79 | nrep <- 100 80 | # betas <- seq(from = 0.3, to = 0.55, length.out = 15) 81 | betas <- c(0.3, 0.317857142857143, 0.335714285714286, 0.353571428571429, 82 | 0.371428571428571, 0.389285714285714, 0.407142857142857, 0.425, 83 | 0.442857142857143, 0.460714285714286, 0.478571428571429) 84 | singlesite.meetings.df <- data.frame() 85 | for (ibeta in seq_along(betas)){ 86 | print(ibeta) 87 | beta <- betas[ibeta] 88 | ccs_ <- foreach(irep = 1:nrep) %dorng% { 89 | ising_meeting(beta, m = 1, max_iterations = Inf) 90 | } 91 | meetingtimes <- sapply(ccs_, function(x) x$meetingtime) 92 | singlesite.meetings.df <- rbind(singlesite.meetings.df, 93 | data.frame(meeting = meetingtimes, beta = rep(beta, nrep))) 94 | print(summary(meetingtimes)) 95 | save(nrep, betas, singlesite.meetings.df, file = "ising.singlesite.meetings.RData") 96 | } 97 | 98 | load(file = "ising.singlesite.meetings.RData") 99 | 100 | # tail(singlesite.meetings.df) 101 | # 102 | # setmytheme() 103 | # g <- ggplot(singlesite.meetings.df %>% group_by(beta) %>% summarise(m = mean(meeting)), 104 | # aes(x = beta, y = m)) + geom_line() + geom_point() + scale_y_log10() 105 | # g <- g + xlab(expression(theta)) + ylab("average meeting time") 106 | # g 107 | # 108 | # ggsave(filename = "ising.singlesite.meetings.pdf", plot = g, width = 8, height = 6) 109 | -------------------------------------------------------------------------------- /inst/reproduceisingmodel/ising.mcmc.run.R: -------------------------------------------------------------------------------- 1 | ### This script plays with parallel tempering 2 | ### for a basic Ising model, with different values of the temperatures 3 | ### i.e. Gibbs sampler at each temperature and swap moves between chains 4 | library(unbiasedmcmc) 5 | rm(list = ls()) 6 | set.seed(21) 7 | # 8 | library(doRNG) 9 | library(doParallel) 10 | registerDoParallel(cores = detectCores()-2) 11 | 12 | # size of the grid 13 | size <- 32 14 | # possible values of sum of neighbors 15 | ss_ <- c(-4,-2,0,2,4) 16 | 17 | nchains <- 16 18 | # number of iterations 19 | niterations <- 1e5 20 | # history 21 | history_sumstates <- matrix(0, ncol = nchains, nrow = niterations) 22 | # probability of doing a swap move 23 | proba_swapmove <- 0.01 24 | # inverse temperatures 25 | betas <- seq(from = 0.3, to = 0.55, length.out = nchains) 26 | # precomputed probability for single-site flips 27 | probas_ <- sapply(betas, function(beta) exp(ss_*beta) / (exp(ss_*beta) + exp(-ss_*beta))) 28 | # initialization 29 | current_states <- ising_pt_rinit(nchains) 30 | sumstates <- unlist(lapply(current_states, unbiasedmcmc:::ising_sum_)) 31 | history_sumstates[1,] <- sumstates 32 | nswap_attempts <- 0 33 | nswap_accepts <- rep(0, nchains-1) 34 | # MCMC loop 35 | for (iteration in 2:niterations){ 36 | if (iteration %% 10000 == 1) cat("iteration", iteration, "/", niterations, "\n") 37 | res_ <- ising_pt_single_kernel(current_states, sumstates, betas, probas_, proba_swapmove) 38 | current_states <- res_$chain_states 39 | sumstates <- res_$sumstates 40 | nswap_accepts <- nswap_accepts + res_$nswap_accepts 41 | nswap_attempts <- nswap_attempts + res_$nswap_attempts 42 | history_sumstates[iteration,] <- sumstates 43 | } 44 | save(nswap_accepts, nswap_attempts, history_sumstates, file = "ising.mcmc.RData") 45 | 46 | # load("ising.mcmc.RData") 47 | # niterations <- nrow(history_sumstates) 48 | # # swap acceptance in % 49 | # cat(paste0(round(100*nswap_accepts/nswap_attempts, 2), " %"), "\n") 50 | # par(mfrow = c(2,1)) 51 | # # traceplots 52 | # matplot(apply(history_sumstates[2000:niterations,1:5], 2, function(x) cumsum(x)/(1:(niterations-2000+1))), type = "l", ylab = "sum of states at different temperatures") 53 | # # plot estimated average natural parameters 54 | # plot(betas, colMeans(history_sumstates[2000:niterations,]), type = "l") 55 | # 56 | # par(mfrow = c(1,1)) 57 | # matplot(history_sumstates[2000:1e4,1:5], type = "l") 58 | # 59 | # library(coda) 60 | # vars <- sapply(1:nchains, function(i) spectrum0(history_sumstates[1e4:niterations,i])$spec) 61 | # sd_errors <- sqrt(vars/(niterations-1e4)) 62 | # sd_errors 63 | # estimates <- colMeans(history_sumstates[1e4:niterations,]) 64 | # ggplot(data.frame(betas = betas, estimates = estimates, sd_errors = sd_errors), 65 | # aes(x = betas, y = estimates, ymin = estimates-2*sd_errors, ymax=estimates+2*sd_errors)) + geom_errorbar() + 66 | # geom_line() 67 | -------------------------------------------------------------------------------- /inst/reproduceisingmodel/run.all.R: -------------------------------------------------------------------------------- 1 | ## set working directory to the directory containing this script 2 | 3 | print("obtain meeting times for single-site Gibbs, on a grid of values of beta") 4 | source("ising.gibbs.meetings.run.R") 5 | 6 | print("obtain meeting times for parallel tempering, on a grid of values of beta") 7 | source("ising.swap.meetings.run.R") 8 | 9 | print("long run of parallel tempering") 10 | source("ising.mcmc.run.R") 11 | 12 | ## to run the rest of the calculation you need to run "run.batch.ising.R" many times 13 | ## which will populate an output/ subfolder 14 | 15 | ## See explanations in "run.batch.ising.R" and "run.odyssey.ising.sh". 16 | 17 | ## Example of command to run the script 10 times, using GNU Parallel and 5 processors: 18 | ## parallel -j5 'R CMD BATCH --no-save "--args {1}" run.batch.ising.R out/meeting.job{1}.out' ::: {1..10} 19 | 20 | ## Once the batch scripts are run, we can proceed and create the plots. 21 | 22 | print("produce plots") 23 | source("ising.plots.R") 24 | 25 | -------------------------------------------------------------------------------- /inst/reproduceisingmodel/run.odyssey.ising.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -J ising_rep # Job name 3 | #SBATCH -n 1 # Number of cores 4 | #SBATCH -N 1 # All cores on one machine 5 | #SBATCH -t 0-24:00 # Runtime in D-HH:MM 6 | #SBATCH -p shared # Partition to submit to (general, shared, serial_requeue) *************** modify this line 7 | #SBATCH --mem-per-cpu=2000M # Memory pool for all cores (see also --mem-per-cpu) 8 | #SBATCH --mail-type=END # Type of email notification- BEGIN,END,FAIL,ALL 9 | #SBATCH --mail-user=username@fas.harvard.edu # Email *************** modify this line 10 | #SBATCH --array=1-100 # *************** modify this line 11 | 12 | ## LOAD SOFTWARE ENV ## 13 | source new-modules.sh 14 | module load R/3.4.2-fasrc01 15 | export R_LIBS_USER=$HOME/apps/R:$R_LIBS_USER 16 | input=run.batch.ising.R # *************** modify this line 17 | cd /n/home12/pjacob/isingmodel/ # *************** modify this line 18 | 19 | srun R CMD BATCH --no-save "--args $SLURM_ARRAY_TASK_ID" $input out/$input.$SLURM_ARRAY_TASK_ID.out #****** modify this line 20 | 21 | 22 | ### How to run the calculations on the Harvard Odyssey cluster? 23 | ### command to ssh to the cluster, e.g. 24 | # ssh username@odyssey.rc.fas.harvard.edu 25 | 26 | ### 27 | # First, install the 'unbiasedmcmc' package on the cluster (e.g. upload zip file and R CMD INSTALL). 28 | # Do this in an interactive session, e.g. 29 | #### 30 | # srun --pty -p test -t 20 --mem 3000 /bin/bash 31 | ### to have access to R: 32 | # source new-modules.sh 33 | # module load R/3.4.2-fasrc01 34 | # export R_LIBS_USER=$HOME/apps/R:$R_LIBS_USER 35 | # R CMD INSTALL package.tar.gz 36 | 37 | 38 | ### commands such as the following may be useful when it comes to transferring files 39 | # scp /home/bla*bla blabla@odyssey.rc.fas.harvard.edu:~/blabla/ 40 | # scp blabla@odyssey.rc.fas.harvard.edu:~/blabla/output/*RData /home/blabla/ 41 | ### see more details on: https://www.rc.fas.harvard.edu/resources/documentation/copying-data-to-and-from-odyssey-using-scp/ 42 | 43 | ### Create appropriate folders (subfolders named "output" and "out" in particular). 44 | ### tune run.batch.ising.R appropriately (it should save output in output/ subfolder). 45 | 46 | ### Make sure path names are OK in run.odyssey.ising.sh. 47 | ### choose partition, size of array (e.g. 1-500 for 500 jobs), runtime 48 | ### To start the jobs: 49 | # sbatch run.odyssey.ising.sh 50 | 51 | ### To monitor jobs, 52 | # sacct -j JOBID 53 | ### or 54 | # squeue -j 9999999 55 | ### Or go to the website https://portal.rc.fas.harvard.edu/jobs/ 56 | ### To cancel a job, 57 | # scancel 9999999 58 | -------------------------------------------------------------------------------- /inst/reproducepumpfailures/pumpfailures.mcmc.run.R: -------------------------------------------------------------------------------- 1 | 2 | # load packages 3 | library(unbiasedmcmc) 4 | setmytheme() 5 | rm(list = ls()) 6 | set.seed(21) 7 | registerDoParallel(cores = detectCores()) 8 | 9 | # 10 | ## This example is about failures of nuclear pumps. It's classic (e.g. Example 10.17 in Robert & Casella Monte Carlo Statistical Methods) 11 | ## It's used as an example in Murdoch and Green's perfect samplers paper and also Reutter and Johnson 1995 12 | ## about using coupled chains to monitor MCMC convergence 13 | 14 | # The data: 15 | # number of failures 16 | s <- c(5, 1, 5, 14, 3, 19, 1, 1, 4, 22) 17 | # times 18 | t <- c(94.3, 15.7, 62.9, 126, 5.24, 31.4, 1.05, 1.05, 2.1, 10.5) 19 | # the model says s_k ~ Poisson(lambda_k * t_k), for k = 1,...,10 20 | ndata <- 10 21 | # and lambda_k ~ Gamma(alpha,beta), beta ~ Gamma(gamma, delta) 22 | alpha <- 1.802 23 | gamma <- 0.01 24 | delta <- 1 25 | # full conditionasl: 26 | # lambda_k given rest: Gamma(alpha + s_k, beta + t_k) 27 | # beta given rest: Gamma(gamma + 10*alpha, delta + sum_{k=1}^10 lambda_k) 28 | 29 | single_kernel <- function(state){ 30 | lambda <- state$chain_state[1:ndata] 31 | beta <- state$chain_state[ndata+1] 32 | for (k in 1:ndata){ 33 | lambda[k] <- rgamma(1, shape = alpha + s[k], rate = beta + t[k]) 34 | } 35 | beta <- rgamma(1, shape = gamma + 10*alpha, rate = delta + sum(lambda)) 36 | return(list(chain_state = c(lambda, beta))) 37 | } 38 | 39 | rinit <- function(){ 40 | return(list(chain_state = rep(1, ndata+1))) 41 | } 42 | 43 | 44 | niterations <- 5e5 45 | chain <- matrix(nrow = niterations, ncol = ndata+1) 46 | current <- rinit() 47 | chain[1,] <- current$chain_state 48 | for (iteration in 2:niterations){ 49 | current <- single_kernel(current) 50 | chain[iteration,] <- current$chain_state 51 | } 52 | save(niterations, chain, file = "pump.mcmc.RData") 53 | # load("pump.mcmc.RData") 54 | # hist(chain[,ndata+1]) 55 | -------------------------------------------------------------------------------- /inst/reproducepumpfailures/pumpfailures.mykland.run.R: -------------------------------------------------------------------------------- 1 | # load packages 2 | 3 | ## This example is about failures of nuclear pumps. It's classic (e.g. Example 10.17 in Robert & Casella Monte Carlo Statistical Methods) 4 | ## It's used as an example in Murdoch and Green's perfect samplers paper and also Reutter and Johnson 1995 5 | ## about using coupled chains to monitor MCMC convergence 6 | 7 | # The data: 8 | # number of failures 9 | s <- c(5, 1, 5, 14, 3, 19, 1, 1, 4, 22) 10 | # times 11 | t <- c(94.3, 15.7, 62.9, 126, 5.24, 31.4, 1.05, 1.05, 2.1, 10.5) 12 | # the model says s_k ~ Poisson(lambda_k * t_k), for k = 1,...,10 13 | ndata <- 10 14 | # and lambda_k ~ Gamma(alpha,beta), beta ~ Gamma(gamma, delta) 15 | alpha <- 1.802 16 | gamma <- 0.01 17 | delta <- 1 18 | # full conditionasl: 19 | # lambda_k given rest: Gamma(alpha + s_k, beta + t_k) 20 | # beta given rest: Gamma(gamma + 10*alpha, delta + sum_{k=1}^10 lambda_k) 21 | 22 | p_D <- function(Lambda,d1,d2){ 23 | F_d1 <- pgamma(d1,gamma+10*alpha,Lambda+delta) 24 | F_d2 <- pgamma(d2,gamma+10*alpha,Lambda+delta) 25 | return(F_d2-F_d1) 26 | } 27 | 28 | d_function <- function(Lambda, Lambda_tilde, d1, d2){ 29 | if(Lambda% filter(target_type == "dense"), aes(x = d, y = mean_time, group = init_type, linetype = factor(init_type))) + geom_line() + ylab("average meeting time") 12 | g <- g + scale_linetype("initialization:") + scale_x_continuous(breaks = sort(unique((df %>% filter(target_type == "dense"))$d))) 13 | g <- g + scale_y_log10(breaks = c(10,100,1e3, 1e4)) + xlab("dimension") 14 | g 15 | ggsave(filename = "scalingdimension.rwmh.maxcoupling.pdf", plot = g, width = 8, height = 6) 16 | 17 | load(file = "scalingdimension.rwmh.reflmaxcoupling.RData") 18 | g <- ggplot(df, aes(x = d, y = mean_time, group = init_type, linetype = init_type)) + geom_line() + ylab("average meeting time") 19 | g <- g + scale_x_continuous(breaks = sort(unique((df %>% filter(target_type == "dense"))$d))) 20 | g <- g + xlab("dimension") + scale_linetype("initialization:") 21 | g 22 | ggsave(filename = "scalingdimension.rwmh.reflmaxcoupling.pdf", plot = g, width = 8, height = 6) 23 | 24 | load(file = "scalingdimension.gibbs.sparse.RData") 25 | df.sparse <- df 26 | load("scalingdimension.gibbs.dense.RData") 27 | df.dense <- df 28 | 29 | g <- ggplot(df.sparse, aes(x = d, y = mean_time, group = init_type, linetype = factor(init_type))) + geom_line() + ylab("average meeting time") 30 | g <- g + scale_linetype("initialization:") + scale_x_continuous(breaks = sort(unique(df.sparse$d))) + xlab("dimension") 31 | g 32 | ggsave(filename = "scalingdimension.gibbs.sparse.pdf", plot = g, width = 8, height = 6) 33 | 34 | g <- ggplot(df.dense, aes(x = d, y = median_time, group = init_type, linetype = factor(init_type))) + geom_line() + ylab("median meeting time") 35 | g <- g + scale_linetype("initialization:") + scale_x_continuous(breaks = sort(unique(df.dense$d))) + xlab("dimension") 36 | # g <- g + scale_y_log10(breaks = c(1e1, 1e2, 1e3, 1e4, 1e5)) 37 | g 38 | ggsave(filename = "scalingdimension.gibbs.dense.pdf", plot = g, width = 8, height = 6) 39 | 40 | load("scalingdimension.wishart.hmc.meetings.RData") 41 | df.summary <- df %>% group_by(dimension, init_type) %>% summarise(mean_time = mean(meetingtimes)) 42 | g <- ggplot(df.summary, aes(x = dimension, y = mean_time, group = init_type, linetype = factor(init_type))) + geom_line() + ylab("average meeting time") 43 | g <- g + scale_linetype("initialization:") + scale_x_continuous(breaks = sort(unique(df.summary$dimension))) + xlab("dimension") + ylim(0, 60) 44 | g 45 | 46 | ggsave(filename = "scalingdimension.hmc.dense.pdf", plot = g, width = 8, height = 6) 47 | 48 | -------------------------------------------------------------------------------- /inst/reproducevarselection/run.all.R: -------------------------------------------------------------------------------- 1 | ## set working directory to the directory containing this script 2 | 3 | 4 | print("generate synthetic data sets") 5 | source("varselection.generatedata.run.R") 6 | 7 | print("generate long MCMC chain") 8 | source("varselection.mcmc.run.R") 9 | 10 | 11 | ## The rest requires running batch scripts. 12 | 13 | ## The tables were obtained by running varselection.script.R using GNU parallel. 14 | ## The script takes 8 arguments: 15 | # JOB_ID 16 | # NRUNS 17 | # design # 0 for independent design, 1 for correlated 18 | # n # number of rows 19 | # p # number of columns 20 | # SNR # Signal to noise ratio, e.g. 0.5, 1, or 2 21 | # k # set to zero if only interested in meeting times 22 | # m # set to zero if only interested in meeting times 23 | 24 | # for instance to run the script on 10 machines, 10 times on each machine, 25 | # with either design=0 and design=1, with n=500, with p=1000 and p=5000, and with k=m=0 26 | 27 | # parallel -j10 'Rscript varselection.script.R {}' ::: {1..10} ::: 10 ::: {0,1} ::: 500 ::: {1000,5000} ::: {0.5,1,2} ::: 0 ::: 0 28 | 29 | ## Once this has been run with the desired values of n and p 30 | print("producing tables") 31 | source("varselection.tables.R") 32 | 33 | ## The figures were obtained as follows. 34 | 35 | ## For the impact of dimension 36 | print("producing results on the impact of dimension") 37 | source("varselection.differentp.R") 38 | source("varselection.differentp.plots.R") 39 | 40 | 41 | ## For the impact of the hyperparameter kappa 42 | ## First, the script "varselection.clusterscript.kappas.R" was run on a cluster. 43 | ## This was done by creating a script, say "run.odyssey.varselection.kappas.sh", which contains the following lines 44 | 45 | # #!/bin/bash 46 | # #SBATCH -J varselection_rep # Job name 47 | # #SBATCH -n 1 # Number of cores 48 | # #SBATCH -N 1 # All cores on one machine 49 | # #SBATCH -t 0-24:00 # Runtime in D-HH:MM 50 | # #SBATCH -p shared # Partition to submit to (general, shared, serial_requeue) 51 | # #SBATCH --mem-per-cpu=2000M # Memory pool for all cores (see also --mem-per-cpu) 52 | # #SBATCH --mail-type=END # Type of email notification- BEGIN,END,FAIL,ALL 53 | # #SBATCH --mail-user=username@provider.com # Email 54 | # #SBATCH --array=1-600 # Requesting 100 jobs 55 | # 56 | # ## LOAD SOFTWARE ENV ## 57 | # source new-modules.sh 58 | # module load R/3.4.2-fasrc01 59 | # export R_LIBS_USER=$HOME/apps/R:$R_LIBS_USER 60 | # input=varselection.differentkapps.R #****** modify this line 61 | # cd /n/home12/pjacob/ #****** modify this line 62 | # 63 | # srun R CMD BATCH --no-save $input out/$input.$SLURM_ARRAY_TASK_ID.out #****** modify this line 64 | 65 | 66 | ## and then running in a command line 67 | ## sbatch run.odyssey.varselection.kappas.sh 68 | 69 | ## Once these are produced, it remains to run long MCMC chains for comparison, 70 | ## and to produce the plots. This is done as follows. 71 | print("producing results on the impact of hyperparameter kappa") 72 | source("varselection.differentkappas.R") 73 | source("varselection.differentkappas.plots.R") 74 | -------------------------------------------------------------------------------- /inst/reproducevarselection/varselection.differentp.R: -------------------------------------------------------------------------------- 1 | library(unbiasedmcmc) 2 | setmytheme() 3 | rm(list = ls()) 4 | set.seed(21) 5 | # 6 | library(doRNG) 7 | library(doParallel) 8 | library(tidyr) 9 | library(viridis) 10 | library(dplyr) 11 | registerDoParallel(cores = detectCores()-2) 12 | # 13 | 14 | # later put n = 500 and p = 1000 15 | n <- 500 16 | SNR <- 1 17 | # 18 | # load data 19 | load(paste0("varselection.dataSNR", SNR, ".RData")) 20 | X_full <- X 21 | Y_full <- Y 22 | kappa <- 2 23 | # subset data to desired size 24 | # 25 | s0 <- 100 26 | proportion_singleflip <- 0.5 27 | 28 | nrep <- 1000 29 | nps <- 5 30 | ps <- c(100, 250, 500, 750, 1000) 31 | df.ue <- data.frame() 32 | meetingsfilepath <- paste0("varselection.meetings.differentp.n", n, ".RData") 33 | 34 | for (ip in seq_along(ps)){ 35 | print(ip) 36 | p <- ps[ip] 37 | Y <- Y_full[1:n] 38 | X <- X_full[1:n,1:p] 39 | Y2 <- (t(Y) %*% Y)[1,1] 40 | g <- p^3 41 | # load model 42 | vs <- get_variableselection(Y,X,g,kappa,s0,proportion_singleflip) 43 | prior <- vs$prior 44 | marginal_likelihood <- vs$marginal_likelihood 45 | rinit <- vs$rinit 46 | single_kernel <- vs$single_kernel 47 | coupled_kernel <- vs$coupled_kernel 48 | unbiasedestimator <- vs$unbiasedestimator 49 | # Unbiased MCMC 50 | ues <- foreach(i = 1:nrep) %dorng% { 51 | unbiasedestimator(single_kernel, coupled_kernel, rinit, h = function(x) x, k = 0, m = 0) 52 | } 53 | meetings <- sapply(ues, function(x) x$meetingtime) 54 | df.ue <- rbind(df.ue, data.frame(irep = 1:nrep, ip = rep(ip, nrep), p = rep(p, nrep), meetings = meetings)) 55 | save(df.ue, file = meetingsfilepath) 56 | } 57 | 58 | load(meetingsfilepath) 59 | df.summary <- df.ue %>% group_by(ip,p) %>% summarise(m = mean(meetings)) %>% mutate(scaledm = m/p) 60 | 61 | g <- ggplot(df.ue, aes(x = p, y = meetings/p, group = p)) + geom_violin() + ylab("meeting times / p") 62 | g <- g + scale_x_continuous(breaks = ps) 63 | g 64 | 65 | ggsave(filename = "varselection.meetings.differentp.pdf", plot = g, width = 8, height = 6) 66 | 67 | 68 | -------------------------------------------------------------------------------- /inst/reproducevarselection/varselection.differentp.plots.R: -------------------------------------------------------------------------------- 1 | library(unbiasedmcmc) 2 | setmytheme() 3 | rm(list = ls()) 4 | set.seed(21) 5 | # 6 | library(doRNG) 7 | library(doParallel) 8 | library(tidyr) 9 | library(viridis) 10 | library(dplyr) 11 | registerDoParallel(cores = detectCores()-2) 12 | # 13 | 14 | # later put n = 500 and p = 1000 15 | n <- 500 16 | SNR <- 1 17 | # 18 | # load data 19 | load(paste0("varselection.dataSNR", SNR, ".RData")) 20 | X_full <- X 21 | Y_full <- Y 22 | kappa <- 2 23 | # subset data to desired size 24 | # 25 | s0 <- 100 26 | proportion_singleflip <- 0.5 27 | 28 | nrep <- 1000 29 | nps <- 5 30 | ps <- c(100, 250, 500, 750, 1000) 31 | df.ue <- data.frame() 32 | meetingsfilepath <- paste0("varselection.meetings.differentp.n", n, ".RData") 33 | 34 | load(meetingsfilepath) 35 | df.summary <- df.ue %>% group_by(ip,p) %>% summarise(m = mean(meetings)) %>% mutate(scaledm = m/p) 36 | 37 | g <- ggplot(df.ue, aes(x = p, y = meetings/p, group = p)) + geom_violin() + ylab("meeting times / p") 38 | g <- g + scale_x_continuous(breaks = ps) 39 | g 40 | 41 | ggsave(filename = "varselection.meetings.differentp.pdf", plot = g, width = 8, height = 6) 42 | -------------------------------------------------------------------------------- /inst/reproducevarselection/varselection.generatedata.run.R: -------------------------------------------------------------------------------- 1 | library(unbiasedmcmc) 2 | rm(list = ls()) 3 | set.seed(1) 4 | # registerDoParallel(cores = detectCores()) 5 | 6 | # simulate data 7 | n <- 1000 8 | p <- 5000 9 | for (SNR in c(0.5, 1, 2)){ 10 | s_star <- 10 11 | s0 <- 100 12 | sigma0 <- 1 13 | beta_star <- SNR * sqrt(sigma0^2 * log(p) / n) * c(2,-3,2,2,-3,3,-2,3,-2,3, rep(0, p-10)) 14 | # independent design 15 | X <- matrix(rnorm(n * p), nrow = n, ncol = p) # fast_rmvnorm_chol(n, rep(0, p), diag(1, p, p)) 16 | X <- scale(X) 17 | Y <- X %*% matrix(beta_star, ncol = 1) + rnorm(n, 0, sigma0) 18 | Y <- scale(Y) 19 | 20 | save(X, Y, beta_star, file = paste0("varselection.dataSNR", SNR, ".RData")) 21 | } 22 | # correlated design 23 | n <- 1000 24 | p <- 5000 25 | for (SNR in c(0.5, 1, 2)){ 26 | s_star <- 10 27 | s0 <- 100 28 | sigma0 <- 1 29 | beta_star <- SNR * sqrt(sigma0^2 * log(p) / n) * c(2,-3,2,2,-3,3,-2,3,-2,3, rep(0, p-10)) 30 | covariance <- matrix(0, nrow = p, ncol = p) 31 | for (i in 1:p){ 32 | for (j in 1:p){ 33 | covariance[i,j] <- exp(-abs(i-j)) 34 | } 35 | } 36 | 37 | X <- fast_rmvnorm(n, mean = rep(0, p), covariance) 38 | # X <- matrix(rnorm(n * p), nrow = n, ncol = p) # fast_rmvnorm_chol(n, rep(0, p), diag(1, p, p)) 39 | X <- scale(X) 40 | Y <- X %*% matrix(beta_star, ncol = 1) + rnorm(n, 0, sigma0) 41 | Y <- scale(Y) 42 | 43 | save(X, Y, beta_star, file = paste0("varselection.data.correlatedSNR", SNR, ".RData")) 44 | } 45 | -------------------------------------------------------------------------------- /inst/reproducevarselection/varselection.mcmc.run.R: -------------------------------------------------------------------------------- 1 | library(unbiasedmcmc) 2 | # setmytheme() 3 | rm(list = ls()) 4 | set.seed(1) 5 | 6 | # file paths 7 | n <- 500 8 | SNR <- 1 9 | p <- 1000 10 | # 11 | load(paste0("varselection.dataSNR", SNR, ".RData")) 12 | 13 | Y <- Y[1:n] 14 | X <- X[1:n,1:p] 15 | Y2 <- (t(Y) %*% Y)[1,1] 16 | g <- p^3 17 | 18 | s0 <- 100 19 | kappa <- .1 20 | proportion_singleflip <- 0.5 21 | 22 | vs <- get_variableselection(Y,X,g,kappa,s0,proportion_singleflip) 23 | 24 | prior <- vs$prior 25 | marginal_likelihood <- vs$marginal_likelihood 26 | rinit <- vs$rinit 27 | single_kernel <- vs$single_kernel 28 | 29 | nmcmc <- 2e4 30 | 31 | current_gamma <- rinit() 32 | current_pdf <- marginal_likelihood(current_gamma) + prior(current_gamma) 33 | chain <- matrix(nrow = nmcmc, ncol = p) 34 | pdfs <- rep(0, nmcmc) 35 | chain[1,] <- current_gamma 36 | pdfs[1] <- current_pdf 37 | for (imcmc in 2:nmcmc){ 38 | result <- single_kernel(current_gamma, current_pdf) 39 | current_gamma <- result$state 40 | current_pdf <- result$pdf 41 | chain[imcmc,] <- current_gamma 42 | pdfs[imcmc] <- current_pdf 43 | } 44 | # 45 | 46 | plot(pdfs, type = "l") 47 | 48 | burnin <- nmcmc/2 49 | 50 | postburnin <- chain[burnin:nmcmc,] 51 | postmean <- colMeans(postburnin) 52 | 53 | # plot(postmean, ylim = c(0,1), type = "l") 54 | plot(postmean[1:20], ylim = c(0,1), type = "b") 55 | print(postmean[1:20]) 56 | 57 | library(coda) 58 | mcmcvar <- spectrum0(postburnin[,1:10]) 59 | mcmcvar 60 | # save(nmcmc, burnin, pdfs, postmean, mcmcvar, file = "varselection.SNR1.n500.p1000.mcmc.RData") 61 | # 62 | -------------------------------------------------------------------------------- /inst/reproducevarselection/varselection.script.R: -------------------------------------------------------------------------------- 1 | # Script that sets an initial seed using rlecuyer's package 2 | # and compute a number of estimators using that seed 3 | # 4 | arguments <- commandArgs(TRUE) 5 | if (length(arguments)!=8){ 6 | cat("needs 8 integers as arguments; JOB_ID, NRUNS, design, n, p, SNR, k, m") 7 | q() 8 | } 9 | 10 | ### 11 | # arguments <- c("1", "10", "0", 1000", "500", "2", "0", "0") 12 | ### means JOB_ID=1, NRUNS=10, design=0, n=1000, p=500, SNR=2, k=0, m=0 13 | 14 | JOB_ID <- as.numeric(arguments[1]) 15 | NRUNS <- as.numeric(arguments[2]) # number of runs for this job 16 | design <- as.numeric(arguments[3]) # 0 for independent design, 1 for correlated 17 | n <- as.numeric(arguments[4]) 18 | p <- as.numeric(arguments[5]) 19 | SNR <- as.numeric(arguments[6]) 20 | k <- as.numeric(arguments[7]) 21 | m <- as.numeric(arguments[8]) 22 | 23 | # file paths 24 | workingdirectory <- "" 25 | resultpath <- paste0("varselection.design", design, ".SNR", SNR, ".n", n, ".p", p, ".k", k, ".m", m, ".job", JOB_ID, ".RData") 26 | # 27 | setwd(workingdirectory) 28 | # load packages 29 | library(rlecuyer) 30 | library(unbiasedmcmc) 31 | setmytheme() 32 | # initial seed 33 | .lec.SetPackageSeed(c(42, 66, 101, 123454, 7, 54321)) 34 | nstream <- 1000 # number larger than total # of processors expected to run this script 35 | stream.names <- paste(1:nstream) 36 | .lec.CreateStream(stream.names) 37 | .lec.CurrentStream(paste(JOB_ID)) 38 | 39 | ### beginning of job 40 | ### load data (generated by "varselection.generatedata.R") 41 | 42 | if (design == 0){ 43 | load(paste0("varselection.dataSNR", SNR, ".RData")) 44 | } else { 45 | load(paste0("varselection.data.correlatedSNR", SNR, ".RData")) 46 | } 47 | Ysub <- Y[1:n] 48 | Xsub <- X[1:n,1:p] 49 | Y2 <- (t(Ysub) %*% Ysub)[1,1] 50 | g <- p^3 51 | s0 <- 100 52 | kappa <- 2 53 | proportion_singleflip <- 0.5 54 | 55 | vs <- get_variableselection(Ysub,Xsub,g,kappa,s0,proportion_singleflip) 56 | 57 | # load functions to generate meeting times 58 | prior <- vs$prior 59 | marginal_likelihood <- vs$marginal_likelihood 60 | rinit <- vs$rinit 61 | single_kernel <- vs$single_kernel 62 | coupled_kernel <- vs$coupled_kernel 63 | coupled_chains <- vs$coupled_chains 64 | unbiasedestimator <- vs$unbiasedestimator 65 | 66 | result <- list() 67 | for (irun in 1:NRUNS){ 68 | cat("Run #", irun, "\n") 69 | ue <- unbiasedestimator(single_kernel, coupled_kernel, rinit, h = function(x) x, k = k, m = m) 70 | result[[irun]] <- list(irun = irun, JOB_ID = JOB_ID, 71 | mcmcestimator = ue$mcmcestimator, correction = ue$correction, 72 | uestimator = ue$uestimator, meetingtime = ue$meetingtime, niterations = ue$iteration) 73 | save(result, file = resultpath) 74 | } 75 | .lec.CurrentStreamEnd() 76 | -------------------------------------------------------------------------------- /inst/reproducevarselection/varselection.tables.R: -------------------------------------------------------------------------------- 1 | library(unbiasedmcmc) 2 | # setmytheme() 3 | library(dplyr) 4 | library(tidyr) 5 | rm(list = ls()) 6 | set.seed(1) 7 | 8 | # the function gets the results from all the jobs matching the 9 | # given parameters 10 | get_results <- function(design, SNR, n, p, k){ 11 | resultsfiles <- list.files(pattern = paste0("varselection.design", design, ".SNR", SNR, ".n", n, ".p", p, ".k", k, ".*")) 12 | results <- list() 13 | iresult <- 1 14 | resultsfiles 15 | nfiles <- length(resultsfiles) 16 | for (ifile in 1:nfiles){ 17 | load(resultsfiles[ifile]) 18 | cat("file", ifile, "/", nfiles, ", containing", length(result), "runs\n") 19 | for (ires in 1:length(result)){ 20 | results[[iresult]] <- result[[ires]] 21 | iresult <- iresult + 1 22 | } 23 | } 24 | return(results) 25 | } 26 | # 27 | 28 | designs <- c(0,1) 29 | SNRs <- c(0.5, 1, 2) 30 | ns <- c(500,1000) 31 | ps <- c(1000, 5000) 32 | k <- 0 33 | df_ <- data.frame() 34 | for (design in designs){ 35 | for (SNR in SNRs){ 36 | cat("SNR=", SNR, "\n") 37 | for (n in ns){ 38 | for (p in ps){ 39 | cat("p=", p, "\n") 40 | results <- get_results(design, SNR, n, p, k) 41 | meetings <- sapply(results, function(x) x$meetingtime) 42 | df_ <- rbind(df_, data.frame(design = design, SNR = SNR, n = n, p = p, meanmeeting = mean(meetings), sdmeeting = sd(meetings), nrep = length(meetings))) 43 | } 44 | } 45 | } 46 | } 47 | 48 | df_$sderror <- df_$sdmeeting/sqrt(df_$nrep) 49 | 50 | table.independent <- df_ %>% filter(design == 0) %>% select(SNR,n,p,meanmeeting) %>% arrange(n,SNR,p) %>% 51 | spread(SNR, meanmeeting) %>% setNames(c("n", "p", "SNR = 0.5", "SNR = 1", "SNR = 2")) 52 | 53 | library(xtable) 54 | cap <- "bla" 55 | formatted.df <- xtable(table.independent, digits = 0, caption = cap) 56 | formatted.df 57 | 58 | 59 | table.correlated <- df_ %>% filter(design == 1) %>% select(SNR,n,p,meanmeeting) %>% arrange(n,SNR,p) %>% 60 | spread(SNR, meanmeeting) %>% setNames(c("n", "p", "SNR = 0.5", "SNR = 1", "SNR = 2")) 61 | formatted.df <- xtable(table.correlated, digits = 0, caption = cap) 62 | formatted.df 63 | 64 | -------------------------------------------------------------------------------- /man/H_bar.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/H_bar.R 3 | \name{H_bar} 4 | \alias{H_bar} 5 | \title{Compute unbiased estimators from coupled chains} 6 | \usage{ 7 | H_bar(c_chains, h = function(x) x, k = 0, m = 1) 8 | } 9 | \arguments{ 10 | \item{c_chains}{A list containing coupled chains generated by \code{\link{sample_coupled_chains}}.} 11 | 12 | \item{h}{A test function of interest, which should take a chain state ("chain_state" entry of the output of "rinit", for instance) 13 | and return a numeric vector} 14 | 15 | \item{k}{An integer at which to start computing the unbiased estimator; should be less than m} 16 | 17 | \item{m}{A time horizon: should be less than the length of the chains; typically the same 18 | m that was used in the call to \code{\link{sample_coupled_chains}}, or a smaller value} 19 | } 20 | \value{ 21 | A value (or vector of values) of an unbiased estimator of \eqn{\int h(x) \pi(x) dx} 22 | } 23 | \description{ 24 | Compute unbiased estimators based on coupled chains. 25 | Presumably generated via \code{\link{sample_coupled_chains}}. 26 | 27 | The test function h should take a "chain_state" as argument and return a numeric vector. 28 | The estimand of interest is \eqn{\int h(x) \pi(x) dx}, where \eqn{\pi} is the invariant distribution 29 | of the chains. 30 | 31 | The lag is inferred from the coupled chains, so there's no argument to specify it. 32 | } 33 | -------------------------------------------------------------------------------- /man/c_chains_to_dataframe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/c_chains_to_dataframe.R 3 | \name{c_chains_to_dataframe} 4 | \alias{c_chains_to_dataframe} 5 | \title{Obtain data frame representation of measure from list of coupled chains} 6 | \usage{ 7 | c_chains_to_dataframe(c_chains, k, m, dopar = FALSE, prune = TRUE) 8 | } 9 | \arguments{ 10 | \item{c_chains}{A list containing coupled chains generated by \code{\link{sample_coupled_chains}}.} 11 | 12 | \item{k}{An integer at which to start computing the signed measure; should be less than m} 13 | 14 | \item{m}{A time horizon: should be less than the length of the chains; typically the same 15 | m that was used in the call to \code{\link{sample_coupled_chains}}, or a smaller value} 16 | 17 | \item{dopar}{Boolean (default to FALSE) indicating whether to perform calculation using registered parallel cores} 18 | } 19 | \value{ 20 | A data frame 21 | } 22 | \description{ 23 | From coupled chains, 24 | presumably generated via \code{\link{sample_coupled_chains}}, 25 | and a choice of integers k and m, the function constructs 26 | a data frame representation of an empirical signed measure, i.e. 27 | 28 | \deqn{\hat{\pi}(dx) = \sum_{n=1}^N \omega_n \delta_{Z_n}(dx)} 29 | 30 | The function returns a data frame with first column "rep" indicating index of coupled chain, 31 | second column "MCMC" indicating whether atom is part of the "MCMC" part of the signed measure (1) or the bias correction part (0) 32 | third column "weight" indicating weights, 33 | remaining columns "atom.1", "atom.2", etc containing components of the atoms 34 | } 35 | -------------------------------------------------------------------------------- /man/c_chains_to_measure_as_list.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/c_chains_to_measure_as_list.R 3 | \name{c_chains_to_measure_as_list} 4 | \alias{c_chains_to_measure_as_list} 5 | \title{Obtain empirical measure (as list) from coupled chains} 6 | \usage{ 7 | c_chains_to_measure_as_list(c_chains, k, m) 8 | } 9 | \arguments{ 10 | \item{c_chains}{A list containing coupled chains generated by \code{\link{sample_coupled_chains}}.} 11 | 12 | \item{k}{An integer at which to start computing the signed measure; should be less than m} 13 | 14 | \item{m}{A time horizon: should be less than the length of the chains; typically the same 15 | m that was used in the call to \code{\link{sample_coupled_chains}}, or a smaller value} 16 | } 17 | \value{ 18 | A list with "weights" and "atoms" 19 | } 20 | \description{ 21 | From coupled chains, 22 | presumably generated via \code{\link{sample_coupled_chains}}, 23 | and a choice of integers k and m, the function constructs 24 | a representation of an empirical signed measure, i.e. 25 | 26 | \deqn{\hat{\pi}(dx) = \sum_{n=1}^N \omega_n \delta_{Z_n}(dx)} 27 | 28 | The function returns the weights \eqn{\omega} and the atoms \eqn{Z}, 29 | in a list with two entries, "weights" and "atoms". 30 | } 31 | -------------------------------------------------------------------------------- /man/dinversegamma.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/invgamma_couplings.R 3 | \name{dinversegamma} 4 | \alias{dinversegamma} 5 | \title{compute log-density of inverse gamma} 6 | \usage{ 7 | dinversegamma(x, alpha, beta) 8 | } 9 | \description{ 10 | at x > 0 and with given parameters alpha, beta, given by 11 | alpha * log(beta) - lgamma(alpha) - (alpha+1) * log(x) - beta / x 12 | } 13 | -------------------------------------------------------------------------------- /man/dinvgaussian.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/invgaussian_couplings.R 3 | \name{dinvgaussian} 4 | \alias{dinvgaussian} 5 | \title{Log-density of inverse Gaussian} 6 | \usage{ 7 | dinvgaussian(x, mu, lambda) 8 | } 9 | \value{ 10 | A vector of n log-pdf values, one for each element in the first argument 'x'. 11 | } 12 | \description{ 13 | Computes log-pdf at x > 0 of inverse Gaussian with given parameters mu, lambda, given by 14 | \deqn{0.5 * log(\lambda/(2*\pi)) - 1.5 * log(x) - \lambda * (x-\mu)^2 / (2 * \mu^2 * x)} 15 | } 16 | -------------------------------------------------------------------------------- /man/expit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{expit} 4 | \alias{expit} 5 | \title{expit} 6 | \usage{ 7 | expit(z) 8 | } 9 | \description{ 10 | expit function 11 | } 12 | -------------------------------------------------------------------------------- /man/fast_dmvnorm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mvnorm.R 3 | \name{fast_dmvnorm} 4 | \alias{fast_dmvnorm} 5 | \title{fast_dmvnorm} 6 | \usage{ 7 | fast_dmvnorm(x, mean, covariance) 8 | } 9 | \arguments{ 10 | \item{x}{A matrix of size n times d} 11 | 12 | \item{mean}{A vector of size d specifying the mean vector of the multivariate Normal} 13 | 14 | \item{covariance}{A matrix of size d x d specifying the covariance matrix of the multivariate Normal} 15 | } 16 | \value{ 17 | A vector of n evaluations of the multivariate Normal log-pdf, one for each row of \code{x} 18 | } 19 | \description{ 20 | Compute multivariate Normal density (log-value) evaluated at each row of a given matrix. The function does not check 21 | the arguments, use at your own risk. 22 | } 23 | \examples{ 24 | x <- fast_rmvnorm(2, rep(0, 5), diag(1,5,5)) 25 | fast_dmvnorm(x, rep(0, 5), diag(1,5,5)) 26 | } 27 | -------------------------------------------------------------------------------- /man/fast_dmvnorm_chol_inverse.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mvnorm.R 3 | \name{fast_dmvnorm_chol_inverse} 4 | \alias{fast_dmvnorm_chol_inverse} 5 | \title{fast_dmvnorm_chol_inverse} 6 | \usage{ 7 | fast_dmvnorm_chol_inverse(x, mean, chol_inverse) 8 | } 9 | \arguments{ 10 | \item{x}{A matrix of size n times d} 11 | 12 | \item{mean}{A vector of size d specifying the mean vector of the multivariate Normal} 13 | 14 | \item{chol_inverse}{A matrix of size d x d specifying the inverse of the upper-triangular Cholesky 15 | factor of the covariance matrix of the multivariate Normal, 16 | for instance obtained using \code{solve(chol(Sigma))}} 17 | } 18 | \value{ 19 | A vector of n evaluations of the multivariate Normal log-pdf, one for each row of \code{x} 20 | } 21 | \description{ 22 | Compute multivariate Normal density (log-value) evaluated at each row of a given matrix. The function does not check 23 | the arguments, use at your own risk. 24 | } 25 | \examples{ 26 | Sigma <- diag(1, 5, 5) 27 | Sigma[1,2] <- Sigma[2,1] <- 0.3 28 | Sigma_chol <- chol(Sigma) 29 | x <- fast_rmvnorm_chol(2, rep(0, 5), Sigma_chol) 30 | fast_dmvnorm_chol_inverse(x, rep(0, 5), solve(Sigma_chol)) 31 | } 32 | -------------------------------------------------------------------------------- /man/fast_rmvnorm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mvnorm.R 3 | \name{fast_rmvnorm} 4 | \alias{fast_rmvnorm} 5 | \title{fast_rmvnorm} 6 | \usage{ 7 | fast_rmvnorm(n, mean, covariance) 8 | } 9 | \arguments{ 10 | \item{n}{An integer >= 1 specifying the desired number of draws} 11 | 12 | \item{mean}{A vector of size d specifying the mean vector of the multivariate Normal} 13 | 14 | \item{covariance}{A matrix of size d x d specifying the covariance matrix of the multivariate Normal} 15 | } 16 | \value{ 17 | A matrix of size n x d containing n d-dimensional multivariate Normal draws (one per row) 18 | } 19 | \description{ 20 | Generate multivariate Normal draws. The function does not check 21 | the arguments, use at your own risk. 22 | } 23 | \examples{ 24 | fast_rmvnorm(2, rep(0, 5), diag(1, 5, 5)) 25 | } 26 | -------------------------------------------------------------------------------- /man/fast_rmvnorm_chol.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mvnorm.R 3 | \name{fast_rmvnorm_chol} 4 | \alias{fast_rmvnorm_chol} 5 | \title{fast_rmvnorm_chol} 6 | \usage{ 7 | fast_rmvnorm_chol(nparticles, mean, chol) 8 | } 9 | \arguments{ 10 | \item{mean}{A vector of size d specifying the mean vector of the multivariate Normal} 11 | 12 | \item{chol}{A matrix of size d x d specifying the upper triangular Cholesky factor 13 | of the covariance matrix of the multivariate Normal target, 14 | for instance obtained using the \code{\link[base]{chol}} 15 | function of R.} 16 | 17 | \item{n}{An integer >= 1 specifying the desired number of draws} 18 | } 19 | \value{ 20 | A matrix of size n x d containing n d-dimensional multivariate Normal draws (one per row) 21 | } 22 | \description{ 23 | Generate multivariate Normal draws. The function does not check 24 | the arguments, use at your own risk. 25 | } 26 | \examples{ 27 | Sigma <- diag(1, 5, 5) 28 | Sigma[1,2] <- Sigma[2,1] <- 0.3 29 | fast_rmvnorm_chol(2, rep(0, 5), chol(Sigma)) 30 | } 31 | -------------------------------------------------------------------------------- /man/get_blasso.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bayesianlasso.R 3 | \name{get_blasso} 4 | \alias{get_blasso} 5 | \title{Y and X need to be matrices, and lambda non-negative} 6 | \usage{ 7 | get_blasso(Y, X, lambda) 8 | } 9 | \description{ 10 | Y and X need to be matrices, and lambda non-negative 11 | } 12 | -------------------------------------------------------------------------------- /man/get_max_coupling.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_max_coupling.R 3 | \name{get_max_coupling} 4 | \alias{get_max_coupling} 5 | \title{Sample from maximal coupling of two distributions p and q} 6 | \usage{ 7 | get_max_coupling(rp, dp, rq, dq) 8 | } 9 | \arguments{ 10 | \item{rp}{A function taking n as an argument and returning n samples from the distribution p} 11 | 12 | \item{dp}{A function taking x as an argument and returning log-pdf of p evaluated at x} 13 | 14 | \item{rq}{A function taking n as an argument and returning n samples from the distribution q} 15 | 16 | \item{dq}{A function taking x as an argument and returning log-pdf of q evaluated at x} 17 | } 18 | \value{ 19 | Returns a list with 20 | 21 | \itemize{ 22 | \item "xy": the pair of samples \eqn{(x,y)} 23 | 24 | \item "identical": TRUE if \eqn{x = y}, FALSE otherwise 25 | } 26 | } 27 | \description{ 28 | Takes two univariate continuous distributions (specified by random number generator and log-pdf function), 29 | and returns a function to sample from a maximal coupling of these two distributions. 30 | } 31 | \examples{ 32 | mu1 <- 0; mu2 <- 1; sigma1 <- 0.5; sigma2 <- 1.2 33 | f <- get_max_coupling(function(n) rnorm(n, mu1, sigma1), 34 | function(x) dnorm(x, mu1, sigma1, log = TRUE), 35 | function(n) rnorm(n, mu2, sigma2), 36 | function(x) dnorm(x, mu2, sigma2, log = TRUE)) 37 | f() 38 | } 39 | -------------------------------------------------------------------------------- /man/get_mh_kernel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_mh_kernels.R 3 | \name{get_mh_kernels} 4 | \alias{get_mh_kernels} 5 | \title{Get random walk Metropolis-Hastings kernels} 6 | \usage{ 7 | get_mh_kernels(target, Sigma_proposal) 8 | } 9 | \arguments{ 10 | \item{target}{function taking a vector as input and returning target log-density evaluation} 11 | 12 | \item{Sigma_proposal}{covariance of the Normal random walk proposal} 13 | } 14 | \value{ 15 | A list containing the keys 16 | \code{single_kernel}, \code{coupled_kernel}. 17 | } 18 | \description{ 19 | This function takes a target (specified through its log-pdf) 20 | and a covariance matrix for a Normal random walk proposal, and returns a list containing the keys 21 | \code{single_kernel}, \code{coupled_kernel} corresponding to marginal 22 | and coupled MH kernels. 23 | 24 | The coupling is done by reflection-maximal coupling of the proposals, 25 | and common uniform variable for the accept/reject step. For reflection-maximal 26 | couplings, see \code{\link{rnorm_reflectionmax}} and \code{\link{rmvnorm_reflectionmax}}. 27 | 28 | The returned kernels can then be used in the functions \code{\link{sample_meetingtime}} or 29 | \code{\link{sample_coupled_chains}} or \code{\link{sample_unbiasedestimator}}. 30 | } 31 | -------------------------------------------------------------------------------- /man/get_variableselection.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/variableselection.R 3 | \name{get_variableselection} 4 | \alias{get_variableselection} 5 | \title{Y and X need to be matrices, and lambda non-negative} 6 | \usage{ 7 | get_variableselection(Y, X, g, kappa, s0, proportion_singleflip) 8 | } 9 | \description{ 10 | Y and X need to be matrices, and lambda non-negative 11 | } 12 | -------------------------------------------------------------------------------- /man/hello.Rd: -------------------------------------------------------------------------------- 1 | \name{hello} 2 | \alias{hello} 3 | \title{Hello, World!} 4 | \usage{ 5 | hello() 6 | } 7 | \description{ 8 | Prints 'Hello, world!'. 9 | } 10 | \examples{ 11 | hello() 12 | } 13 | -------------------------------------------------------------------------------- /man/histogram_c_chains.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/histogram_c_chains.R 3 | \name{histogram_c_chains} 4 | \alias{histogram_c_chains} 5 | \title{histogram_c_chains} 6 | \usage{ 7 | histogram_c_chains( 8 | c_chains, 9 | component, 10 | k, 11 | m, 12 | breaks = NULL, 13 | nclass = 30, 14 | dopar = FALSE 15 | ) 16 | } 17 | \arguments{ 18 | \item{c_chains}{A list of coupled chains, each as produced by \code{\link{sample_coupled_chains}}} 19 | 20 | \item{component}{An integer specifying which marginal to approximate} 21 | 22 | \item{k}{An integer (see \code{\link{H_bar}})} 23 | 24 | \item{m}{Another integer (see \code{\link{H_bar}})} 25 | 26 | \item{breaks}{A vector indicating how to bin the space (optional)} 27 | 28 | \item{nclass}{An integer specifying the number of bins to aim for, if "breaks" is not specified} 29 | 30 | \item{dopar}{A boolean indicating whether to parallelize the computation (requires doParallel and having registed parallel cores)} 31 | } 32 | \description{ 33 | Compute histogram approximations of marginal distributions based on coupled Markov chains 34 | } 35 | -------------------------------------------------------------------------------- /man/logistic_precomputation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/logistic_regression.R 3 | \name{logisticregression_precomputation} 4 | \alias{logisticregression_precomputation} 5 | \title{Precomputation to prepare for the Polya-Gamma sampler} 6 | \usage{ 7 | logisticregression_precomputation(Y, X, b, B) 8 | } 9 | \description{ 10 | This function takes the canonical elements defining the logistic regression 11 | problem (the vector of outcome Y, covariate matrix X, the prior mean b and the prior variance B), 12 | and precomputes some quantities repeatedly used in the Polya-Gamma sampler and variants of it. 13 | The precomputed quantities are returned in a list, which is then meant to be passed to the samplers. 14 | } 15 | -------------------------------------------------------------------------------- /man/pg_gibbs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/logistic_regression.R 3 | \name{pg_gibbs} 4 | \alias{pg_gibbs} 5 | \title{Polya-Gamma Gibbs sampler} 6 | \usage{ 7 | pg_gibbs(niterations, logistic_setting) 8 | } 9 | \value{ 10 | a matrix where each row corresponds to an iteration of the sampler, and contains 11 | the regression coefficient at that iteration. 12 | } 13 | \description{ 14 | This implements the sampler proposed in 15 | Nicholas G Polson, James G Scott, and Jesse Windle. Bayesian inference for logistic models using 16 | Polya–Gamma latent variables. Journal of the American statistical Association, 108(504):1339–1349, 2013. 17 | The arguments are: 18 | \itemize{ 19 | \item niterations: the number of desired MCMC iterations, 20 | \item logistic_setting: a list of precomputed quantities obtained via 'logistic_precomputation'. 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /man/rcpp_hello.Rd: -------------------------------------------------------------------------------- 1 | \name{rcpp_hello} 2 | \alias{rcpp_hello} 3 | \title{Hello, Rcpp!} 4 | \usage{ 5 | rcpp_hello() 6 | } 7 | \description{ 8 | Returns an \R \code{list} containing the character vector 9 | \code{c("foo", "bar")} and the numeric vector \code{c(0, 1)}. 10 | } 11 | \examples{ 12 | rcpp_hello() 13 | } 14 | -------------------------------------------------------------------------------- /man/rgamma_coupled.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gamma_couplings.R 3 | \name{rgamma_coupled} 4 | \alias{rgamma_coupled} 5 | \title{Sample from maximally coupled Gamma variables} 6 | \usage{ 7 | rgamma_coupled(alpha1, alpha2, beta1, beta2) 8 | } 9 | \arguments{ 10 | \item{alpha1}{First shape} 11 | 12 | \item{alpha2}{Second shape} 13 | 14 | \item{beta1}{First rate} 15 | 16 | \item{beta2}{Second rate} 17 | } 18 | \value{ 19 | A list with entry 'xy' for the pair of values, and boolean 'identical' indicating whether the two values 20 | are identical. 21 | } 22 | \description{ 23 | Draws a pair of variables, respectively Gamma(alpha1, beta1) and Gamma(alpha2, beta2) 24 | where the parametrization is that beta is the rate, i.e. the log-pdf of Gamma(alpha,beta) evaluated at x is 25 | \deqn{\alpha * log(\beta) - lgamma(\alpha) + (\alpha-1) * log(x) - \beta x} 26 | where \eqn{lgamma} stands for the logarithm of the Gamma function. 27 | } 28 | -------------------------------------------------------------------------------- /man/rinversegamma.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/invgamma_couplings.R 3 | \name{rinversegamma} 4 | \alias{rinversegamma} 5 | \title{Sample from inverse gamma} 6 | \usage{ 7 | rinversegamma(n, alpha, beta) 8 | } 9 | \description{ 10 | with given parameters alpha, beta, with log-density given by 11 | alpha * log(beta) - lgamma(alpha) - (alpha+1) * log(x) - beta / x 12 | } 13 | -------------------------------------------------------------------------------- /man/rinversegamma_coupled.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/invgamma_couplings.R 3 | \name{rinversegamma_coupled} 4 | \alias{rinversegamma_coupled} 5 | \title{Sample from maximally coupled inverse gamma} 6 | \usage{ 7 | rinversegamma_coupled(alpha1, alpha2, beta1, beta2) 8 | } 9 | \description{ 10 | with given parameters alpha1, alpha2, beta1, beta2, 11 | where the parametrization is that the log-density of IG(alpha, beta) is 12 | alpha * log(beta) - lgamma(alpha) - (alpha+1) * log(x) - beta / x 13 | } 14 | -------------------------------------------------------------------------------- /man/rinvgaussian.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/invgaussian_couplings.R 3 | \name{rinvgaussian} 4 | \alias{rinvgaussian} 5 | \title{Sample from inverse Gaussian} 6 | \usage{ 7 | rinvgaussian(n, mu, lambda) 8 | } 9 | \value{ 10 | A vector of n draws, where n is the first argument. 11 | } 12 | \description{ 13 | Parametrized by mu, lambda, with log-density given by 14 | \deqn{0.5 * log(\lambda/(2*\pi)) - 1.5 * log(x) - \lambda * (x-\mu)^2 / (2 * \mu^2 * x)} 15 | 16 | The procedure goes as follows. 17 | 18 | \itemize{ 19 | \item Generate nu ~ Normal(0,1). 20 | \item Define y = nu^2. 21 | \item Define x = mu + mu^2 * y / (2 * lambda) - mu / (2 * lambda) * sqrt(4 * mu * lambda * y + mu^2 * y^2). 22 | \item Generate Z ~ Uniform(0,1). 23 | \item If z <= mu / (mu + x), output x, otherwise output mu^2 / x. 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /man/rinvgaussian_coupled.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/invgaussian_couplings.R 3 | \name{rinvgaussian_coupled} 4 | \alias{rinvgaussian_coupled} 5 | \title{Sample from maximally coupled inverse Gaussian} 6 | \usage{ 7 | rinvgaussian_coupled(mu1, mu2, lambda1, lambda2) 8 | } 9 | \value{ 10 | A pair of values in a vector of size two. 11 | } 12 | \description{ 13 | with parameters mu1, mu2, lambda1, lambda2; see \code{\link{rinvgaussian}}. 14 | } 15 | -------------------------------------------------------------------------------- /man/rmvnorm_max.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mvnorm_couplings.R 3 | \name{rmvnorm_max} 4 | \alias{rmvnorm_max} 5 | \title{Maximal coupling of two multivariate Normal distributions} 6 | \usage{ 7 | rmvnorm_max(mu1, mu2, Sigma1, Sigma2) 8 | } 9 | \arguments{ 10 | \item{mu1}{First mean} 11 | 12 | \item{mu2}{First mean} 13 | 14 | \item{Sigma1}{First covariance matrix} 15 | 16 | \item{Sigma2}{Second covariance matrix} 17 | } 18 | \value{ 19 | A list containing 'xy', a matrix with 2 columns (one for each draw), 20 | and a boolean indicator 'identical' indicating whether the two draws 21 | are identical. 22 | } 23 | \description{ 24 | Sample from maximal coupling of two multivariate Normal distributions, 25 | specified through their means and covariance matrices. See \code{\link{rmvnorm_max_chol}} 26 | for a version using Cholesky factors and their inverses. 27 | See \code{\link{rmvnorm_reflectionmax}} 28 | for a reflection-maximal coupling, in the case Sigma1=Sigma2. 29 | } 30 | \examples{ 31 | p <- 3 32 | mu1 <- rep(0, p) 33 | mu2 <- rep(1, p) 34 | Sigma1 <- diag(0.4, p, p) 35 | Sigma1[1,2] <- Sigma1[2,1] <- 0.2 36 | Sigma2 <- diag(1.4, p, p) 37 | Sigma2[1,2] <- Sigma2[2,1] <- -0.5 38 | rmvnorm_max(mu1, mu2, Sigma1, Sigma2) 39 | } 40 | -------------------------------------------------------------------------------- /man/rmvnorm_max_chol.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mvnorm_couplings.R 3 | \name{rmvnorm_max_chol} 4 | \alias{rmvnorm_max_chol} 5 | \title{Maximal coupling of two multivariate Normal distributions} 6 | \usage{ 7 | rmvnorm_max_chol( 8 | mu1, 9 | mu2, 10 | Cholesky1, 11 | Cholesky2, 12 | Cholesky_inverse1, 13 | Cholesky_inverse2 14 | ) 15 | } 16 | \arguments{ 17 | \item{mu1}{First mean} 18 | 19 | \item{mu2}{First mean} 20 | 21 | \item{Cholesky1}{First Cholesky factor, e.g. obtained with \code{\link[base]{chol}}} 22 | 23 | \item{Cholesky2}{Second Cholesky factor} 24 | 25 | \item{Cholesky_inverse1}{First inverse of Cholesky factor, e.g. obtained with \code{solve(chol(Sigma))}} 26 | 27 | \item{Cholesky_inverse2}{Second inverse of Cholesky factor} 28 | } 29 | \value{ 30 | A list containing 'xy', a matrix with 2 columns (one for each draw), 31 | and a boolean indicator 'identical' indicating whether the two draws 32 | are identical. 33 | } 34 | \description{ 35 | Sample from maximal coupling of two multivariate Normal distributions, 36 | specified through their means, the Cholesky factors of their covariance matrices, 37 | and the inverse of the Cholesky factors of the covariance matrices. 38 | } 39 | \examples{ 40 | p <- 3 41 | mu1 <- rep(0, p) 42 | mu2 <- rep(1, p) 43 | Sigma1 <- diag(0.4, p, p) 44 | Sigma1[1,2] <- Sigma1[2,1] <- 0.2 45 | Sigma2 <- diag(1.4, p, p) 46 | Sigma2[1,2] <- Sigma2[2,1] <- -0.5 47 | Sigma1_chol <- chol(Sigma1) 48 | Sigma2_chol <- chol(Sigma2) 49 | Sigma1_chol_inv <- solve(Sigma1_chol) 50 | Sigma2_chol_inv <- solve(Sigma2_chol) 51 | rmvnorm_max_chol(mu1, mu2, Sigma1_chol, Sigma2_chol, Sigma1_chol_inv, Sigma2_chol_inv) 52 | } 53 | -------------------------------------------------------------------------------- /man/rmvnorm_reflectionmax.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mvnorm_couplings.R 3 | \name{rmvnorm_reflectionmax} 4 | \alias{rmvnorm_reflectionmax} 5 | \title{Reflection-Maximal coupling of two multivariate Normal distributions} 6 | \usage{ 7 | rmvnorm_reflectionmax(mu1, mu2, Cholesky, Cholesky_inverse) 8 | } 9 | \arguments{ 10 | \item{mu1}{First mean} 11 | 12 | \item{mu2}{First mean} 13 | 14 | \item{Cholesky}{Cholesky factor, e.g. obtained with \code{\link[base]{chol}}} 15 | 16 | \item{Cholesky_inverse}{Inverse of Cholesky factor, e.g. obtained with \code{solve(chol(Sigma))}} 17 | } 18 | \value{ 19 | A list containing 'xy', a matrix with 2 columns (one for each draw), 20 | and a boolean indicator 'identical' indicating whether the two draws 21 | are identical. 22 | } 23 | \description{ 24 | Sample from reflection-maximal coupling of two multivariate Normal distributions, 25 | specified through their means, with the same covariance matrix, specified 26 | through its Cholesky factor and inverse of Cholesky factor. 27 | 28 | The idea is that a multivariate Normal is drawn around the first mean (mu1), 29 | and then reflected with respect to a hyperplane orthogonal to the direction between mu1 and mu2. 30 | 31 | For univariate Normal distribution, see \code{\link{rnorm_reflectionmax}}. 32 | } 33 | \examples{ 34 | p <- 3 35 | mu1 <- rep(0, p) 36 | mu2 <- rep(1, p) 37 | Sigma <- diag(0.4, p, p) 38 | Sigma[1,2] <- Sigma[2,1] <- 0.2 39 | Sigma_chol <- chol(Sigma) 40 | Sigma_chol_inv <- solve(Sigma_chol) 41 | rmvnorm_reflectionmax(mu1, mu2, Sigma_chol, Sigma_chol_inv) 42 | } 43 | -------------------------------------------------------------------------------- /man/rnorm_max_coupling.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rnorm_max_coupling.R 3 | \name{rnorm_max_coupling} 4 | \alias{rnorm_max_coupling} 5 | \title{Maximal coupling of two univariate Normal distributions} 6 | \usage{ 7 | rnorm_max_coupling(mu1, mu2, sigma1, sigma2) 8 | } 9 | \arguments{ 10 | \item{mu1}{First mean} 11 | 12 | \item{mu2}{Second mean} 13 | 14 | \item{sigma1}{First mean} 15 | 16 | \item{sigma2}{Second mean} 17 | } 18 | \value{ 19 | Returns a list with 20 | 21 | \itemize{ 22 | \item "xy": the pair of samples \eqn{(x,y)} 23 | 24 | \item "identical": TRUE if \eqn{x = y}, FALSE otherwise 25 | } 26 | } 27 | \description{ 28 | Sample from maximal coupling of two univariate Normal distributions, 29 | specified through their means and standard deviations. See \code{\link{rmvnorm_max}} for a multivariate version. 30 | } 31 | -------------------------------------------------------------------------------- /man/rnorm_reflectionmax.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rnorm_reflectionmax.R 3 | \name{rnorm_reflectionmax} 4 | \alias{rnorm_reflectionmax} 5 | \title{Reflection-maximal coupling of two univariate Normal distributions} 6 | \usage{ 7 | rnorm_reflectionmax(mu1, mu2, sigma) 8 | } 9 | \arguments{ 10 | \item{mu1}{First mean} 11 | 12 | \item{mu2}{Second mean} 13 | 14 | \item{sigma}{Common standard deviation} 15 | } 16 | \value{ 17 | Returns a list with 18 | \itemize{ 19 | 20 | \item "xy": the pair of samples \eqn{(x,y)} 21 | 22 | \item "identical": TRUE if \eqn{x = y}, FALSE otherwise 23 | } 24 | } 25 | \description{ 26 | Sample from reflection-maximal coupling of two univariate Normal distributions, 27 | specified through their means, with common standard deviation. 28 | See \code{\link{rmvnorm_reflectionmax}} for a multivariate version. 29 | } 30 | -------------------------------------------------------------------------------- /man/sample_coupled_chains.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/coupled_chains.R 3 | \name{sample_coupled_chains} 4 | \alias{sample_coupled_chains} 5 | \title{Sample coupled Markov chains} 6 | \usage{ 7 | sample_coupled_chains( 8 | single_kernel, 9 | coupled_kernel, 10 | rinit, 11 | m = 1, 12 | lag = 1, 13 | max_iterations = Inf, 14 | preallocate = 10 15 | ) 16 | } 17 | \arguments{ 18 | \item{single_kernel}{A list taking a state and returning a state, performing one step of a Markov kernel} 19 | 20 | \item{coupled_kernel}{A list taking two states and returning two states, performing one step of a coupled Markov kernel; 21 | it also returns a boolean "identical" indicating whether the two states are identical.} 22 | 23 | \item{rinit}{A list representing the initial state of the chain, that can be given to 'single_kernel'} 24 | 25 | \item{m}{A time horizon: the chains are sampled until the maximum between m and the meeting time} 26 | 27 | \item{lag}{A time lag, equal to one by default} 28 | 29 | \item{max_iterations}{A maximum number of iterations, at which to interrup the while loop; Inf by default} 30 | 31 | \item{preallocate}{A number of anticipated iterations, used to pre-allocate memory; 10 by default} 32 | } 33 | \value{ 34 | A list with 35 | \itemize{ 36 | 37 | \item samples1: the first chain, of length max(m, tau) 38 | 39 | \item samples2: the second chain, of length max(m, tau) - lag 40 | 41 | \item meetingtime: the meeting time; equal to Inf if while loop was interrupted 42 | 43 | \item iteration: final iteration; could be equal to m, to meetingtime, or to max_iterations 44 | 45 | \item elapsedtime: elapsed wall-clock time, in seconds 46 | 47 | \item cost: computing cost in terms of calls to Markov kernels (counting coupled kernel as twice the cost) 48 | } 49 | } 50 | \description{ 51 | Sample two Markov chains, each following 'single_kernel' marginally, 52 | and 'coupled_kernel' jointly, until min(max(tau, m), max_iterations), where tau 53 | is the first time the two chains meet (the "meeting time"). 54 | 55 | Or more precisely, they meet with a delay of lag, i.e. X_t = Y_{t-lag}, and lag is one by default. 56 | 57 | Once the coupled chains are obtained, unbiased estimators can be computed for arbitrary test 58 | functions via the function \code{\link{H_bar}}. 59 | 60 | If you're only interested in sampling meeting times, see \code{\link{sample_meetingtime}}. 61 | } 62 | -------------------------------------------------------------------------------- /man/sample_meetingtime.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/meetingtime.R 3 | \name{sample_meetingtime} 4 | \alias{sample_meetingtime} 5 | \title{Sample coupled Markov chains until meeting} 6 | \usage{ 7 | sample_meetingtime( 8 | single_kernel, 9 | coupled_kernel, 10 | rinit, 11 | lag = 1, 12 | max_iterations = Inf 13 | ) 14 | } 15 | \arguments{ 16 | \item{single_kernel}{A list taking a state and returning a state, performing one step of a Markov kernel} 17 | 18 | \item{coupled_kernel}{A list taking two states and returning two states, performing one step of a coupled Markov kernel; 19 | it also returns a boolean "identical" indicating whether the two states are identical.} 20 | 21 | \item{rinit}{A list representing the initial state of the chain, that can be given to 'single_kernel'} 22 | 23 | \item{lag}{A time lag, equal to one by default} 24 | 25 | \item{max_iterations}{A maximum number of iterations, at which to interrup the while loop; Inf by default} 26 | } 27 | \value{ 28 | A list with 29 | \itemize{ 30 | 31 | \item meetingtime: the meeting time; equal to Inf if while loop was interrupted 32 | 33 | \item elapsedtime: elapsed wall-clock time, in seconds 34 | } 35 | } 36 | \description{ 37 | Sample two Markov chains, each following 'single_kernel' marginally, 38 | until they meet, and report the meeting time, as well as the elapsed wall-clock time in seconds. 39 | 40 | This function does not record the trajectories of the chains, with the goal of being memory-light. 41 | To record these trajectories, see \code{\link{sample_coupled_chains}}. To directly 42 | compute unbiased estimators on the fly, see \code{\link{sample_unbiasedestimator}}. 43 | } 44 | -------------------------------------------------------------------------------- /man/sample_unbiasedestimator.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/unbiasedestimator.R 3 | \name{sample_unbiasedestimator} 4 | \alias{sample_unbiasedestimator} 5 | \title{Unbiased MCMC estimators} 6 | \usage{ 7 | sample_unbiasedestimator( 8 | single_kernel, 9 | coupled_kernel, 10 | rinit, 11 | h = function(x) x, 12 | k = 0, 13 | m = 1, 14 | lag = 1, 15 | max_iterations = Inf 16 | ) 17 | } 18 | \arguments{ 19 | \item{single_kernel}{A list taking a state and returning a state, performing one step of a Markov kernel} 20 | 21 | \item{coupled_kernel}{A list taking two states and returning two states, performing one step of a coupled Markov kernel; 22 | it also returns a boolean "identical" indicating whether the two states are identical.} 23 | 24 | \item{rinit}{A list representing the initial state of the chain, that can be given to 'single_kernel'} 25 | 26 | \item{h}{A test function of interest, which should take a chain state ("chain_state" entry of the output of "rinit", for instance) 27 | and return a numeric vector} 28 | 29 | \item{k}{An integer at which to start computing the unbiased estimator} 30 | 31 | \item{m}{A time horizon: the chains are sampled until the maximum between m and the meeting time} 32 | 33 | \item{lag}{A time lag, equal to one by default} 34 | 35 | \item{max_iterations}{A maximum number of iterations, at which to interrup the while loop; Inf by default} 36 | } 37 | \value{ 38 | A list with 39 | \itemize{ 40 | 41 | \item mcmcestimator: an MCMC estimator computed on the first chain, from step k to m 42 | 43 | \item correction: the bias correction term 44 | 45 | \item uestimator: unbiased estimator, equal to the sum of mcmcestimator and correction 46 | 47 | \item meetingtime: the meeting time; equal to Inf if while loop was interrupted 48 | 49 | \item iteration: final iteration; could be equal to m, to meetingtime, or to max_iterations 50 | 51 | \item elapsedtime: elapsed wall-clock time, in seconds 52 | 53 | \item cost: computing cost in terms of calls to Markov kernels (counting coupled kernel as twice the cost) 54 | } 55 | } 56 | \description{ 57 | Sample two Markov chains, each following 'single_kernel' marginally, 58 | and 'coupled_kernel' jointly, until min(max(tau, m), max_iterations), where tau 59 | is the first time the two chains meet (the "meeting time"). An unbiased estimator 60 | of the expectation of a test function h is computed on the fly, between step k and step m, and returned. 61 | 62 | Allows for an arbitrary lag, i.e. X_t = Y_{t-lag}, and lag is one by default. 63 | 64 | Compared to \code{\link{sample_coupled_chains}} this function requires specifying 65 | the test function, but does not record the trajectories, and thus is memory-light. 66 | 67 | If you're only interested in sampling meeting times, see \code{\link{sample_meetingtime}}. 68 | } 69 | -------------------------------------------------------------------------------- /man/setmytheme.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{setmytheme} 4 | \alias{setmytheme} 5 | \title{Customize graphical settings} 6 | \usage{ 7 | setmytheme() 8 | } 9 | \description{ 10 | This function customizes the theme used by ggplot2. Loads the packages ggplot2 and 11 | ggthemes. 12 | } 13 | -------------------------------------------------------------------------------- /man/unbiasedmcmc-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/unbiasedmcmc-package.R 3 | \docType{package} 4 | \name{unbiasedmcmc-package} 5 | \alias{unbiasedmcmc-package} 6 | \alias{unbiasedmcmc} 7 | \title{unbiasedmcmc} 8 | \description{ 9 | Unbiased MCMC estimators with couplings 10 | } 11 | \details{ 12 | This package contains scripts to reproduce the figures of the 13 | paper "Unbiased Markov chain Monte Carlo with couplings" by 14 | Pierre E. Jacob, John O'Leary, Yves F Atchade, available on arXiv at 15 | https://arxiv.org/abs/1708.03625 16 | } 17 | \author{ 18 | Pierre E. Jacob , John O'Leary, Yves F. Atchade 19 | } 20 | \keyword{package} 21 | -------------------------------------------------------------------------------- /src/PolyaGamma.h: -------------------------------------------------------------------------------- 1 | // -*- mode: c++; -*- 2 | 3 | //////////////////////////////////////////////////////////////////////////////// 4 | 5 | // Copyright 2014 Nick Polson, James Scott, and Jesse Windle. 6 | 7 | // This file is part of BayesLogit. 8 | 9 | // BayesLogit is free software: you can redistribute it and/or modify it under 10 | // the terms of the GNU General Public License as published by the Free Software 11 | // Foundation, either version 3 of the License, or any later version. 12 | 13 | // BayesLogit is distributed in the hope that it will be useful, but WITHOUT ANY 14 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 15 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 16 | 17 | // You should have received a copy of the GNU General Public License along with 18 | // BayesLogit. If not, see . 19 | 20 | //////////////////////////////////////////////////////////////////////////////// 21 | 22 | // See for implementation details. 23 | 24 | #ifndef __POLYAGAMMA__ 25 | #define __POLYAGAMMA__ 26 | 27 | #include "RNG.h" 28 | #include 29 | #include 30 | 31 | using std::vector; 32 | 33 | // The numerical accuracy of __PI will affect your distribution. 34 | const double __PI = 3.141592653589793238462643383279502884197; 35 | const double HALFPISQ = 0.5 * __PI * __PI; 36 | const double FOURPISQ = 4 * __PI * __PI; 37 | const double __TRUNC = 0.64; 38 | const double __TRUNC_RECIP = 1.0 / __TRUNC; 39 | 40 | class PolyaGamma 41 | { 42 | 43 | // For sum of Gammas. 44 | int T; 45 | vector bvec; 46 | 47 | public: 48 | 49 | // Constructors. 50 | PolyaGamma(int trunc = 200); 51 | 52 | // Draw. 53 | // double draw(double n, double z, RNG& r); 54 | double draw(int n, double z, RNG& r); 55 | double draw_sum_of_gammas(double n, double z, RNG& r); 56 | double draw_like_devroye(double z, RNG& r); 57 | 58 | //void draw(MF x, double a, double z, RNG& r); 59 | //void draw(MF x, MF a, MF z, RNG& r); 60 | 61 | // Utility. 62 | void set_trunc(int trunc); 63 | 64 | // Helper. 65 | double a(int n, double x); 66 | double pigauss(double x, double Z); 67 | double mass_texpon(double Z); 68 | double rtigauss(double Z, RNG& r); 69 | 70 | static double jj_m1(double b, double z); 71 | static double jj_m2(double b, double z); 72 | static double pg_m1(double b, double z); 73 | static double pg_m2(double b, double z); 74 | 75 | }; 76 | 77 | #endif 78 | -------------------------------------------------------------------------------- /src/RRNG.cpp: -------------------------------------------------------------------------------- 1 | #include "RRNG.h" 2 | #include 3 | 4 | using std::pow; 5 | using std::fabs; 6 | using std::sqrt; 7 | using std::log; 8 | using std::exp; 9 | 10 | ////////////////////////////////////////////////////////////////////// 11 | // R Random Variates // 12 | ////////////////////////////////////////////////////////////////////// 13 | 14 | //-------------------------------------------------------------------- 15 | // Distributions with one parameter. 16 | 17 | #define ONEP(NAME, CALL, P1) \ 18 | double BasicRNG::NAME(double P1) \ 19 | { \ 20 | return CALL (P1); \ 21 | } \ 22 | 23 | ONEP(expon_mean, rexp , mean) 24 | ONEP(chisq , rchisq, df ) 25 | 26 | #undef ONEP 27 | 28 | //-------------------------------------------------------------------- 29 | // Distributions with two parameters. 30 | 31 | #define TWOP(NAME, CALL, P1, P2) \ 32 | double BasicRNG::NAME(double P1, double P2) \ 33 | { \ 34 | return CALL (P1, P2); \ 35 | } \ 36 | 37 | TWOP(gamma_scale, rgamma, shape, scale) 38 | TWOP(norm , rnorm , mean , sd ) 39 | TWOP(flat , runif , a , b ) 40 | TWOP(beta , rbeta , a , b ) 41 | 42 | // x ~ Gamma(shape=a, scale=b) 43 | // x ~ x^{a-1} exp(x / b). 44 | 45 | #undef TWOP 46 | 47 | //-------------------------------------------------------------------- 48 | // Uniform // 49 | 50 | double BasicRNG::unif() 51 | { 52 | return unif_rand(); 53 | } // unif 54 | 55 | //-------------------------------------------------------------------- 56 | // Exponential // 57 | double BasicRNG::expon_rate(double rate) 58 | { 59 | return expon_mean(1.0 / rate); 60 | } 61 | 62 | //-------------------------------------------------------------------- 63 | // Normal // 64 | 65 | double BasicRNG::norm(double sd) 66 | { 67 | return rnorm(0, sd); 68 | } // norm 69 | 70 | //-------------------------------------------------------------------- 71 | // gamma_rate // 72 | 73 | double BasicRNG::gamma_rate(double shape, double rate) 74 | { 75 | return gamma_scale(shape, 1.0 / rate); 76 | } 77 | 78 | //-------------------------------------------------------------------- 79 | // Inv-Gamma // 80 | 81 | // a = shape, b = scale 82 | // x ~ IG(shape, scale) ~ x^{-a-1} exp(b / x). 83 | // => 1/x ~ Ga(shape, scale*=1/scale). 84 | 85 | double BasicRNG::igamma(double shape, double scale) 86 | { 87 | return 1.0/rgamma(shape, 1.0 / scale); 88 | } // igamma 89 | 90 | //////////////////////////////////////////////////////////////////////////////// 91 | 92 | double BasicRNG::p_norm(double x, int use_log) 93 | { 94 | return pnorm(x, 0.0, 1.0, 1, use_log); 95 | } 96 | 97 | double BasicRNG::p_gamma_rate(double x, double shape, double rate, int use_log) 98 | { 99 | double scale = 1.0 / rate; 100 | return pgamma(x, shape, scale, 1, use_log); 101 | } 102 | 103 | //////////////////////////////////////////////////////////////////////////////// 104 | 105 | double BasicRNG::Gamma (double x, int use_log) 106 | { 107 | double y = lgammafn(x); 108 | if (!use_log) y = exp(y); 109 | return y; 110 | } 111 | 112 | //////////////////////////////////////////////////////////////////////////////// 113 | 114 | double BasicRNG::d_beta(double x, double a, double b) 115 | { 116 | return dbeta(x, a, b, false); 117 | } 118 | 119 | //////////////////////////////////////////////////////////////////////////////// 120 | 121 | 122 | -------------------------------------------------------------------------------- /src/RRNG.h: -------------------------------------------------------------------------------- 1 | // Copyright 2012 Jesse Windle - jesse.windle@gmail.com 2 | 3 | // This program is free software: you can redistribute it and/or 4 | // modify it under the terms of the GNU General Public License as 5 | // published by the Free Software Foundation, either version 3 of the 6 | // License, or (at your option) any later version. 7 | 8 | // This program is distributed in the hope that it will be useful, but 9 | // WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 | // General Public License for more details. 12 | 13 | // You should have received a copy of the GNU General Public License 14 | // along with this program. If not, see 15 | // . 16 | 17 | ////////////////////////////////////////////////////////////////////// 18 | 19 | // YOU MUST ALWAYS CALL GetRNGSeed() and PutRNGSeed() WHEN USING THESE FUNCTIONS!!! 20 | 21 | ////////////////////////////////////////////////////////////////////// 22 | 23 | #ifndef __BASICRNG__ 24 | #define __BASICRNG__ 25 | 26 | #include "R.h" 27 | #include "Rmath.h" 28 | // #include "Matrix.h" 29 | 30 | class BasicRNG { 31 | 32 | public: 33 | 34 | // Random variates. 35 | double unif (); // Uniform 36 | double expon_mean(double mean); // Exponential 37 | double expon_rate(double rate); // Exponential 38 | double chisq (double df); // Chisq 39 | double norm (double sd); // Normal 40 | double norm (double mean , double sd); // Normal 41 | double gamma_scale (double shape, double scale); // Gamma_Scale 42 | double gamma_rate (double shape, double rate); // Gamma_Rate 43 | double igamma(double shape, double scale); // Inv-Gamma 44 | double flat (double a=0 , double b=1 ); // Flat 45 | double beta (double a=1.0, double b=1.0); // Beta 46 | 47 | int bern (double p); // Bernoulli 48 | 49 | // CDF 50 | static double p_norm (double x, int use_log=0); 51 | static double p_gamma_rate(double x, double shape, double rate, int use_log=0); 52 | 53 | // Density 54 | static double d_beta(double x, double a, double b); 55 | 56 | // Utility 57 | static double Gamma (double x, int use_log=0); 58 | 59 | }; // BasicRNG 60 | 61 | #endif 62 | -------------------------------------------------------------------------------- /src/blassoutil.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "mvnorm.h" 3 | using namespace Rcpp; 4 | using namespace std; 5 | using namespace Eigen; 6 | 7 | 8 | // D_tau_inv <- diag(1/tau2, p, p) 9 | // A <- XtX + D_tau_inv 10 | // A_inv <- solve(A) 11 | // beta <- t(fast_rmvnorm(1, (A_inv %*% XtY)[,1], sigma2 * A_inv)) 12 | // norm <- sum((Y - X %*% beta)^2) 13 | // betaDbeta <- sum(beta^2 / tau2) 14 | // 15 | // [[Rcpp::export]] 16 | List blassoconditional(const Eigen::VectorXd & Y, const Eigen::MatrixXd & X, 17 | const Eigen::VectorXd & XtY, const Eigen::MatrixXd & XtX, 18 | const NumericVector tau2, const double sigma2){ 19 | int p = X.cols(); 20 | 21 | Eigen::MatrixXd D_tau_inv(p, p); 22 | D_tau_inv.setIdentity(); 23 | for (int i=0; i < p; i++){ 24 | D_tau_inv(i,i) /= tau2(i); 25 | } 26 | Eigen::MatrixXd A_inv(p, p); 27 | A_inv = (XtX + D_tau_inv).inverse(); 28 | NumericVector mean(wrap(A_inv * XtY)); 29 | NumericMatrix Sigma(wrap(A_inv.array() * sigma2)); 30 | NumericMatrix beta = fast_rmvnorm_(1, mean, Sigma); 31 | Eigen::VectorXd beta_eigen = as(beta); 32 | double norm = (Y - X * beta_eigen).array().square().sum(); 33 | double betaDbeta = 0; 34 | for (int i=0; i < p; i++){ 35 | betaDbeta += beta_eigen(i) * beta_eigen(i) / tau2(i); 36 | } 37 | return List::create(Named("beta") = beta.row(0), Named("norm") = norm, 38 | Named("betaDbeta") = betaDbeta); 39 | } 40 | 41 | 42 | // [[Rcpp::export]] 43 | List blassoconditional_coupled(const Eigen::VectorXd & Y, const Eigen::MatrixXd & X, 44 | const Eigen::VectorXd & XtY, const Eigen::MatrixXd & XtX, 45 | const NumericVector & tau21, const NumericVector & tau22, const double sigma21, const double sigma22){ 46 | int p = X.cols(); 47 | Eigen::MatrixXd D_tau_inv1(p, p); 48 | D_tau_inv1.setIdentity(); 49 | for (int i=0; i < p; i++){ 50 | D_tau_inv1(i,i) /= tau21(i); 51 | } 52 | Eigen::MatrixXd A_inv1(p, p); 53 | A_inv1 = (XtX + D_tau_inv1).inverse(); 54 | NumericVector mean1(wrap(A_inv1 * XtY)); 55 | NumericMatrix Sigma1(wrap(A_inv1.array() * sigma21)); 56 | Eigen::MatrixXd D_tau_inv2(p, p); 57 | D_tau_inv2.setIdentity(); 58 | for (int i=0; i < p; i++){ 59 | D_tau_inv2(i,i) /= tau22(i); 60 | } 61 | Eigen::MatrixXd A_inv2(p, p); 62 | A_inv2 = (XtX + D_tau_inv2).inverse(); 63 | NumericVector mean2(wrap(A_inv2 * XtY)); 64 | NumericMatrix Sigma2(wrap(A_inv2.array() * sigma22)); 65 | 66 | 67 | List betas_coupled = rmvnorm_max_coupling_(mean1, mean2, Sigma1, Sigma2); 68 | NumericMatrix betas = betas_coupled["xy"]; 69 | // NumericMatrix beta = rmvnorm(1, mean, Sigma); 70 | Eigen::VectorXd beta_eigen1(p); 71 | Eigen::VectorXd beta_eigen2(p); 72 | for (int i=0; i < p; i++){ 73 | beta_eigen1(i) = betas(i,0); 74 | beta_eigen2(i) = betas(i,1); 75 | } 76 | // = as(betas.col(1)); 77 | // = as(betas.col(0)); 78 | double norm1 = (Y - X * beta_eigen1).array().square().sum(); 79 | double norm2 = (Y - X * beta_eigen2).array().square().sum(); 80 | double betaDbeta1 = 0.; 81 | double betaDbeta2 = 0.; 82 | for (int i=0; i < p; i++){ 83 | betaDbeta1 += beta_eigen1(i) * beta_eigen1(i) / tau21(i); 84 | betaDbeta2 += beta_eigen2(i) * beta_eigen2(i) / tau22(i); 85 | } 86 | return List::create(Named("beta1") = betas(_,0), Named("beta2") = betas(_,1), 87 | Named("norm1") = norm1, Named("norm2") = norm2, 88 | Named("betaDbeta1") = betaDbeta1, Named("betaDbeta2") = betaDbeta2); 89 | } 90 | -------------------------------------------------------------------------------- /src/c_chains_to_measure.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | using namespace Rcpp; 4 | 5 | // This function takes coupled chains, containing chains X (samples1) and Y (samples2) 6 | // and returns a signed empirical measure with atoms and weights. 7 | // lag is an integer >= 1. 8 | 9 | // The chains are generated as follows: 10 | // X_0 ~ pi_0, then X_t | X_{t-1} ~ P(X_{t-1}, dot) for t=1,...,lag, 11 | // Y_0 ~ pi_0. 12 | 13 | // Then they are coupled, to produce 14 | // (X_{t},Y_{t-lag})|(X_{t-1},Y_{t-lag-1}) ~ bar{P}((X_{t-1},Y_{t-lag-1}), dot) 15 | // for t = lag+1,...,max(m,tau) 16 | // The meeting time is denoted by tau; it is the smallest t such that X_{t} = Y_{t-lag}. 17 | 18 | // Thus upon completion, we have 19 | // X_0,..., X_{max(m,tau)} (of length 1+max(m, tau)) 20 | // Y_0,..., Y_{max(m,tau)-lag} (of length 1+max(m, tau) - lag) 21 | 22 | // The signed measure approximation, for a general lag, is given by 23 | // (MCMC): (m-k+1)^{-1} \sum_{t=k}^m delta_{X_t} 24 | // (bias correction): (m-k+1)^{-1} \sum_{t=k+lag}^{tau-1} (floor((t-k) / lag) - ceil(max(lag, t-m)/lag) + 1) {delta_{X_t} - delta_{Y_{t-lag}}} 25 | 26 | // Thus total number of atoms is 27 | // m-k+1 from the MCMC approximation 28 | // + 2 * max(0, (tau-1) - (k+lag) + 1) from the bias correction term 29 | // the max above simplifies to max(0, tau - (k + lag)) 30 | 31 | // Of course some of the atoms are identical but we can deal with that separately, 32 | // by pruning the generated data set later. 33 | 34 | // [[Rcpp::export]] 35 | List c_chains_to_measure_as_list_(const List & c_chains, int k, int m){ 36 | int meetingtime = c_chains["meetingtime"]; 37 | NumericMatrix samples1 = c_chains["samples1"]; 38 | NumericMatrix samples2 = c_chains["samples2"]; 39 | int lag = samples1.nrow() - samples2.nrow(); 40 | int size; 41 | if (((meetingtime-1) - (k+lag) + 1) > 0){ 42 | size = (m - k + 1) + 2 * ((meetingtime-1) - (k+lag) + 1); 43 | } else { 44 | size = (m - k + 1); 45 | } 46 | // equal weight for each atom in the MCMC part 47 | double eqweight = 1. / (double) (m - k + 1); 48 | // number of columns in samples1 49 | // (should be equal to number of columns in samples2) 50 | int dimension = samples1.cols(); 51 | // matrix of all atoms 52 | NumericMatrix atoms(size, dimension); 53 | // vector of all weights 54 | NumericVector weights(size); 55 | // vector indicating which atoms are part of the MCMC measure and which are part of the bias correction 56 | // TRUE refers to MCMC, FALSE to bias correction 57 | LogicalVector whichpart(size); 58 | // fill in MCMC part 59 | for (int i = 0; i < (m-k+1); i ++){ 60 | atoms(i,_) = samples1(k+i,_); 61 | weights(i) = eqweight; 62 | whichpart(i) = true; 63 | } 64 | // index keeps track of which row to fill in "atoms" 65 | int index = m-k+1; 66 | // now bias correction part 67 | if (((meetingtime-1) - (k+lag) + 1) > 0){ 68 | for (int time = k+lag; time <= meetingtime-1; time ++){ 69 | atoms(index,_) = samples1(time,_); 70 | atoms(index+1,_) = samples2(time-lag,_); 71 | // weights(index) = ceil(((double) time - k)/((double) lag)); 72 | //if ((m - k + 1) < weights(index)){ 73 | // weights(index) = m - k + 1; 74 | //} 75 | weights(index) = floor(((double) time - k) / (double) lag) - ceil(std::max(lag, (double) time - m)/ (double) lag) + 1.; 76 | weights(index) = weights(index) * eqweight; 77 | weights(index+1) = - weights(index); 78 | whichpart(index) = false; 79 | whichpart(index+1) = false; 80 | index += 2; 81 | } 82 | } 83 | return List::create(Named("atoms") = atoms, Named("weights") = weights, Named("MCMC") = whichpart); 84 | } 85 | 86 | -------------------------------------------------------------------------------- /src/estimator_bin.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | using namespace Rcpp; 5 | using namespace std; 6 | 7 | // given c_chains, a list produced by the function 'coupled_chains', 8 | // returns estimator of probability of component being between lower and upper 9 | // [[Rcpp::export]] 10 | double estimator_bin_(List c_chains, int component, double lower, double upper, int k, int m, int lag){ 11 | int meetingtime = c_chains["meetingtime"]; 12 | NumericMatrix samples1 = c_chains["samples1"]; 13 | NumericMatrix samples2 = c_chains["samples2"]; 14 | double estimator = 0; 15 | for (int isample = k; isample <= m; isample ++){ 16 | if (samples1(isample,component-1) > lower && samples1(isample,component-1) < upper){ 17 | estimator += 1; 18 | } 19 | } 20 | // next, add bias correction terms 21 | if (meetingtime > k + lag){ 22 | double coefficient = 0.; 23 | double increment = 0.; 24 | for (int time = k+lag; time <= meetingtime-1; time ++){ 25 | increment = 0.; 26 | coefficient = floor(((double) time - k) / (double) lag) - ceil(std::max(lag, (double) time - m)/ (double) lag) + 1.; 27 | // compute min between m - k + 1 and ceiling ((t - k) / L) 28 | // coefficient = ceil(((double) time - k)/((double) lag)); 29 | // if ((m - k + 1) < coefficient){ 30 | // coefficient = m - k + 1.; 31 | // } 32 | if (samples1(time,component-1) > lower && samples1(time,component-1) < upper){ 33 | increment += coefficient; 34 | } 35 | if (samples2(time-lag,component-1) > lower && samples2(time-lag,component-1) < upper){ 36 | increment -= coefficient; 37 | } 38 | estimator += increment; 39 | } 40 | } 41 | return estimator / (m - k + 1.); 42 | } 43 | -------------------------------------------------------------------------------- /src/inversegaussian.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | using namespace Rcpp; 4 | 5 | // [[Rcpp::export]] 6 | NumericVector rinvgaussian_c(int n, double mu, double lambda){ 7 | RNGScope scope; 8 | NumericVector results(n); 9 | GetRNGstate(); 10 | NumericVector nu = rnorm(n); 11 | NumericVector z = runif(n); 12 | PutRNGstate(); 13 | NumericVector x = nu * nu; 14 | x = mu + mu*mu * x / (2. * lambda) - mu / (2. * lambda) * sqrt(4. * mu * lambda * x + mu*mu * x*x); 15 | for (int i = 0; i < n; i++){ 16 | if (z(i) <= mu / (mu + x(i))){ 17 | results(i) = x(i); 18 | } else { 19 | results(i) = mu * mu / x(i); 20 | } 21 | } 22 | return results; 23 | } 24 | 25 | double dinvgaussian_c(double x, double mu, double lambda){ 26 | return 0.5 * log(lambda/6.283185) - 1.5 * log(x) - lambda * (x - mu) * (x - mu) / (2 * mu * mu * x); 27 | } 28 | 29 | 30 | // [[Rcpp::export]] 31 | NumericVector rinvgaussian_coupled_c(double mu1, double mu2, double lambda1, double lambda2){ 32 | RNGScope scope; 33 | NumericVector results(2); 34 | NumericVector x = rinvgaussian_c(1, mu1, lambda1); 35 | GetRNGstate(); 36 | NumericVector u = runif(1); 37 | PutRNGstate(); 38 | if (x(0) < 1e-20){ 39 | x(0) = 1e-20; 40 | } 41 | if (dinvgaussian_c(x(0), mu1, lambda1) + log(u(0)) < dinvgaussian_c(x(0), mu2, lambda2)){ 42 | results(0) = x(0); 43 | results(1) = x(0); 44 | } else { 45 | bool reject = true; 46 | NumericVector y(1); 47 | while (reject){ 48 | y = rinvgaussian_c(1, mu2, lambda2); 49 | if (y(0) < 1e-20){ 50 | y(0) = 1e-20; 51 | } 52 | GetRNGstate(); 53 | u = runif(1); 54 | PutRNGstate(); 55 | reject = (dinvgaussian_c(y(0), mu2, lambda2) + log(u(0)) < dinvgaussian_c(y(0), mu1, lambda1)); 56 | } 57 | results(0) = x(0); 58 | results(1) = y(0); 59 | } 60 | 61 | return results; 62 | } 63 | 64 | -------------------------------------------------------------------------------- /src/ising.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | using namespace Rcpp; 4 | 5 | // given a square grid x = {x_i}, e.g. 32x32 or something 6 | // with each x_i in {-1,+1} 7 | // the following computes sum_{i~j} x_i x_j 8 | // where i~j denotes the neighboring relation; here we use perodic boundary condition 9 | // so (0,0) is neighbor with (32,0) in a 32x32 grid, for instance 10 | // [[Rcpp::export]] 11 | int ising_sum_(const IntegerMatrix & state){ 12 | int size = state.rows(); 13 | int s = 0; 14 | // below, (i1,j1) will index a neighbor of (i,j) 15 | int i1; 16 | int j1; 17 | for (int i = 0; i < size; i++){ 18 | if (i == (size - 1)){ 19 | i1 = 0; 20 | } else { 21 | i1 = i+1; 22 | } 23 | for (int j = 0; j < size; j++){ 24 | if (j == (size - 1)){ 25 | j1=0; 26 | } else { 27 | j1 = j+1; 28 | } 29 | // sums over two neighbors ("top right" neighbors 30 | // of current location, if counting from bottom left corner) 31 | s += state(i,j) * (state(i,j1) + state(i1,j)); 32 | } 33 | } 34 | return s; 35 | } 36 | 37 | // given a square grid 'state' 38 | // and a vector proba_beta of values between 0 and 1 39 | // corresponding to probabilities of drawing a +1 given the neighbors sum to 40 | // -4, -2, 0, 2, 4 (respectively, so proba_beta should contain 5 values) 41 | // then the following functions perform a sweep over all locations in the grid and performs a flip 42 | // [[Rcpp::export]] 43 | IntegerMatrix ising_gibbs_sweep_(IntegerMatrix state, NumericVector proba_beta){ 44 | RNGScope scope; 45 | int size = state.rows(); 46 | int s; 47 | int itop, ibottom, jright, jleft; 48 | for (int i = 0; i < size; i++){ 49 | for (int j = 0; j < size; j++){ 50 | s = 0; 51 | itop = (i+1) % size; 52 | ibottom = ((i + size - 1) % size); 53 | jright = (j+1) % size; 54 | jleft = (j + size - 1) % size; 55 | s += state(itop, j) + state(ibottom, j) + state(i, jright) + state(i, jleft); 56 | GetRNGstate(); 57 | state(i,j) = 2*((runif(1))(0) < proba_beta((s+4)/2)) - 1; 58 | PutRNGstate(); 59 | } 60 | } 61 | return state; 62 | } 63 | 64 | // coupled version of single-site Gibbs update, 65 | // where the strategy is to maximally couple each conditional update 66 | // [[Rcpp::export]] 67 | List ising_coupled_gibbs_sweep_(IntegerMatrix state1, IntegerMatrix state2, NumericVector proba_beta){ 68 | RNGScope scope; 69 | int size = state1.rows(); 70 | int s1; 71 | int s2; 72 | int itop,ibottom,jright,jleft; 73 | IntegerVector x(2); 74 | for (int i = 0; i < size; i++){ 75 | for (int j = 0; j < size; j++){ 76 | s1 = 0; 77 | s2 = 0; 78 | itop = (i+1) % size; 79 | ibottom = ((i + size - 1) % size); 80 | jright = (j+1) % size; 81 | jleft = (j + size - 1) % size; 82 | s1 += state1(itop, j) + state1(ibottom, j) + state1(i, jright) + state1(i, jleft); 83 | s2 += state2(itop, j) + state2(ibottom, j) + state2(i, jright) + state2(i, jleft); 84 | GetRNGstate(); 85 | double u_ = (runif(1))(0); 86 | PutRNGstate(); 87 | state1(i,j) = 2*(u_ < proba_beta((s1+4)/2)) - 1; 88 | state2(i,j) = 2*(u_ < proba_beta((s2+4)/2)) - 1; 89 | } 90 | } 91 | return List::create(Named("state1") = state1, Named("state2") = state2); 92 | } 93 | 94 | 95 | -------------------------------------------------------------------------------- /src/logisticregression.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | // [[Rcpp::export]] 5 | NumericMatrix sigma_(const NumericMatrix & X, const NumericVector & w){ 6 | int n = X.rows(); 7 | int p = X.cols(); 8 | NumericMatrix inv_sigma(p,p); 9 | for (int j1 = 0; j1 < p; j1 ++){ 10 | for (int j2 = j1; j2 < p; j2 ++){ 11 | inv_sigma(j1,j2) = 0; 12 | for (int i = 0; i < n; i++){ 13 | inv_sigma(j1,j2) = inv_sigma(j1,j2) + X(i,j1) * X(i,j2) * w(i); 14 | } 15 | } 16 | } 17 | for (int j1 = 1; j1 < p; j1 ++){ 18 | for (int j2 = 0; j2 < j1; j2 ++){ 19 | inv_sigma(j1,j2) = inv_sigma(j2,j1); 20 | } 21 | } 22 | return inv_sigma; 23 | } 24 | 25 | // The following function computes m(omega) and Sigma(omega)... (or what we really need instead) 26 | // it returns m (= m(omega)), Sigma_inverse = Sigma(omega)^{-1}, 27 | // as well as Cholesky_inverse and Cholesky that are such that 28 | // Cholesky_inverse is the lower triangular matrix L, in the decomposition Sigma^{-1} = L L^T 29 | // whereas Cholesky is the lower triangular matrix Ltilde in the decomposition Sigma = Ltilde^T Ltilde 30 | 31 | // [[Rcpp::export]] 32 | List m_sigma_function_(const Eigen::Map & omega, 33 | const Eigen::Map & X, 34 | const Eigen::Map & invB, 35 | const Eigen::Map & KTkappaplusinvBtimesb){ 36 | int n = X.rows(); 37 | int p = X.cols(); 38 | // The matrix A stores XT Omega X + B^{-1}, that is, Sigma^{-1} 39 | Eigen::MatrixXd A(p,p); 40 | for (int j1 = 0; j1 < p; j1 ++){ 41 | for (int j2 = j1; j2 < p; j2 ++){ 42 | A(j1,j2) = invB(j1, j2); 43 | for (int i = 0; i < n; i++){ 44 | A(j1,j2) = A(j1,j2) + X(i,j1) * X(i,j2) * omega(i); 45 | } 46 | A(j2,j1) = A(j1,j2); 47 | } 48 | } 49 | Eigen::LLT lltofA(A); 50 | Eigen::MatrixXd lower = lltofA.matrixL(); 51 | Eigen::VectorXd x = lltofA.solve(KTkappaplusinvBtimesb); 52 | return List::create(Named("m")=x, 53 | Named("Sigma_inverse") = A, 54 | Named("Cholesky_inverse") = lower, 55 | Named("Cholesky") = lower.inverse()); 56 | } 57 | -------------------------------------------------------------------------------- /src/logisticregressioncoupling.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "RNG.h" 3 | #include "PolyaGamma.h" 4 | 5 | using namespace Rcpp; 6 | 7 | 8 | // [[Rcpp::export]] 9 | double logcosh(double x){ 10 | double result = 0.; 11 | if (x > 0.){ 12 | result = x + log(1.0 + exp(-2.0*x)) - 0.6931472; 13 | } else { 14 | result = -x + log(1.0 + exp(2.0*x)) - 0.6931472; 15 | } 16 | return result; 17 | } 18 | 19 | 20 | 21 | // [[Rcpp::export]] 22 | NumericVector xbeta_(const NumericMatrix & X, const NumericVector & beta){ 23 | int n = X.rows(); 24 | NumericVector xbeta(n); 25 | for (int i = 0; i < n; i++){ 26 | xbeta(i) = 0; 27 | for (int j = 0; j < X.cols(); j++){ 28 | xbeta(i) = xbeta(i) + X(i,j) * beta(j); 29 | } 30 | } 31 | return(xbeta); 32 | } 33 | 34 | double rpg_devroye(int n, double z){ 35 | RNG r; 36 | PolyaGamma pg(1); 37 | double x = pg.draw(n, z, r); 38 | return(x); 39 | } 40 | 41 | // [[Rcpp::export]] 42 | NumericMatrix w_rejsamplerC(const NumericVector & beta1, 43 | const NumericVector & beta2, 44 | const NumericMatrix & X){ 45 | RNGScope scope; 46 | int n = X.rows(); 47 | NumericMatrix w(n,2); 48 | NumericVector z1s = abs(xbeta_(X, beta1)); 49 | NumericVector z2s = abs(xbeta_(X, beta2)); 50 | for(int i = 0; i < n; ++i){ 51 | double z1 = z1s(i); 52 | double z2 = z2s(i); 53 | double z_min = std::min(z1,z2); 54 | double z_max = std::max(z1,z2); 55 | double w_max; 56 | double w_min = rpg_devroye(1,z_min); 57 | GetRNGstate(); 58 | double log_u = log(runif(1,0,1)(0)); 59 | PutRNGstate(); 60 | double log_ratio = - 0.5 * w_min * (pow(z_max,2.)-pow(z_min,2.)); 61 | if(log_u < log_ratio){ 62 | w_max = w_min; 63 | } else { 64 | w_max = rpg_devroye(1,z_max); 65 | } 66 | if(z1 logaccept2){ 108 | accept = TRUE; 109 | } 110 | } 111 | } 112 | w(i,1) = w2; 113 | } 114 | return w; 115 | } 116 | 117 | 118 | -------------------------------------------------------------------------------- /src/mvnorm.h: -------------------------------------------------------------------------------- 1 | #ifndef _INCL_MVNORM_ 2 | #define _INCL_MVNORM_ 3 | #include 4 | using namespace Rcpp; 5 | 6 | // generate samples from a multivariate Normal distribution 7 | NumericMatrix fast_rmvnorm_(int nsamples, const NumericVector & mean, const NumericMatrix & covariance); 8 | NumericMatrix fast_rmvnorm_cholesky_(int nsamples, const NumericVector & mean, const Eigen::MatrixXd & cholesky); 9 | 10 | // evaluate probability density function of a multivariate Normal distribution 11 | NumericVector fast_dmvnorm_(const NumericMatrix & x, const NumericVector & mean, const NumericMatrix & covariance); 12 | NumericVector fast_dmvnorm_cholesky_inverse_(const NumericMatrix & x, const NumericVector & mean, const Eigen::MatrixXd & cholesky_inverse); 13 | 14 | // Couplings -- 15 | // The following functions sample from couplings of multivariate Normals. 16 | // They return a list with two entries: 17 | // "xy" is a matrix with two columns, one for x and one for y, which are Normally distributed vectors. 18 | // "identical" which is a boolean indicating whether x = y. 19 | 20 | // sample from maximum coupling of two multivariate Normals 21 | Rcpp::List rmvnorm_max_coupling_(const NumericVector & mu1, const NumericVector & mu2, const NumericMatrix & Sigma1, const NumericMatrix & Sigma2); 22 | 23 | 24 | // sample from maximum coupling of two multivariate Normals 25 | Rcpp::List rmvnorm_max_coupling_cholesky(const NumericVector & mu1, const NumericVector & mu2, 26 | const Eigen::MatrixXd & Cholesky1, const Eigen::MatrixXd & Cholesky2, 27 | const Eigen::MatrixXd & Cholesky_inverse1, const Eigen::MatrixXd & Cholesky_inverse2); 28 | 29 | 30 | // sample from reflection-maximum coupling of two multivariate Normals with common covariance matrix 31 | Rcpp::List rmvnorm_reflection_max_coupling_(const Eigen::VectorXd & mu1, const Eigen::VectorXd & mu2, 32 | const Eigen::MatrixXd & Sigma_chol, const Eigen::MatrixXd & inv_Sigma_chol); 33 | 34 | #endif 35 | 36 | -------------------------------------------------------------------------------- /src/propensityscore.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | // [[Rcpp::export]] 5 | NumericVector beta2e_(const NumericVector & beta, const NumericMatrix & C){ 6 | double betatimescovar; 7 | int nbeta = beta.size(); 8 | int nobservations = C.nrow(); 9 | NumericVector e(nobservations); 10 | for (int i = 0; i < nobservations; i++){ 11 | betatimescovar = beta(0); 12 | for (int j = 0; j < nbeta-1; j++){ 13 | betatimescovar += beta(j+1) * C(i,j); 14 | } 15 | e(i) = 1. / (1. + exp(-betatimescovar)); 16 | } 17 | return e; 18 | } 19 | 20 | // function that takes a vector of doubles 21 | // compute the quintiles 22 | // and return a vector of indicators of quintile memberships, 23 | // ie with values in {1,2,3,4,5}. 24 | // [[Rcpp::export]] 25 | IntegerVector cut_in_fifth_(const NumericVector & x){ 26 | NumericVector y = clone(x); 27 | std::sort(y.begin(), y.end()); 28 | NumericVector q = NumericVector::create(0.2, 0.4, 0.6, 0.8); 29 | int s = x.size(); 30 | int nqs = q.size(); 31 | NumericVector quintiles(nqs); 32 | for (int i = 0; i < nqs; i ++){ 33 | quintiles(i) = y[(int) (s * q[i])]; 34 | } 35 | IntegerVector indicators(s); 36 | int indicator; 37 | for (int i = 0; i < s; i ++){ 38 | indicator = 0; 39 | while ((x(i) >= quintiles(indicator)) && (indicator < (nqs - 1))){ 40 | indicator ++; 41 | } 42 | if (x(i) >= quintiles(indicator)){ indicator ++; } 43 | indicators(i) = indicator + 1; // so that the result is between 1 and 5. 44 | } 45 | return indicators; 46 | } 47 | 48 | 49 | // [[Rcpp::export]] 50 | NumericVector propensity_module2_loglik2_(NumericMatrix theta1s, NumericMatrix theta2s, const NumericVector & X, const NumericMatrix & C, const NumericVector & Y){ 51 | int n = theta1s.nrow(); 52 | NumericVector evals(n); 53 | std::fill(evals.begin(), evals.end(), 0); 54 | int theta_dim = 7; 55 | double logpr, betatimescovar; 56 | for (int itheta = 0; itheta < n; itheta++){ 57 | NumericVector beta = theta1s.row(itheta); 58 | int nobservations = X.size(); 59 | NumericVector e = beta2e_(beta, C); 60 | IntegerVector indicators = cut_in_fifth_(e); 61 | NumericVector theta = theta2s.row(itheta); 62 | double z; 63 | for (int i = 0; i < nobservations; i++){ 64 | z = theta(0) + theta(1) * X(i); 65 | if (indicators(i) > 1){ 66 | z = z + theta(0 + indicators(i)); 67 | } 68 | logpr = - log(1 + exp(-z)); 69 | evals(itheta) += logpr + (1 - Y(i)) * (-z); 70 | } 71 | } 72 | return evals; 73 | } 74 | -------------------------------------------------------------------------------- /src/prune.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | using namespace Rcpp; 4 | 5 | // [[Rcpp::export]] 6 | NumericMatrix prune_measure_(const NumericMatrix & df){ 7 | // df is meant to have a first column with repeat numbers 8 | // second column made of 1 and 0, indicating (1) whether atom is part of MCMC part or bias correction part 9 | // third columns with weights 10 | // and remaining columns (4, 5, ...) with atoms 11 | // sorted in increasing/lexicographic order 12 | NumericMatrix newdf(df.nrow(), df.ncol()); 13 | std::fill(newdf.begin(), newdf.end(), 0.); 14 | // keep track of index of row in newdf in which to write 15 | int index_row = 0; 16 | // s will serve as a measure of difference between atoms 17 | double s = 0; 18 | newdf(0,_) = df(0,_); 19 | // loop over rows of df 20 | for (int i=1; i < df.nrow(); i++){ 21 | // do not prune if different repeat (column 0 contains repeat) 22 | // if same repeat 23 | if (df(i,0) == df(i-1,0)){ 24 | // sum absolute value of difference over each component of atom 25 | s = 0; 26 | for (int j = 3; j < df.ncol(); j++){ 27 | s += std::abs(df(i,j) - df(i-1,j)); 28 | } 29 | // if this sum is very small then the atoms are considered equal 30 | if (s < 1e-20){ 31 | // if so, add up the weights to current weight (column 2 contains weights) 32 | newdf(index_row, 2) += df(i, 2); 33 | } else { 34 | // new atom, add it to newdf 35 | index_row++; 36 | newdf(index_row,_) = df(i,_); 37 | } 38 | } else { 39 | // new atom, add it to newdf 40 | index_row++; 41 | newdf(index_row,_) = df(i,_); 42 | } 43 | } 44 | return newdf(Range(0,index_row),_); 45 | } 46 | -------------------------------------------------------------------------------- /src/sample_pairs01.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | using namespace Rcpp; 4 | using namespace std; 5 | 6 | // This function takes a vector of zeros and ones, 7 | // with at least one zero and one one 8 | // and samples uniformly among the indices pointing to zeros, 9 | // and uniformly among the indices pointing to ones, and 10 | // outputs the pair 11 | // [[Rcpp::export]] 12 | IntegerVector sample_pair01(const NumericVector & selection){ 13 | RNGScope scope; 14 | int l = selection.size(); 15 | int sumones = sum(selection); 16 | IntegerVector indices(2); 17 | indices(0) = -1; 18 | indices(1) = -1; 19 | GetRNGstate(); 20 | NumericVector us = runif(2); 21 | PutRNGstate(); 22 | double u0 = us(0); 23 | double u1 = us(1); 24 | double w0 = 1. / (l - sumones); 25 | double w1 = 1. / sumones; 26 | double csw0 = 0.; 27 | double csw1 = 0.; 28 | for (int k = 0; k < l; k++){ 29 | if (selection(k) == 0){ 30 | csw0 += w0; 31 | if (indices(0) < 0 && csw0 > u0){ 32 | indices(0) = k+1; 33 | } 34 | } else { 35 | csw1 += w1; 36 | if (indices(1) < 0 && csw1 > u1){ 37 | indices(1) = k+1; 38 | } 39 | } 40 | } 41 | return indices; 42 | } 43 | 44 | -------------------------------------------------------------------------------- /src/vs_mlikelihood.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | // [[Rcpp::export]] 5 | double marginal_likelihood_c_2(Eigen::VectorXf selection, const Eigen::MatrixXf & X, const Eigen::VectorXf & Y, double Y2, double g){ 6 | double l = 0.; 7 | int n = X.rows(); 8 | int p = X.cols(); 9 | int s = selection.sum(); 10 | Eigen::MatrixXf temp; 11 | if (s > 0){ 12 | Eigen::MatrixXf Xselected(n,s); 13 | int counter = 0; 14 | for (int column = 0; column < p; column++){ 15 | if (selection(column)){ 16 | Xselected.col(counter) = X.col(column); 17 | counter++; 18 | } 19 | } 20 | temp = Y.transpose() * Xselected; 21 | temp = temp * ((Xselected.transpose() * Xselected).inverse()) * temp.transpose(); 22 | l = temp(0,0); 23 | } else { 24 | l = 0.; 25 | } 26 | l = -((double) s + 1.) / 2. * log(g + 1.) - (double) n / 2. * log(Y2 - g / (g + 1.) * l); 27 | return l; 28 | } 29 | 30 | 31 | -------------------------------------------------------------------------------- /unbiasedmcmc.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.png 3 | *.R 4 | *files 5 | --------------------------------------------------------------------------------