├── .Rbuildignore
├── .Rprofile
├── .gitignore
├── DESCRIPTION
├── NAMESPACE
├── R
    ├── H_bar.R
    ├── RcppExports.R
    ├── bayesianlasso.R
    ├── c_chains_to_dataframe.R
    ├── c_chains_to_measure_as_list.R
    ├── coupled_chains.R
    ├── coupled_pairs01.R
    ├── gamma_couplings.R
    ├── get_max_coupling.R
    ├── get_mh_kernels.R
    ├── histogram_c_chains.R
    ├── invgamma_couplings.R
    ├── invgaussian_couplings.R
    ├── ising.R
    ├── logistic_regression.R
    ├── meetingtime.R
    ├── mvnorm.R
    ├── mvnorm_couplings.R
    ├── rnorm_max_coupling.R
    ├── rnorm_reflectionmax.R
    ├── unbiasedestimator.R
    ├── unbiasedmcmc-package.R
    ├── utils.R
    └── variableselection.R
├── README.Rmd
├── README.md
├── README_files
    └── figure-gfm
    │   ├── estimators-1.png
    │   ├── usage-1.png
    │   └── usage-2.png
├── data
    ├── diabetes.RData
    └── germancredit.RData
├── inst
    ├── README.R
    ├── check
    │   ├── check_discretecouplings.R
    │   ├── check_gamma.R
    │   ├── check_hbar.R
    │   ├── check_histograms.R
    │   ├── check_inversegamma.R
    │   ├── check_inversegaussian.R
    │   ├── check_ising.R
    │   ├── check_lags.R
    │   ├── check_mvnorm.R
    │   ├── check_normalcouplings.R
    │   ├── check_reflmaxcoupling.R
    │   ├── check_rwmh_bivariate.R
    │   ├── check_rwmh_doublewell.R
    │   ├── check_rwmh_minimalcode.R
    │   ├── check_rwmh_univariate.R
    │   ├── check_vs_mlikelihood.R
    │   ├── check_weirdbug.R
    │   └── to_check_signed_measure.R
    ├── reproducebaseball
    │   ├── baseball.mcmc.run.R
    │   ├── baseball.plots.R
    │   ├── baseball.run.R
    │   └── run.all.R
    ├── reproducebayesianlasso
    │   ├── bayesianlasso.mcmc.run.R
    │   ├── bayesianlasso.plots.R
    │   ├── bayesianlasso.run.R
    │   └── run.all.R
    ├── reproducebimodal
    │   ├── bimodal.mcmc.run.R
    │   ├── bimodal.plots.R
    │   ├── bimodal.run.R
    │   └── run.all.R
    ├── reproduceepidemiology
    │   ├── plummer.cut.plots.R
    │   ├── plummer.cut.run.R
    │   └── run.all.R
    ├── reproducegermancredit
    │   ├── german_credit.csv
    │   ├── germancredit.mcmc.run.R
    │   ├── germancredit.plots.R
    │   ├── germancredit.preparedata.R
    │   ├── germancredit.run.R
    │   └── run.all.R
    ├── reproduceisingmodel
    │   ├── ising.gibbs.meetings.run.R
    │   ├── ising.mcmc.run.R
    │   ├── ising.plots.R
    │   ├── ising.swap.meetings.run.R
    │   ├── run.all.R
    │   ├── run.batch.ising.R
    │   └── run.odyssey.ising.sh
    ├── reproducepumpfailures
    │   ├── pumpfailures.mcmc.run.R
    │   ├── pumpfailures.mykland.run.R
    │   ├── pumpfailures.plots.R
    │   ├── pumpfailures.run.R
    │   ├── pumpfailures.tuning.run.R
    │   └── run.all.R
    ├── reproducescalingdimension
    │   ├── run.all.R
    │   ├── scaling.gibbs.R
    │   ├── scaling.hmc.R
    │   ├── scaling.plots.R
    │   ├── scaling.rwmh.maximalcoupling.R
    │   └── scaling.rwmh.reflectionmaxcoupling.R
    └── reproducevarselection
    │   ├── run.all.R
    │   ├── varselection.clusterscript.kappas.R
    │   ├── varselection.differentkappas.R
    │   ├── varselection.differentkappas.plots.R
    │   ├── varselection.differentp.R
    │   ├── varselection.differentp.plots.R
    │   ├── varselection.generatedata.run.R
    │   ├── varselection.mcmc.run.R
    │   ├── varselection.script.R
    │   └── varselection.tables.R
├── man
    ├── H_bar.Rd
    ├── c_chains_to_dataframe.Rd
    ├── c_chains_to_measure_as_list.Rd
    ├── dinversegamma.Rd
    ├── dinvgaussian.Rd
    ├── expit.Rd
    ├── fast_dmvnorm.Rd
    ├── fast_dmvnorm_chol_inverse.Rd
    ├── fast_rmvnorm.Rd
    ├── fast_rmvnorm_chol.Rd
    ├── get_blasso.Rd
    ├── get_max_coupling.Rd
    ├── get_mh_kernel.Rd
    ├── get_variableselection.Rd
    ├── hello.Rd
    ├── histogram_c_chains.Rd
    ├── logistic_precomputation.Rd
    ├── pg_gibbs.Rd
    ├── rcpp_hello.Rd
    ├── rgamma_coupled.Rd
    ├── rinversegamma.Rd
    ├── rinversegamma_coupled.Rd
    ├── rinvgaussian.Rd
    ├── rinvgaussian_coupled.Rd
    ├── rmvnorm_max.Rd
    ├── rmvnorm_max_chol.Rd
    ├── rmvnorm_reflectionmax.Rd
    ├── rnorm_max_coupling.Rd
    ├── rnorm_reflectionmax.Rd
    ├── sample_coupled_chains.Rd
    ├── sample_meetingtime.Rd
    ├── sample_unbiasedestimator.Rd
    ├── setmytheme.Rd
    └── unbiasedmcmc-package.Rd
├── src
    ├── PolyaGamma.cpp
    ├── PolyaGamma.h
    ├── RNG.cpp
    ├── RNG.h
    ├── RRNG.cpp
    ├── RRNG.h
    ├── RcppExports.cpp
    ├── blassoutil.cpp
    ├── c_chains_to_measure.cpp
    ├── estimator_bin.cpp
    ├── inversegaussian.cpp
    ├── ising.cpp
    ├── logisticregression.cpp
    ├── logisticregressioncoupling.cpp
    ├── mvnorm.cpp
    ├── mvnorm.h
    ├── propensityscore.cpp
    ├── prune.cpp
    ├── sample_pairs01.cpp
    └── vs_mlikelihood.cpp
├── unbiasedmcmc.Rproj
└── vignettes
    ├── .gitignore
    ├── introduction-normaltarget.Rmd
    └── polyagammagibbs.Rmd


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^doc$
4 | ^Meta$
5 | 


--------------------------------------------------------------------------------
/.Rprofile:
--------------------------------------------------------------------------------
1 | .First <- function(){
2 |   print("hello")
3 |   Sys.setenv(MAKEFLAGS = " -j6")
4 | }
5 | 
6 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.txt
 2 | *.pdf
 3 | *.tex
 4 | .*.swp
 5 | .DS_Store
 6 | TODO
 7 | .Rproj.user
 8 | .Rhistory
 9 | *.RData
10 | .Ruserdata
11 | .Rprofile
12 | src/*.o
13 | src/*.so
14 | src/*.dll
15 | README_cache/
16 | inst/tutorials/*Rmd
17 | inst/tutorials/*pdf
18 | inst/tutorials/*cache*/
19 | inst/tutorials/*files*/
20 | inst/reproducescalingdimension_old
21 | inst/reproducescalingdimension/backup
22 | inst/doc
23 | vignettes/*cache*
24 | vignettes/*cache*/*html*
25 | *.out
26 | inst/reproducepropensityscores
27 | doc
28 | Meta
29 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: unbiasedmcmc
 2 | Type: Package
 3 | Title: Unbiased MCMC with couplings
 4 | Version: 0.3.0
 5 | Author: Pierre E Jacob, John O'Leary, Yves F Atchade
 6 | Maintainer: Pierre E Jacob <pjacob@fas.harvard.edu>
 7 | Description: This package includes the scripts to reproduce the article
 8 |     "Unbiased Markov chain Monte Carlo with couplings" by Pierre E. Jacob, John
 9 |     O'Leary, Yves F. Atchade, where it is described how the bias of MCMC can
10 |     be exactly removed, using couplings of Markov chains. The technical report
11 |     is available at https://arxiv.org/abs/1708.03625.
12 | License: GPL (>= 2)
13 | Encoding: UTF-8
14 | Depends:
15 |     Rcpp,RcppEigen,lubridate
16 | Imports:
17 |     Rcpp (>= 0.11.6),RcppEigen
18 | LinkingTo: Rcpp,RcppEigen
19 | RoxygenNote: 7.2.1
20 | Suggests: 
21 |     knitr,
22 |     rmarkdown
23 | VignetteBuilder: knitr
24 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(H_bar)
 4 | export(beta2e)
 5 | export(c_chains_to_dataframe)
 6 | export(c_chains_to_measure_as_list)
 7 | export(coupled_pairs01)
 8 | export(create_mids)
 9 | export(cut_in_fifth)
10 | export(dinversegamma)
11 | export(dinvgaussian)
12 | export(estimator_bin)
13 | export(expit)
14 | export(fast_dmvnorm)
15 | export(fast_dmvnorm_chol_inverse)
16 | export(fast_rmvnorm)
17 | export(fast_rmvnorm_chol)
18 | export(find_breaks)
19 | export(get_blasso)
20 | export(get_max_coupling)
21 | export(get_mh_kernels)
22 | export(get_variableselection)
23 | export(histogram_c_chains)
24 | export(ising_coupled_kernel)
25 | export(ising_pt_coupled_kernel)
26 | export(ising_pt_rinit)
27 | export(ising_pt_single_kernel)
28 | export(ising_rinit)
29 | export(ising_single_kernel)
30 | export(logisticregression_m_and_sigma)
31 | export(logisticregression_m_function)
32 | export(logisticregression_precomputation)
33 | export(logisticregression_sigma)
34 | export(logisticregression_sigma_function)
35 | export(logisticregression_xbeta)
36 | export(pg_gibbs)
37 | export(plot_histogram)
38 | export(rgamma_coupled)
39 | export(rinversegamma)
40 | export(rinversegamma_coupled)
41 | export(rinvgaussian)
42 | export(rinvgaussian_coupled)
43 | export(rmvnorm_max)
44 | export(rmvnorm_max_chol)
45 | export(rmvnorm_reflectionmax)
46 | export(rnorm_max_coupling)
47 | export(rnorm_reflectionmax)
48 | export(sample_beta)
49 | export(sample_coupled_chains)
50 | export(sample_meetingtime)
51 | export(sample_unbiasedestimator)
52 | export(sample_w)
53 | export(setmytheme)
54 | export(w_max_coupling_caller)
55 | export(w_rejsampler)
56 | export(w_rejsampler_caller)
57 | importFrom(Rcpp,sourceCpp)
58 | useDynLib(unbiasedmcmc)
59 | 


--------------------------------------------------------------------------------
/R/H_bar.R:
--------------------------------------------------------------------------------
 1 | #'@rdname H_bar
 2 | #'@title Compute unbiased estimators from coupled chains
 3 | #'@description Compute unbiased estimators based on coupled chains.
 4 | #' Presumably generated via \code{\link{sample_coupled_chains}}.
 5 | #'
 6 | #' The test function h should take a "chain_state" as argument and return a numeric vector.
 7 | #' The estimand of interest is \eqn{\int h(x) \pi(x) dx}, where \eqn{\pi} is the invariant distribution
 8 | #' of the chains.
 9 | #'
10 | #' The lag is inferred from the coupled chains, so there's no argument to specify it.
11 | #'@param c_chains A list containing coupled chains generated by \code{\link{sample_coupled_chains}}.
12 | #'@param h A test function of interest, which should take a chain state ("chain_state" entry of the output of "rinit", for instance)
13 | #'and return a numeric vector
14 | #'@param k An integer at which to start computing the unbiased estimator; should be less than m
15 | #'@param m A time horizon: should be less than the length of the chains; typically the same
16 | #'m that was used in the call to \code{\link{sample_coupled_chains}}, or a smaller value
17 | #'@return A value (or vector of values) of an unbiased estimator of \eqn{\int h(x) \pi(x) dx}
18 | #'@export
19 | H_bar <- function(c_chains, h = function(x) x, k = 0, m = 1){
20 |   maxiter <- c_chains$iteration
21 |   if (k > maxiter){
22 |     print("error: k has to be less than the horizon of the coupled chains")
23 |     return(NULL)
24 |   }
25 |   if (m > maxiter){
26 |     print("error: m has to be less than the horizon of the coupled chains")
27 |     return(NULL)
28 |   }
29 |   # infer the lag from the number of rows in samples1 and samples2
30 |   lag <- dim(c_chains$samples1)[1] - dim(c_chains$samples2)[1]
31 |   # test the dimension of h(X)
32 |   # p <- length(h(c_chains$samples1[1,]))
33 |   h_of_chain <- apply(X = c_chains$samples1[(k+1):(m+1),,drop=F], MARGIN = 1, FUN = h)
34 |   if (is.null(dim(h_of_chain))){
35 |     h_of_chain <- matrix(h_of_chain, ncol = 1)
36 |   } else {
37 |     h_of_chain <- t(h_of_chain)
38 |   }
39 |   H_bar <- apply(X = h_of_chain, MARGIN = 2, sum)
40 |   # next, add bias correction terms
41 |   # Delta_t refers to h(X_t) - h(Y_{t-lag})
42 |   if (c_chains$meetingtime <= k + lag){
43 |     # nothing else to add, because Delta_t = 0 for all t >= meeting time
44 |   } else {
45 |     for (time in (k+lag):(c_chains$meetingtime-1)){
46 |       # time is the index t of X_{t} where the chain start from X_{0}
47 |       coefficient_t <- (floor((time-k) / lag) - ceiling(max(lag, time-m)/lag) + 1)
48 |       Delta_t <- h(c_chains$samples1[time+1,]) - h(c_chains$samples2[time-lag+1,])
49 |       H_bar <- H_bar + coefficient_t * Delta_t
50 |     }
51 |   }
52 |   # return result divided by number of terms i.e. m - k + 1
53 |   return(H_bar / (m - k + 1))
54 | }
55 | 


--------------------------------------------------------------------------------
/R/bayesianlasso.R:
--------------------------------------------------------------------------------
 1 | #' Y and X need to be matrices, and lambda non-negative
 2 | #'@export
 3 | get_blasso <- function(Y, X, lambda){
 4 |   p <- ncol(X)
 5 |   n <- nrow(X)
 6 |   XtX <- t(X) %*% X
 7 |   XtY <- t(X) %*% Y
 8 |   alpha1 <- (n-1)/2 + p/2
 9 |   lambda2 <- lambda^2
10 |   # naive initialization
11 |   rinit <- function(){
12 |     return(list(chain_state = c(rep(0, p), rep(1, p), 1)))
13 |   }
14 |   #
15 |   gibbs_kernel <- function(state){
16 |     beta <- state$chain_state[1:p]
17 |     tau2 <- state$chain_state[(p+1):(2*p)]
18 |     sigma2 <- state$chain_state[2*p+1]
19 |     res_ <- unbiasedmcmc:::blassoconditional(Y, X, XtY, XtX, tau2, sigma2)
20 |     beta <- res_$beta
21 |     norm <- res_$norm
22 |     betaDbeta <- res_$betaDbeta
23 |     sigma2 <- rinversegamma(1, alpha1, 0.5 * (norm + betaDbeta))
24 |     # update tau
25 |     sqrtlambda2sigma2 <- sqrt(lambda2 * sigma2)
26 |     for (component in 1:p){
27 |       tau2[component] <- 1 / rinvgaussian(1, sqrtlambda2sigma2 / abs(beta[component]), lambda2)
28 |     }
29 |     return(list(chain_state = c(beta, tau2, sigma2)))
30 |   }
31 | 
32 |   coupled_gibbs_kernel <- function(state1, state2){
33 |     beta1 <- state1$chain_state[1:p]
34 |     tau21 <- state1$chain_state[(p+1):(2*p)]
35 |     sigma21 <- state1$chain_state[2*p+1]
36 |     beta2 <- state2$chain_state[1:p]
37 |     tau22 <- state2$chain_state[(p+1):(2*p)]
38 |     sigma22 <- state2$chain_state[2*p+1]
39 |     #
40 |     if (all(tau21 == tau22) && all(sigma21 == sigma22)){
41 |       res_ <- unbiasedmcmc:::blassoconditional(Y, X, XtY, XtX, tau21, sigma21)
42 |       beta1 <- res_$beta
43 |       norm1 <- res_$norm
44 |       betaDbeta1 <- res_$betaDbeta
45 |       beta2 <- beta1
46 |       norm2 <- norm1
47 |       betaDbeta2 <- betaDbeta1
48 |     } else {
49 |       res_ <- unbiasedmcmc:::blassoconditional_coupled(Y, X, XtY, XtX, tau21, tau22, sigma21, sigma22)
50 |       beta1 <- res_$beta1
51 |       norm1 <- res_$norm1
52 |       betaDbeta1 <- res_$betaDbeta1
53 |       beta2 <- res_$beta2
54 |       norm2 <- res_$norm2
55 |       betaDbeta2 <- res_$betaDbeta2
56 |     }
57 |     sigma2s <- rinversegamma_coupled(alpha1, alpha1,
58 |                                0.5 * (norm1 + betaDbeta1),
59 |                                0.5 * (norm2 + betaDbeta2))
60 |     sigma21 <- sigma2s$xy[1]
61 |     sigma22 <- sigma2s$xy[2]
62 |     # update tau
63 |     sqrtlambda2sigma21 <- sqrt(lambda2 * sigma21)
64 |     sqrtlambda2sigma22 <- sqrt(lambda2 * sigma22)
65 |     for (component in 1:p){
66 |       if (sigma21 == sigma22 && beta1[component] == beta2[component]){
67 |         invtau2 <- rinvgaussian(1, sqrtlambda2sigma21/ abs(beta1[component]), lambda2)
68 |         tau21[component] <- 1 / invtau2
69 |         tau22[component] <- tau21[component]
70 |       } else {
71 |         invtau2s <- rinvgaussian_coupled(sqrtlambda2sigma21 / abs(beta1[component]),
72 |                                          sqrtlambda2sigma22 / abs(beta2[component]),
73 |                                          lambda2, lambda2)
74 |         tau21[component] <- 1 / invtau2s[1]
75 |         tau22[component] <- 1 / invtau2s[2]
76 |       }
77 |     }
78 |     chain_state1 <- c(beta1, tau21, sigma21)
79 |     chain_state2 <- c(beta2, tau22, sigma22)
80 |     identical_ <- all(chain_state1 == chain_state2)
81 |     return(list(state1 = list(chain_state = chain_state1),
82 |                 state2 = list(chain_state = chain_state2),
83 |                 identical = identical_))
84 |   }
85 |   return(list(rinit = rinit, gibbs_kernel = gibbs_kernel, coupled_gibbs_kernel = coupled_gibbs_kernel))
86 | }
87 | 


--------------------------------------------------------------------------------
/R/c_chains_to_dataframe.R:
--------------------------------------------------------------------------------
 1 | #'@rdname c_chains_to_dataframe
 2 | #'@title Obtain data frame representation of measure from list of coupled chains
 3 | #'@description From coupled chains,
 4 | #' presumably generated via \code{\link{sample_coupled_chains}},
 5 | #' and a choice of integers k and m, the function constructs
 6 | #' a data frame representation of an empirical signed measure, i.e.
 7 | #'
 8 | #'  \deqn{\hat{\pi}(dx) = \sum_{n=1}^N \omega_n \delta_{Z_n}(dx)}
 9 | #'
10 | #' The function returns a data frame with first column "rep" indicating index of coupled chain,
11 | #' second column "MCMC" indicating whether atom is part of the "MCMC" part of the signed measure (1) or the bias correction part (0)
12 | #' third column "weight" indicating weights,
13 | #' remaining columns "atom.1", "atom.2", etc containing components of the atoms
14 | #'
15 | #'@param c_chains A list containing coupled chains generated by \code{\link{sample_coupled_chains}}.
16 | #'@param k An integer at which to start computing the signed measure; should be less than m
17 | #'@param m A time horizon: should be less than the length of the chains; typically the same
18 | #'m that was used in the call to \code{\link{sample_coupled_chains}}, or a smaller value
19 | #'@param dopar Boolean (default to FALSE) indicating whether to perform calculation using registered parallel cores
20 | #'@return A data frame
21 | #'@export
22 | c_chains_to_dataframe <- function(c_chains, k, m, dopar = FALSE, prune = TRUE){
23 |   if ("meetingtime" %in% names(c_chains)){
24 |     # the user supplied a coupled chain, rather than a list of coupled chain
25 |     c_chains <- list(c_chains)
26 |   }
27 |   # number of coupled chains
28 |   nsamples <- length(c_chains)
29 |   approximation <- NULL
30 |   if (dopar){
31 |     # get signed measure representation of each coupled chain
32 |     approximation <-  foreach(irep = 1:nsamples, .combine = rbind) %dopar% {
33 |       ms_irep <- c_chains_to_measure_as_list(c_chains[[irep]], k, m)
34 |       data.frame(rep = rep(irep, length(ms_irep$weights)), MCMC = ms_irep$MCMC, weight = ms_irep$weights, atom = ms_irep$atoms)
35 |     }
36 |   } else {
37 |     approximation <- data.frame()
38 |     for (irep in 1:nsamples){
39 |       ms_irep <- c_chains_to_measure_as_list(c_chains[[irep]], k, m)
40 |       approximation <- rbind(approximation,
41 |          data.frame(rep = rep(irep, length(ms_irep$weights)), MCMC = ms_irep$MCMC, weight = ms_irep$weights, atom = ms_irep$atoms))
42 |     }
43 |   }
44 |   # normalize the weights
45 |   approximation$weight <- approximation$weight / nsamples
46 |   # prune identical atoms
47 |   if (prune){
48 |     approximation <- data.frame(prune_measure_(as.matrix(approximation[do.call(order, as.list(approximation[,c(1,2,4:ncol(approximation)),drop=F])),])))
49 |   }
50 |   names(approximation) <- c("rep", "MCMC", "weight", paste0("atom.", 1:(ncol(approximation)-3)))
51 |   approximation <- approximation[do.call(order, as.list(approximation[,1:2])),]
52 |   return(approximation)
53 | }
54 | 


--------------------------------------------------------------------------------
/R/c_chains_to_measure_as_list.R:
--------------------------------------------------------------------------------
 1 | #' @rdname c_chains_to_measure_as_list
 2 | #' @title Obtain empirical measure (as list) from coupled chains
 3 | #' @description From coupled chains,
 4 | #' presumably generated via \code{\link{sample_coupled_chains}},
 5 | #' and a choice of integers k and m, the function constructs
 6 | #' a representation of an empirical signed measure, i.e.
 7 | #'
 8 | #'  \deqn{\hat{\pi}(dx) = \sum_{n=1}^N \omega_n \delta_{Z_n}(dx)}
 9 | #'
10 | #' The function returns the weights \eqn{\omega} and the atoms \eqn{Z},
11 | #' in a list with two entries, "weights" and "atoms".
12 | #'
13 | #' @param c_chains A list containing coupled chains generated by \code{\link{sample_coupled_chains}}.
14 | #' @param k An integer at which to start computing the signed measure; should be less than m
15 | #' @param m A time horizon: should be less than the length of the chains; typically the same
16 | #' m that was used in the call to \code{\link{sample_coupled_chains}}, or a smaller value
17 | #' @return A list with "weights" and "atoms"
18 | #'@export
19 | c_chains_to_measure_as_list <- function(c_chains, k, m){
20 |   if (k > m){
21 |     stop("k must be <= than m")
22 |   }
23 |   if (m > c_chains$iteration){
24 |     stop("m has to be less than the time horizon of the coupled chains")
25 |   }
26 |   return(c_chains_to_measure_as_list_(c_chains, k, m))
27 | }
28 | 


--------------------------------------------------------------------------------
/R/coupled_chains.R:
--------------------------------------------------------------------------------
 1 | #'@rdname sample_coupled_chains
 2 | #'@title Sample coupled Markov chains
 3 | #'@description Sample two Markov chains, each following 'single_kernel' marginally,
 4 | #' and 'coupled_kernel' jointly, until min(max(tau, m), max_iterations), where tau
 5 | #' is the first time the two chains meet (the "meeting time").
 6 | #'
 7 | #' Or more precisely, they meet with a delay of lag, i.e. X_t = Y_{t-lag}, and lag is one by default.
 8 | #'
 9 | #' Once the coupled chains are obtained, unbiased estimators can be computed for arbitrary test
10 | #' functions via the function \code{\link{H_bar}}.
11 | #'
12 | #' If you're only interested in sampling meeting times, see \code{\link{sample_meetingtime}}.
13 | #'
14 | #'
15 | #'@param single_kernel A list taking a state and returning a state, performing one step of a Markov kernel
16 | #'@param coupled_kernel A list taking two states and returning two states, performing one step of a coupled Markov kernel;
17 | #'it also returns a boolean "identical" indicating whether the two states are identical.
18 | #'@param rinit A list representing the initial state of the chain, that can be given to 'single_kernel'
19 | #'@param m A time horizon: the chains are sampled until the maximum between m and the meeting time
20 | #'@param lag A time lag, equal to one by default
21 | #'@param max_iterations A maximum number of iterations, at which to interrup the while loop; Inf by default
22 | #'@param preallocate A number of anticipated iterations, used to pre-allocate memory; 10 by default
23 | #'@return A list with
24 | #'\itemize{
25 | #'
26 | #'\item samples1: the first chain, of length max(m, tau)
27 | #'
28 | #'\item samples2: the second chain, of length max(m, tau) - lag
29 | #'
30 | #'\item meetingtime: the meeting time; equal to Inf if while loop was interrupted
31 | #'
32 | #'\item iteration: final iteration; could be equal to m, to meetingtime, or to max_iterations
33 | #'
34 | #'\item elapsedtime: elapsed wall-clock time, in seconds
35 | #'
36 | #'\item cost: computing cost in terms of calls to Markov kernels (counting coupled kernel as twice the cost)
37 | #'}
38 | #'@export
39 | sample_coupled_chains <- function(single_kernel, coupled_kernel, rinit, m = 1, lag = 1, max_iterations = Inf, preallocate = 10){
40 |   starttime <- Sys.time()
41 |   state1 <- rinit(); state2 <- rinit()
42 |   dimstate <- length(state1$chain_state)
43 |   nrowsamples1 <- m+preallocate+lag
44 |   samples1 <- matrix(nrow = nrowsamples1, ncol = dimstate)
45 |   samples2 <- matrix(nrow = nrowsamples1-lag, ncol = dimstate)
46 |   samples1[1,] <- state1$chain_state
47 |   samples2[1,] <- state2$chain_state
48 |   # current_nsamples1 <- 1
49 |   time <- 0
50 |   for (t in 1:lag){
51 |     time <- time + 1
52 |     state1 <- single_kernel(state1)
53 |     samples1[time+1,] <- state1$chain_state
54 |   }
55 |   # current_nsamples1 <- current_nsamples1 + 1
56 |   # iter <- 1
57 |   meetingtime <- Inf
58 |   while ((time < max(meetingtime, m)) && (time < max_iterations)){
59 |     time <- time + 1 # time is lag+1,lag+2,...
60 |     if (is.finite(meetingtime)){
61 |       state1 <- single_kernel(state1)
62 |       state2 <- state1
63 |     } else {
64 |       res_coupled_kernel <- coupled_kernel(state1, state2)
65 |       state1 <- res_coupled_kernel$state1
66 |       state2 <- res_coupled_kernel$state2
67 |       if (res_coupled_kernel$identical){
68 |         meetingtime <- time
69 |       }
70 |     }
71 |     if ((time+1) > nrowsamples1){
72 |       new_rows <- nrowsamples1
73 |       nrowsamples1 <- nrowsamples1 + new_rows
74 |       samples1 <- rbind(samples1, matrix(NA, nrow = new_rows, ncol = dimstate))
75 |       samples2 <- rbind(samples2, matrix(NA, nrow = new_rows, ncol = dimstate))
76 |     }
77 |     samples1[time+1,] <- state1$chain_state
78 |     samples2[time-lag+1,] <-   state2$chain_state
79 |   }
80 |   samples1 <- samples1[1:(time+1),,drop=F]
81 |   samples2 <- samples2[1:(time-lag+1),,drop=F]
82 |   cost <- lag + 2*(meetingtime - lag) + max(0, time - meetingtime)
83 |   currenttime <- Sys.time()
84 |   elapsedtime <- as.numeric(lubridate::as.duration(lubridate::ymd_hms(currenttime) - lubridate::ymd_hms(starttime)), "seconds")
85 |   return(list(samples1 = samples1, samples2 = samples2,
86 |               meetingtime = meetingtime, iteration = time, elapsedtime = elapsedtime, cost = cost))
87 | }
88 | 
89 | 


--------------------------------------------------------------------------------
/R/coupled_pairs01.R:
--------------------------------------------------------------------------------
 1 | # jointly sample ones and zeros
 2 | #'@export
 3 | coupled_pairs01 <- function(selection1, selection2){
 4 |   p <- length(selection1)
 5 |   # jointly samples ones
 6 |   s1 <- sum(selection1)
 7 |   s2 <- sum(selection2)
 8 |   w11 <- selection1 / s1
 9 |   w12 <- selection2 / s2
10 |   pmin1 <- pmin(w11, w12)
11 |   alpha1 <- sum(pmin1)
12 |   minor1 <- pmin1 / alpha1
13 |   residual1 <- (w11 - pmin1) / (1 - alpha1)
14 |   residual2 <- (w12 - pmin1) / (1 - alpha1)
15 |   ind1 <- c(NA, NA)
16 |   if (runif(1) < alpha1){
17 |     x <- sample(x = 1:p, size = 1, prob = minor1)
18 |     ind1 <- c(x,x)
19 |   } else {
20 |     ind1 <- c(sample(x = 1:p, size = 1, prob = residual1),
21 |               sample(x = 1:p, size = 1, prob = residual2))
22 |   }
23 |   # jointly samples zeros
24 |   w01 <- (1 - selection1) / (p - s1)
25 |   w02 <- (1 - selection2) / (p - s2)
26 |   pmin0 <- pmin(w01, w02)
27 |   alpha0 <- sum(pmin0)
28 |   minor0 <- pmin0 / alpha0
29 |   residual1 <- (w01 - pmin0) / (1 - alpha0)
30 |   residual2 <- (w02 - pmin0) / (1 - alpha0)
31 |   ind0 <- c(NA, NA)
32 |   if (runif(1) < alpha0){
33 |     x <- sample(x = 1:p, size = 1, prob = minor0)
34 |     ind0 <- c(x,x)
35 |   } else {
36 |     ind0 <- c(sample(x = 1:p, size = 1, prob = residual1),
37 |               sample(x = 1:p, size = 1, prob = residual2))
38 |   }
39 |   return(c(ind0, ind1))
40 | }
41 | 


--------------------------------------------------------------------------------
/R/gamma_couplings.R:
--------------------------------------------------------------------------------
 1 | #'@rdname rgamma_coupled
 2 | #'@title Sample from maximally coupled Gamma variables
 3 | #'@description Draws a pair of variables, respectively Gamma(alpha1, beta1) and Gamma(alpha2, beta2)
 4 | #'where the parametrization is that beta is the rate, i.e. the log-pdf of Gamma(alpha,beta) evaluated at x is
 5 | #' \deqn{\alpha * log(\beta) - lgamma(\alpha) + (\alpha-1) * log(x) - \beta x}
 6 | #' where \eqn{lgamma} stands for the logarithm of the Gamma function.
 7 | #'@param alpha1 First shape
 8 | #'@param alpha2 Second shape
 9 | #'@param beta1 First rate
10 | #'@param beta2 Second rate
11 | #'@return A list with entry 'xy' for the pair of values, and boolean 'identical' indicating whether the two values
12 | #'are identical.
13 | #'@export
14 | rgamma_coupled <- function(alpha1, alpha2, beta1, beta2){
15 |   f <- get_max_coupling(function(n) rgamma(n, alpha1, beta1),
16 |                         function(x) dgamma(x, alpha1, beta1, log = TRUE),
17 |                         function(n) rgamma(n, alpha2, beta2),
18 |                         function(x) dgamma(x, alpha2, beta2, log = TRUE))
19 |   return(f())
20 | }
21 | 


--------------------------------------------------------------------------------
/R/get_max_coupling.R:
--------------------------------------------------------------------------------
 1 | #'@rdname get_max_coupling
 2 | #'@title Sample from maximal coupling of two distributions p and q
 3 | #'@description Takes two univariate continuous distributions (specified by random number generator and log-pdf function),
 4 | #' and returns a function to sample from a maximal coupling of these two distributions.
 5 | #'@param rp A function taking n as an argument and returning n samples from the distribution p
 6 | #'@param dp A function taking x as an argument and returning log-pdf of p evaluated at x
 7 | #'@param rq A function taking n as an argument and returning n samples from the distribution q
 8 | #'@param dq A function taking x as an argument and returning log-pdf of q evaluated at x
 9 | #'@return Returns a list with
10 | #'
11 | #' \itemize{
12 | #' \item "xy": the pair of samples \eqn{(x,y)}
13 | #'
14 | #' \item "identical": TRUE if \eqn{x = y}, FALSE otherwise
15 | #' }
16 | #'@examples
17 | #' mu1 <- 0; mu2 <- 1; sigma1 <- 0.5; sigma2 <- 1.2
18 | #'  f <- get_max_coupling(function(n) rnorm(n, mu1, sigma1),
19 | #'  function(x) dnorm(x, mu1, sigma1, log = TRUE),
20 | #'  function(n) rnorm(n, mu2, sigma2),
21 | #'  function(x) dnorm(x, mu2, sigma2, log = TRUE))
22 | #' f()
23 | #'@export
24 | get_max_coupling <- function(rp, dp, rq, dq){
25 |   function(){
26 |     x <- rp(1)
27 |     if (dp(x) + log(runif(1)) < dq(x)){
28 |       return(list(xy = c(x,x), identical = TRUE))
29 |     } else {
30 |       reject <- TRUE
31 |       y <- NA
32 |       while (reject){
33 |         y <- rq(1)
34 |         reject <- (dq(y) + log(runif(1)) < dp(y))
35 |       }
36 |       return(list(xy = c(x,y), identical = FALSE))
37 |     }
38 |   }
39 | }
40 | 


--------------------------------------------------------------------------------
/R/get_mh_kernels.R:
--------------------------------------------------------------------------------
 1 | #'@rdname get_mh_kernel
 2 | #'@title Get random walk Metropolis-Hastings kernels
 3 | #'@description This function takes a target (specified through its log-pdf)
 4 | #' and a covariance matrix for a Normal random walk proposal, and returns a list containing the keys
 5 | #' \code{single_kernel}, \code{coupled_kernel} corresponding to marginal
 6 | #' and coupled MH kernels.
 7 | #'
 8 | #' The coupling is done by reflection-maximal coupling of the proposals,
 9 | #' and common uniform variable for the accept/reject step. For reflection-maximal
10 | #' couplings, see \code{\link{rnorm_reflectionmax}} and \code{\link{rmvnorm_reflectionmax}}.
11 | #'
12 | #' The returned kernels can then be used in the functions \code{\link{sample_meetingtime}} or
13 | #' \code{\link{sample_coupled_chains}} or \code{\link{sample_unbiasedestimator}}.
14 | #'
15 | #'@param target function taking a vector as input and returning target log-density evaluation
16 | #'@param Sigma_proposal covariance of the Normal random walk proposal
17 | #'@return A list containing the keys
18 | #' \code{single_kernel}, \code{coupled_kernel}.
19 | #'@export
20 | get_mh_kernels <- function(target, Sigma_proposal){
21 |   if (is.null(dim(Sigma_proposal))){
22 |     Sigma_proposal <- matrix(Sigma_proposal)
23 |   }
24 |   dimension <- dim(Sigma_proposal)[1]
25 |   Sigma_proposal_chol <- chol(Sigma_proposal)
26 |   Sigma_proposal_chol_inv <- solve(chol(Sigma_proposal))
27 |   zeromean <- rep(0, dimension)
28 |   # single kernel
29 |   single_kernel <- function(state){
30 |     chain_state <- state$chain_state
31 |     current_pdf <- state$current_pdf
32 |     proposal_value <- fast_rmvnorm_chol(1, chain_state, Sigma_proposal_chol)
33 |     proposal_pdf <- target(proposal_value)
34 |     accept <- (log(runif(1)) < (proposal_pdf - current_pdf))
35 |     if (accept){
36 |       return(list(chain_state = proposal_value, current_pdf = proposal_pdf))
37 |     } else {
38 |       return(list(chain_state = chain_state, current_pdf = current_pdf))
39 |     }
40 |   }
41 |   # coupled kernel
42 |   coupled_kernel <- function(state1, state2){
43 |     chain_state1 <- state1$chain_state; current_pdf1 <- state1$current_pdf
44 |     chain_state2 <- state2$chain_state; current_pdf2 <- state2$current_pdf
45 |     if (dimension == 1){
46 |       proposal_value <- rnorm_reflectionmax(chain_state1, chain_state2, Sigma_proposal_chol[1,1])
47 |       proposal_value$xy <- matrix(proposal_value$xy, nrow = 1)
48 |     } else {
49 |       proposal_value <- rmvnorm_reflectionmax(chain_state1, chain_state2, Sigma_proposal_chol, Sigma_proposal_chol_inv)
50 |     }
51 |     proposal1 <- proposal_value$xy[,1]; proposal_pdf1 <- target(proposal1)
52 |     if (proposal_value$identical){
53 |       proposal2 <- proposal1; proposal_pdf2 <- proposal_pdf1
54 |     } else {
55 |       proposal2 <- proposal_value$xy[,2]; proposal_pdf2 <- target(proposal2)
56 |     }
57 |     logu <- log(runif(1))
58 |     accept1 <- FALSE; accept2 <- FALSE
59 |     if (is.finite(proposal_pdf1)){
60 |       accept1 <- (logu < (proposal_pdf1 - current_pdf1))
61 |     }
62 |     if (is.finite(proposal_pdf2)){
63 |       accept2 <- (logu < (proposal_pdf2 - current_pdf2))
64 |     }
65 |     if (accept1){
66 |       chain_state1 <- proposal1
67 |       current_pdf1 <- proposal_pdf1
68 |     }
69 |     if (accept2){
70 |       chain_state2 <- proposal2
71 |       current_pdf2 <- proposal_pdf2
72 |     }
73 |     identical_ <- proposal_value$identical && accept1 && accept2
74 |     return(list(state1 = list(chain_state = chain_state1, current_pdf = current_pdf1),
75 |                 state2 = list(chain_state = chain_state2, current_pdf = current_pdf2),
76 |                 identical = identical_))
77 |   }
78 |   return(list(single_kernel = single_kernel, coupled_kernel = coupled_kernel))
79 | }
80 | 


--------------------------------------------------------------------------------
/R/histogram_c_chains.R:
--------------------------------------------------------------------------------
 1 | ## create histogram
 2 | #'@rdname histogram_c_chains
 3 | #'@title histogram_c_chains
 4 | #'@description Compute histogram approximations of marginal distributions based on coupled Markov chains
 5 | #'@param c_chains A list of coupled chains, each as produced by \code{\link{sample_coupled_chains}}
 6 | #'@param component An integer specifying which marginal to approximate
 7 | #'@param k An integer (see \code{\link{H_bar}})
 8 | #'@param m Another integer (see \code{\link{H_bar}})
 9 | #'@param breaks A vector indicating how to bin the space (optional)
10 | #'@param nclass An integer specifying the number of bins to aim for, if "breaks" is not specified
11 | #'@param dopar A boolean indicating whether to parallelize the computation (requires doParallel and having registed parallel cores)
12 | #'@export
13 | histogram_c_chains <- function(c_chains, component, k, m, breaks = NULL, nclass = 30, dopar = FALSE){
14 |   nsamples <- length(c_chains)
15 |   lag <- dim(c_chains[[1]]$samples1)[1] - dim(c_chains[[1]]$samples2)[1]
16 |   if (is.null(breaks)){
17 |     breaks <- find_breaks(c_chains, component, nclass, k = k, m = m, lag = lag)
18 |   }
19 |   mids <- create_mids(breaks)
20 |   width <- diff(breaks)[1]
21 |   ### compute histogram
22 |   res_ <- NULL
23 |   if (dopar){
24 |     res_ <- foreach (ibreak = 2:length(breaks), .combine = rbind) %dopar% {
25 |       estimators <- rep(0, nsamples)
26 |       for (irep in 1:nsamples){
27 |         estimators[irep] <- estimator_bin(c_chains[[irep]], component, breaks[ibreak-1], breaks[ibreak], k, m, lag)
28 |       }
29 |       prop <- mean(estimators)
30 |       sd_prop <- sd(estimators) / sqrt(nsamples)
31 |       c(prop, sd_prop)
32 |     }
33 |   } else {
34 |     res_ <- matrix(0, nrow = length(breaks)-1, ncol = 2)
35 |     for (ibreak in 2:length(breaks)){
36 |       estimators <- rep(0, nsamples)
37 |       for (irep in 1:nsamples){
38 |         estimators[irep] <- estimator_bin(c_chains[[irep]], component, breaks[ibreak-1], breaks[ibreak], k, m, lag)
39 |       }
40 |       prop <- mean(estimators)
41 |       sd_prop <- sd(estimators) / sqrt(nsamples)
42 |       res_[ibreak-1,] <- c(prop, sd_prop)
43 |     }
44 |   }
45 |   prop <- res_[,1]
46 |   sd_prop <- res_[,2]
47 |   return(list(mids = mids, breaks = breaks, proportions = prop, sd = sd_prop, width = width))
48 | }
49 | 
50 | ## plot the result of the histogram_c_chains function
51 | #'@export
52 | plot_histogram <- function(histogram, with_bar = TRUE){
53 |   df_ <- data.frame(xmin = histogram$mids - histogram$width/2,
54 |                     xmax = histogram$mids + histogram$width/2,
55 |                     x = histogram$mids,
56 |                     y = histogram$proportions / histogram$width,
57 |                     ymin = (histogram$proportions - 2 * histogram$sd) / histogram$width,
58 |                     ymax = (histogram$proportions + 2 * histogram$sd) / histogram$width)
59 | 
60 |   g <- ggplot(df_, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax)) + geom_rect(alpha = 0.5)
61 |   if (with_bar){
62 |     g <- g + geom_segment(aes(x = x, xend = x, y = 0, yend = y)) + ylab("density")
63 |   }
64 |   return(g)
65 | }
66 | 
67 | ## create breaks based on list of c_chains
68 | #'@export
69 | find_breaks <- function(c_chains, component, nclass, k, m, lag){
70 |   all_samples <- unlist(sapply(c_chains, function(x) x$samples1[(k+1):(m+1),component]))
71 |   all_samples <- c(all_samples, unlist(sapply(c_chains, function(x) x$samples2[(k):(m+1-lag),component])))
72 |   br <- hist(all_samples, plot=F, nclass = nclass)$breaks
73 |   return(br)
74 | }
75 | 
76 | #'@export
77 | create_mids <- function(breaks){
78 |   mids <- c()
79 |   for (i in 2:length(breaks)){
80 |     mids <- c(mids, breaks[i-1] + (breaks[i] - breaks[i-1])/2)
81 |   }
82 |   return(mids)
83 | }
84 | ## compute estimator of component being in [lower, upper]
85 | #'@export
86 | estimator_bin <- function(c_chains, component, lower, upper, k, m, lag){
87 |   return(estimator_bin_(c_chains, component, lower, upper, k, m, lag))
88 | }
89 | 


--------------------------------------------------------------------------------
/R/invgamma_couplings.R:
--------------------------------------------------------------------------------
 1 | #'@rdname dinversegamma
 2 | #'@title compute log-density of inverse gamma
 3 | #'@description at x > 0 and with given parameters alpha, beta, given by
 4 | #'  alpha * log(beta) - lgamma(alpha) - (alpha+1) * log(x) - beta / x
 5 | #'@export
 6 | dinversegamma <- function(x, alpha, beta){
 7 |   return(alpha * log(beta) - lgamma(alpha) - (alpha+1) * log(x) - beta / x)
 8 | }
 9 | 
10 | #'@rdname rinversegamma
11 | #'@title Sample from inverse gamma
12 | #'@description with given parameters alpha, beta, with log-density given by
13 | #' alpha * log(beta) - lgamma(alpha) - (alpha+1) * log(x) - beta / x
14 | #'@export
15 | rinversegamma <- function(n, alpha, beta){
16 |   return(1/rgamma(n = n, shape = alpha, rate = beta))
17 | }
18 | 
19 | #'@rdname rinversegamma_coupled
20 | #'@title Sample from maximally coupled inverse gamma
21 | #'@description with given parameters alpha1, alpha2, beta1, beta2,
22 | #'where the parametrization is that the log-density of IG(alpha, beta) is
23 | #' alpha * log(beta) - lgamma(alpha) - (alpha+1) * log(x) - beta / x
24 | #'@export
25 | rinversegamma_coupled <- function(alpha1, alpha2, beta1, beta2){
26 |   f <- get_max_coupling(function(n) rinversegamma(n, alpha1, beta1),
27 |                         function(x) dinversegamma(x, alpha1, beta1),
28 |                         function(n) rinversegamma(n, alpha2, beta2),
29 |                         function(x) dinversegamma(x, alpha2, beta2))
30 |   return(f())
31 | }
32 | 


--------------------------------------------------------------------------------
/R/invgaussian_couplings.R:
--------------------------------------------------------------------------------
 1 | #'@rdname dinvgaussian
 2 | #'@title Log-density of inverse Gaussian
 3 | #'@description Computes log-pdf at x > 0 of inverse Gaussian with given parameters mu, lambda, given by
 4 | #'  \deqn{0.5 * log(\lambda/(2*\pi)) - 1.5 * log(x) - \lambda * (x-\mu)^2 / (2 * \mu^2 * x)}
 5 | #'@return A vector of n log-pdf values, one for each element in the first argument 'x'.
 6 | #'@export
 7 | dinvgaussian <- function(x, mu, lambda){
 8 |   return(0.5 * log(lambda/(2*pi)) - 1.5 * log(x) - lambda * (x-mu)^2 / (2 * mu^2 * x))
 9 | }
10 | 
11 | #'@rdname rinvgaussian
12 | #'@title Sample from inverse Gaussian
13 | #'@description Parametrized by mu, lambda, with log-density given by
14 | #'  \deqn{0.5 * log(\lambda/(2*\pi)) - 1.5 * log(x) - \lambda * (x-\mu)^2 / (2 * \mu^2 * x)}
15 | #'
16 | #'  The procedure goes as follows.
17 | #'
18 | #'  \itemize{
19 | #'  \item Generate nu ~ Normal(0,1).
20 | #'  \item Define y = nu^2.
21 | #'  \item Define x = mu + mu^2 * y / (2 * lambda) - mu / (2 * lambda) * sqrt(4 * mu * lambda * y + mu^2 * y^2).
22 | #'  \item Generate Z ~ Uniform(0,1).
23 | #'  \item If z <= mu / (mu + x), output x, otherwise output mu^2 / x.
24 | #'  }
25 | #'@return A vector of n draws, where n is the first argument.
26 | #'@export
27 | rinvgaussian <- function(n, mu, lambda){
28 |   return(rinvgaussian_c(n, mu, lambda))
29 | }
30 | 
31 | #'@rdname rinvgaussian_coupled
32 | #'@title Sample from maximally coupled inverse Gaussian
33 | #'@description with parameters mu1, mu2, lambda1, lambda2; see \code{\link{rinvgaussian}}.
34 | #'@return A pair of values in a vector of size two.
35 | #'@export
36 | rinvgaussian_coupled <- function(mu1, mu2, lambda1, lambda2){
37 |   rinvgaussian_coupled_c(mu1, mu2, lambda1, lambda2)
38 | }
39 | 


--------------------------------------------------------------------------------
/R/meetingtime.R:
--------------------------------------------------------------------------------
 1 | #'@rdname sample_meetingtime
 2 | #'@title Sample coupled Markov chains until meeting
 3 | #'@description Sample two Markov chains, each following 'single_kernel' marginally,
 4 | #'until they meet, and report the meeting time, as well as the elapsed wall-clock time in seconds.
 5 | #'
 6 | #' This function does not record the trajectories of the chains, with the goal of being memory-light.
 7 | #' To record these trajectories, see \code{\link{sample_coupled_chains}}. To directly
 8 | #' compute unbiased estimators on the fly, see  \code{\link{sample_unbiasedestimator}}.
 9 | #'
10 | #'@param single_kernel A list taking a state and returning a state, performing one step of a Markov kernel
11 | #'@param coupled_kernel A list taking two states and returning two states, performing one step of a coupled Markov kernel;
12 | #'it also returns a boolean "identical" indicating whether the two states are identical.
13 | #'@param rinit A list representing the initial state of the chain, that can be given to 'single_kernel'
14 | #'@param lag A time lag, equal to one by default
15 | #'@param max_iterations A maximum number of iterations, at which to interrup the while loop; Inf by default
16 | #'@return A list with
17 | #'\itemize{
18 | #'
19 | #'\item meetingtime: the meeting time; equal to Inf if while loop was interrupted
20 | #'
21 | #'\item elapsedtime: elapsed wall-clock time, in seconds
22 | #'}
23 | #'@export
24 | sample_meetingtime <- function(single_kernel, coupled_kernel, rinit, lag = 1, max_iterations = Inf){
25 |   starttime <- Sys.time()
26 |   # initialize
27 |   state1 <- rinit(); state2 <- rinit()
28 |   # move first chain
29 |   time <- 0
30 |   for (t in 1:lag){
31 |     time <- time + 1
32 |     state1 <- single_kernel(state1)
33 |   }
34 |   # move two chains until meeting (or until max_iterations)
35 |   meetingtime <- Inf
36 |   while (is.infinite(meetingtime) && (time < max_iterations)){
37 |     time <- time + 1
38 |     # use coupled kernel
39 |     coupledstates <- coupled_kernel(state1, state2)
40 |     state1 <- coupledstates$state1
41 |     state2 <- coupledstates$state2
42 |     # check if meeting happens
43 |     if (coupledstates$identical) meetingtime <- time
44 |   }
45 |   currentime <- Sys.time()
46 |   elapsedtime <- as.numeric(lubridate::as.duration(lubridate::ymd_hms(currentime) - lubridate::ymd_hms(starttime)), "seconds")
47 |   return(list(meetingtime = meetingtime, elapsedtime = elapsedtime))
48 | }
49 | 


--------------------------------------------------------------------------------
/R/mvnorm.R:
--------------------------------------------------------------------------------
 1 | #'@rdname fast_rmvnorm
 2 | #'@title fast_rmvnorm
 3 | #'@description Generate multivariate Normal draws. The function does not check
 4 | #' the arguments, use at your own risk.
 5 | #'@param n An integer >= 1 specifying the desired number of draws
 6 | #'@param mean A vector of size d specifying the mean vector of the multivariate Normal
 7 | #'@param covariance A matrix of size d x d specifying the covariance matrix of the multivariate Normal
 8 | #'@return A matrix of size n x d containing n d-dimensional multivariate Normal draws (one per row)
 9 | #'@examples
10 | #'  fast_rmvnorm(2, rep(0, 5), diag(1, 5, 5))
11 | #'@export
12 | fast_rmvnorm <- function(n, mean, covariance){
13 |   return(fast_rmvnorm_(n, mean, covariance))
14 | }
15 | 
16 | #'@rdname fast_rmvnorm_chol
17 | #'@title fast_rmvnorm_chol
18 | #'@description Generate multivariate Normal draws. The function does not check
19 | #' the arguments, use at your own risk.
20 | #'@param n An integer >= 1 specifying the desired number of draws
21 | #'@param mean A vector of size d specifying the mean vector of the multivariate Normal
22 | #'@param chol A matrix of size d x d specifying the upper triangular Cholesky factor
23 | #'of the covariance matrix of the multivariate Normal target,
24 | #'for instance obtained using the \code{\link[base]{chol}}
25 | #'function of R.
26 | #'@return A matrix of size n x d containing n d-dimensional multivariate Normal draws (one per row)
27 | #'@examples
28 | #' Sigma <- diag(1, 5, 5)
29 | #' Sigma[1,2] <- Sigma[2,1] <- 0.3
30 | #' fast_rmvnorm_chol(2, rep(0, 5), chol(Sigma))
31 | #'@export
32 | fast_rmvnorm_chol <- function(nparticles, mean, chol){
33 |   return(fast_rmvnorm_cholesky_(nparticles, mean, chol))
34 | }
35 | 
36 | #'@rdname fast_dmvnorm
37 | #'@title fast_dmvnorm
38 | #'@description Compute multivariate Normal density (log-value) evaluated at each row of a given matrix. The function does not check
39 | #' the arguments, use at your own risk.
40 | #'@param x A matrix of size n times d
41 | #'@param mean A vector of size d specifying the mean vector of the multivariate Normal
42 | #'@param covariance A matrix of size d x d specifying the covariance matrix of the multivariate Normal
43 | #'@return A vector of n evaluations of the multivariate Normal log-pdf, one for each row of \code{x}
44 | #'@examples
45 | #'x <- fast_rmvnorm(2, rep(0, 5), diag(1,5,5))
46 | #'fast_dmvnorm(x, rep(0, 5), diag(1,5,5))
47 | #'@export
48 | fast_dmvnorm <- function(x, mean, covariance){
49 |   return(fast_dmvnorm_(x, mean, covariance))
50 | }
51 | 
52 | #'@rdname fast_dmvnorm_chol_inverse
53 | #'@title fast_dmvnorm_chol_inverse
54 | #'@description Compute multivariate Normal density (log-value) evaluated at each row of a given matrix. The function does not check
55 | #' the arguments, use at your own risk.
56 | #'@param x A matrix of size n times d
57 | #'@param mean A vector of size d specifying the mean vector of the multivariate Normal
58 | #'@param chol_inverse A matrix of size d x d specifying the inverse of the upper-triangular Cholesky
59 | #'factor of the covariance matrix of the multivariate Normal,
60 | #'for instance obtained using \code{solve(chol(Sigma))}
61 | #'@return A vector of n evaluations of the multivariate Normal log-pdf, one for each row of \code{x}
62 | #'@examples
63 | #' Sigma <- diag(1, 5, 5)
64 | #' Sigma[1,2] <- Sigma[2,1] <- 0.3
65 | #' Sigma_chol <- chol(Sigma)
66 | #' x <- fast_rmvnorm_chol(2, rep(0, 5), Sigma_chol)
67 | #' fast_dmvnorm_chol_inverse(x, rep(0, 5), solve(Sigma_chol))
68 | #'@export
69 | fast_dmvnorm_chol_inverse <- function(x, mean, chol_inverse){
70 |   return(fast_dmvnorm_cholesky_inverse_(x, mean, chol_inverse))
71 | }
72 | 
73 | 
74 | 
75 | 


--------------------------------------------------------------------------------
/R/rnorm_max_coupling.R:
--------------------------------------------------------------------------------
 1 | #'@rdname rnorm_max_coupling
 2 | #'@title Maximal coupling of two univariate Normal distributions
 3 | #'@description Sample from maximal coupling of two univariate Normal distributions,
 4 | #'specified through their means and standard deviations. See \code{\link{rmvnorm_max}} for a multivariate version.
 5 | #'@param mu1 First mean
 6 | #'@param mu2 Second mean
 7 | #'@param sigma1 First mean
 8 | #'@param sigma2 Second mean
 9 | #'@return Returns a list with
10 | #'
11 | #' \itemize{
12 | #' \item "xy": the pair of samples \eqn{(x,y)}
13 | #'
14 | #' \item "identical": TRUE if \eqn{x = y}, FALSE otherwise
15 | #' }
16 | #'@export
17 | rnorm_max_coupling <- function(mu1, mu2, sigma1, sigma2){
18 |   f <- get_max_coupling(function(n) rnorm(n, mu1, sigma1),
19 |                         function(x) dnorm(x, mu1, sigma1, log = TRUE),
20 |                         function(n) rnorm(n, mu2, sigma2),
21 |                         function(x) dnorm(x, mu2, sigma2, log = TRUE))
22 |   return(f())
23 | }
24 | 


--------------------------------------------------------------------------------
/R/rnorm_reflectionmax.R:
--------------------------------------------------------------------------------
 1 | # reflection-maximal coupling in one dimension
 2 | #'@rdname rnorm_reflectionmax
 3 | #'@title Reflection-maximal coupling of two univariate Normal distributions
 4 | #'@description Sample from reflection-maximal coupling of two univariate Normal distributions,
 5 | #'specified through their means, with common standard deviation.
 6 | #'See \code{\link{rmvnorm_reflectionmax}} for a multivariate version.
 7 | #'@param mu1 First mean
 8 | #'@param mu2 Second mean
 9 | #'@param sigma Common standard deviation
10 | #'@return Returns a list with
11 | #' \itemize{
12 | #'
13 | #' \item "xy": the pair of samples \eqn{(x,y)}
14 | #'
15 | #' \item "identical": TRUE if \eqn{x = y}, FALSE otherwise
16 | #' }
17 | #'@export
18 | rnorm_reflectionmax <- function(mu1, mu2, sigma){
19 |   # number of samples
20 |   reflmax_xy <- c(0,0)
21 |   # draw std normal first
22 |   xdot <- rnorm(1)
23 |   # this follows the notation of Bou Rabee et al, 2018, roughly
24 |   z <- (mu1 - mu2) / sigma
25 |   normz <- sqrt(sum(z^2))
26 |   e <- z / normz
27 |   utilde <- runif(1, 0, 1)
28 |   accept <- (log(utilde) < (dnorm(xdot + z, 0, 1, log = TRUE) - dnorm(xdot, log = TRUE)))
29 |   ident_ <- FALSE
30 |   if (accept){
31 |     ydot <- xdot + z
32 |     ident_ <- TRUE
33 |   } else {
34 |     ydot <- xdot - 2 * (e * xdot) * e
35 |   }
36 |   reflmax_xy[1] <- mu1 + sigma * xdot
37 |   reflmax_xy[2] <- mu2 + sigma * ydot
38 |   return(list(xy = reflmax_xy, identical = ident_))
39 | }
40 | 


--------------------------------------------------------------------------------
/R/unbiasedmcmc-package.R:
--------------------------------------------------------------------------------
 1 | #'@name unbiasedmcmc-package
 2 | #'@aliases unbiasedmcmc
 3 | #'@docType package
 4 | #'@title unbiasedmcmc
 5 | #'@author Pierre E. Jacob <pierre.jacob.work@@gmail.com>, John O'Leary, Yves F. Atchade
 6 | #'@description Unbiased MCMC estimators with couplings
 7 | #'@details This package contains scripts to reproduce the figures of the
 8 | #' paper "Unbiased Markov chain Monte Carlo with couplings" by
 9 | #' Pierre E. Jacob, John O'Leary, Yves F Atchade, available on arXiv at
10 | #' https://arxiv.org/abs/1708.03625
11 | #'@keywords package
12 | #'@useDynLib unbiasedmcmc
13 | #'@importFrom Rcpp sourceCpp
14 | NULL
15 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
 1 | 
 2 | # This file consists of short utility functions for working with
 3 | # propensity scores, as well as the ggplot2-related  setmytheme.
 4 | 
 5 | # from util_expit ---------------------------------------------------------
 6 | #'@rdname expit
 7 | #'@title expit
 8 | #'@description expit function
 9 | #'@export
10 | expit <- function(z) 1 / (1 + exp(-z))
11 | 
12 | 
13 | # from propensity ---------------------------------------------------------
14 | 
15 | #'@export
16 | beta2e <- function(beta, C){
17 |   return(beta2e_(beta, C))
18 | }
19 | 
20 | #'@export
21 | cut_in_fifth <- function(x){
22 |   return(cut_in_fifth_(x))
23 | }
24 | 
25 | 
26 | # from util_setmytheme ----------------------------------------------------
27 | 
28 | #'@rdname setmytheme
29 | #'@title Customize graphical settings
30 | #'@description This function customizes the theme used by ggplot2. Loads the packages ggplot2 and
31 | #'ggthemes.
32 | #'@export
33 | setmytheme <- function(){
34 |   library(ggplot2)
35 |   library(ggthemes)
36 |   theme_set(theme_bw())
37 |   theme_update(axis.text.x = element_text(size = 20),
38 |                axis.text.y = element_text(size = 20),
39 |                axis.title.x = element_text(size = 25, margin=margin(20,0,0,0)),
40 |                axis.title.y = element_text(size = 25, angle = 90, margin = margin(0,20,0,0)),
41 |                legend.text = element_text(size = 20),
42 |                legend.title = element_text(size = 20),
43 |                title = element_text(size = 30),
44 |                strip.text = element_text(size = 25),
45 |                strip.background = element_rect(fill="white"),
46 |                legend.position = "bottom")
47 | }
48 | 


--------------------------------------------------------------------------------
/README_files/figure-gfm/estimators-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pierrejacob/unbiasedmcmc/a1eea04eef08710463ff9ff2228f5e58565fe78b/README_files/figure-gfm/estimators-1.png


--------------------------------------------------------------------------------
/README_files/figure-gfm/usage-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pierrejacob/unbiasedmcmc/a1eea04eef08710463ff9ff2228f5e58565fe78b/README_files/figure-gfm/usage-1.png


--------------------------------------------------------------------------------
/README_files/figure-gfm/usage-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pierrejacob/unbiasedmcmc/a1eea04eef08710463ff9ff2228f5e58565fe78b/README_files/figure-gfm/usage-2.png


--------------------------------------------------------------------------------
/data/diabetes.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pierrejacob/unbiasedmcmc/a1eea04eef08710463ff9ff2228f5e58565fe78b/data/diabetes.RData


--------------------------------------------------------------------------------
/data/germancredit.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pierrejacob/unbiasedmcmc/a1eea04eef08710463ff9ff2228f5e58565fe78b/data/germancredit.RData


--------------------------------------------------------------------------------
/inst/README.R:
--------------------------------------------------------------------------------
 1 | # The files of the unbiasedmcmc/inst/reproduce*
 2 | # contain the scripts to reproduce the figures, model by model.
 3 | 
 4 | # Each folder has a "run.all.R" script that can be run to execute each script in the correct order
 5 | # and to produce tables/figures.
 6 | 
 7 | # The files finishing in .plots.R create the plots and tables.
 8 | 
 9 | # The Ising model and variable selection examples contain further details on how
10 | # to execute batch scripts, in parallel, e.g. on a cluster or via GNU parallel.
11 | 
12 | # There might be hardcoded paths to be modified before executing the scripts,
13 | # although efforts have been made to remove them.
14 | 


--------------------------------------------------------------------------------
/inst/check/check_discretecouplings.R:
--------------------------------------------------------------------------------
 1 | # This script plays with coupling of distributions defined on discrete spaces
 2 | # load packages
 3 | library(unbiasedmcmc)
 4 | rm(list = ls())
 5 | set.seed(21)
 6 | library(doParallel)
 7 | library(doRNG)
 8 | registerDoParallel(cores = detectCores()-2)
 9 | #
10 | p <- 40
11 | s <- 20
12 | selection <- rep(0, p)
13 | selection[sample(1:p, s, replace = F)] <- (runif(s) < 0.5)
14 | selection
15 | unbiasedmcmc:::sample_pair01(selection)
16 | # this is meant to sample one zero and one one uniformly
17 | nrep <- 10000
18 | 
19 | test <- foreach(irep = 1:nrep, .combine = rbind) %dorng% {
20 |   unbiasedmcmc:::sample_pair01(selection)
21 | }
22 | #
23 | table(test[,1])  / nrep
24 | which(selection == 0)
25 | 1/sum(selection==0)
26 | #
27 | 
28 | selection1 <- rep(0, p)
29 | selection1[sample(1:p, s, replace = F)] <- (runif(s) < 0.5)
30 | selection2 <- rep(0, p)
31 | selection2[sample(1:p, s, replace = F)] <- (runif(s) < 0.5)
32 | 
33 | 
34 | test <- foreach(irep = 1:10000, .combine = rbind) %dorng% {
35 |   coupled_pairs01(selection1, selection2)
36 | }
37 | 
38 | #
39 | table(test[,1]) / nrow(test)
40 | 1/(p - sum(selection1))
41 | table(test[,2]) / nrow(test)
42 | 1/(p - sum(selection2))
43 | table(test[,3]) / nrow(test)
44 | 1/sum(selection1)
45 | table(test[,4]) / nrow(test)
46 | 1/sum(selection2)
47 | #
48 | 
49 | 


--------------------------------------------------------------------------------
/inst/check/check_gamma.R:
--------------------------------------------------------------------------------
 1 | library(unbiasedmcmc)
 2 | library(doParallel)
 3 | library(doRNG)
 4 | # register parallel cores
 5 | registerDoParallel(cores = detectCores()-2)
 6 | setmytheme()
 7 | rm(list = ls())
 8 | set.seed(21)
 9 | 
10 | # check Gamma sampler
11 | alpha <- 3.1
12 | beta <- 2
13 | x <- rgamma(100000, alpha, beta)
14 | summary(x)
15 | quantile(x, probs = c(0.99))
16 | hist(x[x < 5], nclass = 200, prob = TRUE)
17 | curve(dgamma(x, alpha, rate = beta), col = "red", add = TRUE)
18 | 
19 | ### maximal coupling of Gamma
20 | alpha1 <- 3.1
21 | beta1 <- 2.03
22 | alpha2 <- 7.4
23 | beta2 <- 1.7
24 | # sample
25 | rgamma_coupled(alpha1, alpha2, beta1, beta2)
26 | 
27 | xy <- foreach(i = 1:10000) %dorng% {
28 |   rgamma_coupled(alpha1, alpha2, beta1, beta2)
29 | }
30 | 
31 | x1 <- sapply(xy, function(x) x$xy[1])
32 | x2 <- sapply(xy, function(x) x$xy[2])
33 | quantile(x1, probs = c(0.99))
34 | hist(x1[x1 < 5], prob = TRUE, nclass = 100)
35 | curve(dgamma(x, alpha1, rate = beta1), col = "red", add = TRUE)
36 | 
37 | quantile(x2, probs = c(0.99))
38 | hist(x2, prob = TRUE, nclass = 100)
39 | curve(dgamma(x, alpha2, rate = beta2), col = "red", add = TRUE)
40 | 
41 | 
42 | 


--------------------------------------------------------------------------------
/inst/check/check_histograms.R:
--------------------------------------------------------------------------------
 1 | library(unbiasedmcmc)
 2 | library(doParallel)
 3 | library(doRNG)
 4 | # register parallel cores
 5 | registerDoParallel(cores = detectCores()-2)
 6 | setmytheme()
 7 | rm(list = ls())
 8 | set.seed(21)
 9 | 
10 | target <- function(x){
11 |   evals <- log(0.5) + dnorm(x, mean = c(-2, 2), sd = 0.2, log = TRUE)
12 |   return(max(evals) + log(sum(exp(evals - max(evals)))))
13 | }
14 | 
15 | kernels <- get_mh_kernels(target, 2^2)
16 | single_kernel <- kernels$single_kernel
17 | coupled_kernel <- kernels$coupled_kernel
18 | rinit <- function(){
19 |   x <- rnorm(1)
20 |   list(chain_state = x, current_pdf = target(x))
21 | }
22 | c_chains <- sample_coupled_chains(single_kernel, coupled_kernel, rinit)
23 | 
24 | 
25 | nsamples <- 1000
26 | meetingtime <- foreach(irep = 1:nsamples, .combine = c) %dorng% {
27 |   sample_meetingtime(single_kernel, coupled_kernel, rinit)$meetingtime
28 | }
29 | summary(meetingtime)
30 | hist(meetingtime)
31 | ##
32 | m <- 100
33 | c_chains_ <-  foreach(irep = 1:nsamples) %dorng% {
34 |   sample_coupled_chains(single_kernel, coupled_kernel, rinit, m = m, lag = 50)
35 | }
36 | k <- 20
37 | index_ <- which(sapply(c_chains_, function(x) x$meetingtime) > 30)[1]
38 | c_chains_[[index_]]
39 | H_bar(c_chains_[[index_]], h = function(x) x, k = k, m = m)
40 | #
41 | summary(sapply(c_chains_, function(x) x$meetingtime))
42 | # histogram
43 | component <- 1
44 | hist1 <- histogram_c_chains(c_chains_, component, k, m)
45 | barplot(height = hist1$proportions)
46 | plot_histogram(hist1)
47 | library(ggplot2)
48 | df_ <- data.frame(x = hist1$mids, y = hist1$proportions / hist1$width)
49 | g <- ggplot(df_, aes(x = x, xend = x, y = 0, yend = y))
50 | g <- g + geom_segment(aes(x = x, xend = x, y = 0, yend = y)) + ylab("density")
51 | g
52 | 


--------------------------------------------------------------------------------
/inst/check/check_inversegamma.R:
--------------------------------------------------------------------------------
 1 | library(unbiasedmcmc)
 2 | library(doParallel)
 3 | library(doRNG)
 4 | # register parallel cores
 5 | registerDoParallel(cores = detectCores()-2)
 6 | setmytheme()
 7 | rm(list = ls())
 8 | set.seed(21)
 9 | 
10 | # check inverse Gamma sampler
11 | alpha <- 3.1
12 | beta <- 2
13 | x <- rinversegamma(100000, alpha, beta)
14 | summary(x)
15 | quantile(x, probs = c(0.99))
16 | hist(x[x < 5], nclass = 200, prob = TRUE)
17 | curve(exp(dinversegamma(x, alpha, beta)), col = "red", add = TRUE)
18 | 
19 | ### maximal coupling of inverse Gamma
20 | alpha1 <- 3.1
21 | beta1 <- 2.03
22 | alpha2 <- 7.4
23 | beta2 <- 1.7
24 | # sample
25 | rinversegamma_coupled(alpha1, alpha2, beta1, beta2)
26 | xy <- foreach(i = 1:10000) %dorng% {
27 |   rinversegamma_coupled(alpha1, alpha2, beta1, beta2)
28 | }
29 | 
30 | x1 <- sapply(xy, function(x) x$xy[1])
31 | x2 <- sapply(xy, function(x) x$xy[2])
32 | quantile(x1, probs = c(0.99))
33 | 
34 | hist(x1[x1 < 5], prob = TRUE, nclass = 100)
35 | curve(exp(dinversegamma(x, alpha1, beta1)), col = "red", add = TRUE)
36 | 
37 | hist(x2, prob = TRUE, nclass = 100)
38 | curve(exp(dinversegamma(x, alpha2, beta2)), col = "red", add = TRUE)
39 | 


--------------------------------------------------------------------------------
/inst/check/check_inversegaussian.R:
--------------------------------------------------------------------------------
 1 | library(unbiasedmcmc)
 2 | library(doParallel)
 3 | library(doRNG)
 4 | # register parallel cores
 5 | registerDoParallel(cores = detectCores()-2)
 6 | setmytheme()
 7 | rm(list = ls())
 8 | set.seed(21)
 9 | 
10 | # check inverse Gamma sampler
11 | mu <- 3.1
12 | lambda <- 2
13 | x <- rinvgaussian(100000, mu, lambda)
14 | summary(x)
15 | quantile(x, probs = c(0.99))
16 | hist(x, nclass = 200, prob = TRUE)
17 | curve(exp(dinvgaussian(x, mu, lambda)), col = "red", add = TRUE)
18 | 
19 | ### maximal coupling of inverse Gamma
20 | mu1 <- 3.1
21 | lambda1 <- 2
22 | mu2 <- 1.7
23 | lambda2 <- 1.3
24 | # sample
25 | rinvgaussian_coupled(mu1, mu2, lambda1, lambda2)
26 | 
27 | xy <- foreach(i = 1:10000) %dorng% {
28 |   rinvgaussian_coupled(mu1, mu2, lambda1, lambda2)
29 | }
30 | 
31 | x1 <- sapply(xy, function(x) x[1])
32 | x2 <- sapply(xy, function(x) x[2])
33 | quantile(x1, probs = c(0.99))
34 | hist(x1, prob = TRUE, nclass = 100)
35 | curve(exp(dinvgaussian(x, mu1, lambda1)), col = "red", add = TRUE)
36 | quantile(x2, probs = c(0.99))
37 | 
38 | hist(x2, prob = TRUE, nclass = 100)
39 | curve(exp(dinvgaussian(x, mu2, lambda2)), col = "red", add = TRUE)
40 | 


--------------------------------------------------------------------------------
/inst/check/check_lags.R:
--------------------------------------------------------------------------------
  1 | library(unbiasedmcmc)
  2 | library(doParallel)
  3 | library(doRNG)
  4 | # register parallel cores
  5 | registerDoParallel(cores = detectCores()-2)
  6 | # remove all
  7 | rm(list = ls())
  8 | # set RNG seed
  9 | set.seed(11)
 10 | 
 11 | logtarget <- function(x) dnorm(x, mean = 0, sd = 1, log = TRUE)
 12 | rinit <- function(){
 13 |   chain_state <- rnorm(1, 1, 1)
 14 |   current_pdf <- logtarget(chain_state)
 15 |   return(list(chain_state = chain_state, current_pdf = current_pdf))
 16 | }
 17 | sd_proposal <- 1
 18 | MH_kernel <- function(state){
 19 |   chain_state <- state$chain_state
 20 |   current_pdf <- state$current_pdf
 21 |   proposal <- rnorm(1, chain_state, sd_proposal)
 22 |   proposal_pdf <- logtarget(proposal)
 23 |   if (log(runif(1)) < (proposal_pdf - current_pdf)){
 24 |     return(list(chain_state = proposal, current_pdf = proposal_pdf))
 25 |   } else {
 26 |     return(list(chain_state = chain_state, current_pdf = current_pdf))
 27 |   }
 28 | }
 29 | 
 30 | niterations <- 10000
 31 | chain <- rep(0, niterations)
 32 | state <- rinit()
 33 | for (i in 1:niterations){
 34 |   state <- MH_kernel(state)
 35 |   chain[i] <- state$chain_state
 36 | }
 37 | hist(chain, prob = TRUE, nclass = 40, main = "")
 38 | curve(exp(logtarget(x)), add = TRUE, col = "red")
 39 | 
 40 | coupledMH_kernel <- function(state1, state2){
 41 |   chain_state1 <- state1$chain_state;  current_pdf1 <- state1$current_pdf
 42 |   chain_state2 <- state2$chain_state;  current_pdf2 <- state2$current_pdf
 43 |   # proposal from a maximal coupling
 44 |   proposal <- rnorm_max_coupling(chain_state1, chain_state2, sd_proposal, sd_proposal)
 45 |   proposal_pdf1 <- logtarget(proposal$xy[1])
 46 |   # only compute target pdf on 2nd proposal if it is not identical to 1st proposal
 47 |   proposal_pdf2 <- proposal_pdf1
 48 |   if (!proposal$identical){
 49 |     proposal_pdf2 <- logtarget(proposal$xy[2])
 50 |   }
 51 |   logu <- log(runif(1))
 52 |   accept1 <- FALSE; accept2 <- FALSE
 53 |   if (is.finite(proposal_pdf1)){
 54 |     if (logu < (proposal_pdf1 - current_pdf1)){
 55 |       accept1 <- TRUE
 56 |       chain_state1 <- proposal$xy[1]; current_pdf1 <- proposal_pdf1
 57 |     }
 58 |   }
 59 |   if (is.finite(proposal_pdf2)){
 60 |     if(logu < (proposal_pdf2 - current_pdf2)){
 61 |       accept2 <- TRUE
 62 |       chain_state2 <- proposal$xy[2]; current_pdf2 <- proposal_pdf2
 63 |     }
 64 |   }
 65 |   identical_ <- (proposal$identical) && (accept1) && accept2
 66 |   return(list(state1 = list(chain_state = chain_state1, current_pdf = current_pdf1),
 67 |               state2 = list(chain_state = chain_state2, current_pdf = current_pdf2),
 68 |               identical = identical_))
 69 | }
 70 | 
 71 | nsamples <- 10000
 72 | lag <- 10
 73 | k <- 10
 74 | m <- 50
 75 | c_chains_ <-  foreach(irep = 1:nsamples) %dorng% {
 76 |   sample_coupled_chains(MH_kernel, coupledMH_kernel, rinit, lag = lag, m = m)
 77 | }
 78 | names(c_chains_[[1]])
 79 | meetingtime <- sapply(c_chains_, function(x) x$meetingtime)
 80 | hist(meetingtime, breaks = 1:max(meetingtime), prob = TRUE, main = "", xlab = "meeting times")
 81 | mean_cost <- mean(sapply(c_chains_, function(x) x$cost))
 82 | uestimators <-  sapply(c_chains_, function(x) H_bar(x, h = function(x) x^2, k = k, m = m))
 83 | 
 84 | uestimators2 <-  foreach(irep = 1:nsamples) %dorng% {
 85 |   sample_unbiasedestimator(MH_kernel, coupledMH_kernel, rinit, h = function(x) x^2, k = k, m = m, lag = lag)
 86 | }
 87 | 
 88 | mean(uestimators)
 89 | mean(sapply(uestimators2, function(x) x$uestimator))
 90 | var(uestimators)
 91 | var(sapply(uestimators2, function(x) x$uestimator))
 92 | 
 93 | hist(uestimators)
 94 | hist(sapply(uestimators2, function(x) x$uestimator))
 95 | 
 96 | mean_cost
 97 | mean(sapply(uestimators2, function(x) x$cost))
 98 | 
 99 | # ## cost: lag calls to kernel, tau - lag calls to coupled kernel, then max(0, m - tau) calls to kernel again (if meeting < m)
100 | # cost_of_coupled_chain <- function(c_chains){
101 | #   lag <- dim(c_chains$samples1)[1] - dim(c_chains$samples2)[1]
102 | #   cost <- lag + 2*(c_chains$meetingtime - lag) + max(0, c_chains$iteration - c_chains$meetingtime)
103 | #   return(cost)
104 | # }
105 | #
106 | # costs_ <- sapply(c_chains_, function(x) cost_of_coupled_chain(x))
107 | 
108 | inefficiency <- mean_cost * var(uestimators)
109 | cat("inefficiency:", inefficiency, ", cost:", mean_cost)
110 | 
111 | 
112 | 


--------------------------------------------------------------------------------
/inst/check/check_mvnorm.R:
--------------------------------------------------------------------------------
 1 | # This script tests the functions implementing different
 2 | # random number generators and probability density functions for
 3 | # multivariate Normal distributions.
 4 | 
 5 | # load packages
 6 | library(unbiasedmcmc)
 7 | library(doParallel)
 8 | library(doRNG)
 9 | # register parallel cores
10 | registerDoParallel(cores = detectCores()-2)
11 | setmytheme()
12 | rm(list = ls())
13 | set.seed(21)
14 | 
15 | ### sampling trivariate Gaussians
16 | p <- 3
17 | mu1 <- rep(0, p)
18 | mu2 <- rep(1, p)
19 | Sigma1 <- diag(0.4, p, p)
20 | Sigma1[1,2] <- Sigma1[2,1] <- 0.2
21 | Sigma2 <- diag(1.4, p, p)
22 | Sigma2[1,2] <- Sigma2[2,1] <- -0.5
23 | 
24 | # Compute Cholesky factors and inverses
25 | Sigma1_chol <- chol(Sigma1)
26 | Sigma2_chol <- chol(Sigma2)
27 | Sigma1_chol_inv <- solve(Sigma1_chol)
28 | Sigma2_chol_inv <- solve(Sigma2_chol)
29 | ## Note on Cholesky factorization:
30 | ## we have
31 | # t(Sigma1_chol) %*% Sigma1_chol
32 | # and Sigma1_chol is upper triangular
33 | 
34 | # sample
35 | x <- fast_rmvnorm(2, mu2, Sigma2)
36 | 
37 | # equivalent calculations of pdf evaluations
38 | fast_dmvnorm(x, mu1, Sigma1)
39 | mvtnorm::dmvnorm(x, mu1, Sigma1, log = T)
40 | fast_dmvnorm_chol_inverse(x, mu1, Sigma1_chol_inv)
41 | 
42 | # and again
43 | x <- fast_rmvnorm(10, mu1, Sigma1)
44 | fast_dmvnorm(x, mu2, Sigma2)
45 | mvtnorm::dmvnorm(x, mu2, Sigma2, log = T)
46 | fast_dmvnorm_chol_inverse(x, mu2, Sigma2_chol_inv)
47 | 
48 | # benchmark
49 | microbenchmark::microbenchmark(
50 |   fast_dmvnorm(x, mu1, Sigma1),
51 |   mvtnorm::dmvnorm(x, mu1, Sigma1, log = T),
52 |   fast_dmvnorm_chol_inverse(x, mu1, Sigma1_chol_inv))
53 | 
54 | # now check random number generator
55 | n <- 1e4
56 | x2_1 <- fast_rmvnorm(n, mu2, Sigma2)
57 | x2_2 <- mvtnorm::rmvnorm(n, mu2, Sigma2)
58 | 
59 | # compare means
60 | cat(colMeans(x2_1), "\n", colMeans(x2_2), "\n", mu2, "\n")
61 | 
62 | # compare covariances
63 | cat(cov(x2_1)[1:2,1:2], "\n", cov(x2_2)[1:2,1:2], "\n", Sigma2[1:2,1:2], "\n")
64 | 
65 | 
66 | # and using cholesky
67 | x1_1 <- fast_rmvnorm(n, mu1, Sigma1)
68 | x1_3 <- fast_rmvnorm_chol(n, mu1, Sigma1_chol)
69 | 
70 | # compare means
71 | cat(colMeans(x1_1), "\n", colMeans(x1_3), "\n", mu1, "\n")
72 | 
73 | # compare covariances
74 | cat(cov(x1_1)[1:3,1:3], "\n", cov(x1_3)[1:3,1:3], "\n", Sigma1[1:3,1:3], "\n")
75 | 


--------------------------------------------------------------------------------
/inst/check/check_reflmaxcoupling.R:
--------------------------------------------------------------------------------
 1 | library(unbiasedmcmc)
 2 | library(doParallel)
 3 | library(doRNG)
 4 | # register parallel cores
 5 | registerDoParallel(cores = detectCores()-2)
 6 | setmytheme()
 7 | rm(list = ls())
 8 | set.seed(21)
 9 | 
10 | ### maximal coupling of bivariate Gaussians with identical covariance matrices
11 | mu1 <- c(0.2, 0.3)
12 | mu2 <- c(0.0, 0.8)
13 | Sigma <- diag(1, 2, 2)
14 | Sigma[1,2] <- Sigma[2,1] <- 0.8
15 | Sigma[2,2] <- 2
16 | Sigma_chol <- chol(Sigma)
17 | Sigma_inv_chol <- solve(Sigma_chol)
18 | 
19 | # sample once
20 | rmvnorm_reflectionmax(mu1, mu2, Sigma_chol, Sigma_inv_chol)
21 | # function output samples (column-bound) in $xy, and a boolean indicator of identity in $identical
22 | 
23 | nsamples <- 5e4
24 | xy <- foreach(i = 1:nsamples) %dorng% {
25 |   rmvnorm_reflectionmax(mu1, mu2, Sigma_chol, Sigma_inv_chol)
26 | }
27 | 
28 | identicals <- sapply(xy, function(x) x$identical)
29 | equalvalues <- sapply(xy, function(x) all(x$xy[,1] == x$xy[,2]))
30 | all(identicals == equalvalues)
31 | 
32 | # collect samples from both distributions
33 | sample1 <- t(sapply(xy, function(x) x$xy[,1]))
34 | sample2 <- t(sapply(xy, function(x) x$xy[,2]))
35 | 
36 | # and check that they follow the correct distribution
37 | colMeans(sample1)
38 | mu1
39 | colMeans(sample2)
40 | mu2
41 | 
42 | cov(sample1)
43 | cov(sample2)
44 | Sigma
45 | 
46 | # estimate of 1-TVD
47 | mean(sapply(xy, function(x) x$identical))
48 | 
49 | # visualize marginals
50 | hist(sample1[,1], prob = TRUE, nclass = 100)
51 | curve(dnorm(x, mu1[1], sqrt(Sigma[1,1])), add = TRUE, col = "red")
52 | 
53 | hist(sample1[,2], prob = TRUE, nclass = 100)
54 | curve(dnorm(x, mu1[2], sqrt(Sigma[2,2])), add = TRUE, col = "red")
55 | 
56 | hist(sample2[,1], prob = TRUE, nclass = 100)
57 | curve(dnorm(x, mu2[1], sqrt(Sigma[1,1])), add = TRUE, col = "red")
58 | 
59 | hist(sample2[,2], prob = TRUE, nclass = 100)
60 | curve(dnorm(x, mu2[2], sqrt(Sigma[2,2])), add = TRUE, col = "red")
61 | 
62 | 
63 | mu1 <- 1
64 | mu2 <- 2
65 | Sigma_proposal <- diag(2, 1, 1)
66 | Sigma_chol <- chol(Sigma_proposal)
67 | Sigma_chol_inv <- solve(Sigma_chol)
68 | nsamples <- 5e4
69 | xy <- foreach(i = 1:nsamples) %dorng% {
70 |   rnorm_reflectionmax(mu1, mu2, Sigma_chol[1,1])
71 | }
72 | hist(sapply(xy, function(x) x$xy[1]), prob = TRUE, nclass = 100)
73 | curve(dnorm(x, mu1, Sigma_chol[1]), add = T)
74 | 
75 | hist(sapply(xy, function(x) x$xy[2]), prob = TRUE, nclass = 100)
76 | curve(dnorm(x, mu2, Sigma_chol[1]), add = T)
77 | 
78 | rmvnorm_reflectionmax(mu1, mu2, Sigma_chol, Sigma_chol_inv)
79 | 
80 | mean(sapply(xy, function(xy) xy$identical))
81 | 
82 | xymax <- foreach(i = 1:nsamples) %dorng% {
83 |   rnorm_max_coupling(mu1, mu2, Sigma_chol[1,1], Sigma_chol[1,1])
84 | }
85 | mean(sapply(xymax, function(xy) xy$identical))
86 | 
87 | 


--------------------------------------------------------------------------------
/inst/check/check_rwmh_doublewell.R:
--------------------------------------------------------------------------------
 1 | library(unbiasedmcmc)
 2 | rm(list = ls())
 3 | # double well potential log-pdf
 4 | target <- function(x) -0.1 * ( ((x[1]-1)^2-x[2]^2 )^2 + 10*(x[1]^2-5)^2+ (x[1]+x[2])^4 + (x[1]-x[2])^4)
 5 | # compute marginals pdfs and nornalizing constant
 6 | targetmarginal1 <- function(x) integrate(function(a) sapply(a, function(z) exp(target(c(x, z)))), lower = -6, upper = 6, subdivisions = 1e4)$value
 7 | targetmarginal2 <- function(x) integrate(function(a) sapply(a, function(z) exp(target(c(z, x)))), lower = -6, upper = 6, subdivisions = 1e4)$value
 8 | Z <- integrate(function(x) sapply(x, function(z) targetmarginal1(z)), lower = -6, upper = 6, subdivisions = 1e4)$value
 9 | 
10 | # xgrid <- seq(from = -4, to = 4, length.out = 1e2)
11 | # ygrid <- seq(from = -4, to = 4, length.out = 1e2)
12 | # df_ <- expand.grid(xgrid, ygrid)
13 | # df_$z <- apply(df_, 1, target)
14 | # library(ggplot2)
15 | # ggplot(df_, aes(x = Var1, y = Var2, z = z)) + geom_contour(binwidth = 5)
16 | 
17 | # define random walk Metropolis-Hastings kernels
18 | kernels <- get_mh_kernels(target, Sigma_proposal = diag(1, 2, 2))
19 | # define initial distribution
20 | rinit <- function(){
21 |   x <- rnorm(n = 2, mean = 2, sd = 25)
22 |   return(list(chain_state = x, current_pdf = target(x)))
23 | }
24 | ## register parallel cores
25 | library(doParallel)
26 | library(doRNG)
27 | registerDoParallel(cores = detectCores()-2)
28 | nrepeats <- 5e2
29 | # sample meeting times "tau"
30 | meetings_ <- foreach(rep = 1:nrepeats) %dorng% {
31 |   sample_meetingtime(kernels$single_kernel, kernels$coupled_kernel, rinit)
32 | }
33 | hist(sapply(meetings_, function(x) x$meetingtime), nclass = 50, xlab = "meeting time", main = "")
34 | 
35 | ## now run coupled chains for max(m, tau) steps
36 | coupledchains_ <- foreach(rep = 1:nrepeats) %dorng% {
37 |   sample_coupled_chains(kernels$single_kernel, kernels$coupled_kernel, rinit, m = 10000, lag = 1000)
38 | }
39 | names(coupledchains_[[1]])
40 | hist(sapply(coupledchains_, function(x) x$meetingtime))
41 | 
42 | # let's see what happened to the chains that took longest to meet
43 | index_max <- which.max(sapply(coupledchains_, function(x) x$meetingtime))
44 | plot(coupledchains_[[index_max]]$samples1[,1], coupledchains_[[index_max]]$samples1[,2], type = "l", xlim = c(-5, 5), ylim = c(-5, 5))
45 | lines(coupledchains_[[index_max]]$samples2[,1], coupledchains_[[index_max]]$samples2[,2], col = "red")
46 | matplot(coupledchains_[[index_max]]$samples1, type = "l", col = "black")
47 | matplot(coupledchains_[[index_max]]$samples2, type = "l", col = "red", add = T)
48 | 
49 | library(ggplot2)
50 | hist1 <- histogram_c_chains(coupledchains_, component = 1, k = 2000, m = 5000, dopar = FALSE)
51 | plot_histogram(hist1) + stat_function(fun = function(x) sapply(x, function(v) targetmarginal1(v)/Z)) + xlim(-4,4)
52 | 
53 | hist2 <- histogram_c_chains(coupledchains_, component = 2, k = 2000, m = 10000, dopar = TRUE)
54 | plot_histogram(hist2) + stat_function(fun = function(x) sapply(x, function(v) targetmarginal2(v)/Z)) + xlim(-4,4)
55 | 
56 | 
57 | 


--------------------------------------------------------------------------------
/inst/check/check_rwmh_minimalcode.R:
--------------------------------------------------------------------------------
 1 | library(unbiasedmcmc)
 2 | rm(list = ls())
 3 | # double well potential log-pdf
 4 | target <- function(x) -0.1 * ( ((x[1]-1)^2-x[2]^2 )^2 + 10*(x[1]^2-5)^2+ (x[1]+x[2])^4 + (x[1]-x[2])^4)
 5 | # compute marginals pdfs and nornalizing constant
 6 | targetmarginal1 <- function(x) integrate(function(a) sapply(a, function(z) exp(target(c(x, z)))), lower = -6, upper = 6, subdivisions = 1e4)$value
 7 | targetmarginal2 <- function(x) integrate(function(a) sapply(a, function(z) exp(target(c(z, x)))), lower = -6, upper = 6, subdivisions = 1e4)$value
 8 | Z <- integrate(function(x) sapply(x, function(z) targetmarginal1(z)), lower = -6, upper = 6, subdivisions = 1e4)$value
 9 | 
10 | # xgrid <- seq(from = -4, to = 4, length.out = 1e2)
11 | # ygrid <- seq(from = -4, to = 4, length.out = 1e2)
12 | # df_ <- expand.grid(xgrid, ygrid)
13 | # df_$z <- apply(df_, 1, target)
14 | # library(ggplot2)
15 | # ggplot(df_, aes(x = Var1, y = Var2, z = z)) + geom_contour(binwidth = 5)
16 | 
17 | # define random walk Metropolis-Hastings kernels
18 | kernels <- get_mh_kernels(target, Sigma_proposal = diag(1, 2, 2))
19 | # define initial distribution
20 | rinit <- function(){
21 |   x <- rnorm(n = 2, mean = 2, sd = 25)
22 |   return(list(chain_state = x, current_pdf = target(x)))
23 | }
24 | ## register parallel cores
25 | library(doParallel)
26 | library(doRNG)
27 | registerDoParallel(cores = detectCores()-2)
28 | nrepeats <- 5e2
29 | # sample meeting times "tau"
30 | meetings_ <- foreach(rep = 1:nrepeats) %dorng% {
31 |   sample_meetingtime(kernels$single_kernel, kernels$coupled_kernel, rinit)
32 | }
33 | hist(sapply(meetings_, function(x) x$meetingtime), nclass = 50, xlab = "meeting time", main = "")
34 | 
35 | ## now run coupled chains for max(m, tau) steps
36 | coupledchains_ <- foreach(rep = 1:nrepeats) %dorng% {
37 |   sample_coupled_chains(kernels$single_kernel, kernels$coupled_kernel, rinit, m = 10000, lag = 1000)
38 | }
39 | names(coupledchains_[[1]])
40 | hist(sapply(coupledchains_, function(x) x$meetingtime))
41 | 
42 | # let's see what happened to the chains that took longest to meet
43 | index_max <- which.max(sapply(coupledchains_, function(x) x$meetingtime))
44 | plot(coupledchains_[[index_max]]$samples1[,1], coupledchains_[[index_max]]$samples1[,2], type = "l", xlim = c(-5, 5), ylim = c(-5, 5))
45 | lines(coupledchains_[[index_max]]$samples2[,1], coupledchains_[[index_max]]$samples2[,2], col = "red")
46 | matplot(coupledchains_[[index_max]]$samples1, type = "l", col = "black")
47 | matplot(coupledchains_[[index_max]]$samples2, type = "l", col = "red", add = T)
48 | 
49 | library(ggplot2)
50 | hist1 <- histogram_c_chains(coupledchains_, component = 1, k = 2000, m = 5000, dopar = FALSE)
51 | plot_histogram(hist1) + stat_function(fun = function(x) sapply(x, function(v) targetmarginal1(v)/Z)) + xlim(-4,4)
52 | 
53 | hist2 <- histogram_c_chains(coupledchains_, component = 2, k = 2000, m = 10000, dopar = TRUE)
54 | plot_histogram(hist2) + stat_function(fun = function(x) sapply(x, function(v) targetmarginal2(v)/Z)) + xlim(-4,4)
55 | 
56 | 
57 | 


--------------------------------------------------------------------------------
/inst/check/check_rwmh_univariate.R:
--------------------------------------------------------------------------------
 1 | library(unbiasedmcmc)
 2 | library(doParallel)
 3 | library(doRNG)
 4 | # register parallel cores
 5 | registerDoParallel(cores = detectCores()-2)
 6 | setmytheme()
 7 | rm(list = ls())
 8 | set.seed(21)
 9 | 
10 | ## test MH kernels on a univariate target
11 | ## define target distribution
12 | target <- function(x){
13 |   evals <- log(0.5) + dnorm(x, mean = c(-2, 2), sd = 0.2, log = TRUE)
14 |   return(max(evals) + log(sum(exp(evals - max(evals)))))
15 | }
16 | 
17 | Sigma_proposal <- diag(2, 1, 1)
18 | Sigma_proposal_chol <- chol(Sigma_proposal)
19 | Sigma_proposal_chol_inv <- solve(chol(Sigma_proposal))
20 | 
21 | kernels <- get_mh_kernels(target, Sigma_proposal)
22 | single_kernel <- kernels$single_kernel
23 | coupled_kernel <- kernels$coupled_kernel
24 | rinit <- function(){
25 |   x <- rnorm(1, 5, 5)
26 |   return(list(chain_state = x, current_pdf = target(x)))
27 | }
28 | 
29 | ### Test of the single kernel
30 | niterations <- 25000
31 | state <- rinit()
32 | chain <- matrix(ncol=1, nrow=niterations)
33 | chain[1,] <- state$chain_state
34 | for (t in 2:niterations){
35 |   state <- single_kernel(state)
36 |   chain[t,] <- state$chain_state
37 | }
38 | it <- floor(seq(from = 1, to  = niterations, length.out = 1000))
39 | qplot(x=it, y=chain[it], geom = "line") + ylab("X") + xlab("iteration")
40 | hist(chain[1000:niterations], nclass = 100, prob = TRUE)
41 | curve(sapply(x, function(y) exp(target(y))), add = T)
42 | mean(chain > 1)
43 | 
44 | # Markov kernel of the coupled chain
45 | niterations <- 50000
46 | state1 <- rinit()
47 | state2 <- rinit()
48 | chain1 <- matrix(ncol=1, nrow=niterations)
49 | chain2 <- matrix(ncol=1, nrow=niterations)
50 | chain1[1,] <- state1$chain_state
51 | chain2[1,] <- state2$chain_state
52 | meetingtime <- Inf
53 | for (t in 2:niterations){
54 |   coupled_result <- coupled_kernel(state1, state2)
55 |   state1 <- coupled_result$state1
56 |   state2 <- coupled_result$state2
57 |   if (coupled_result$identical && is.infinite(meetingtime)){
58 |     meetingtime <- t
59 |   }
60 |   chain1[t,] <- state1$chain_state
61 |   chain2[t,] <- state2$chain_state
62 | }
63 | print(meetingtime)
64 | it <- floor(seq(from = 1, to  = niterations, length.out = 1000))
65 | qplot(x=it, y=chain1[it], geom = "line") + ylab("X") + xlab("iteration") + geom_line(aes(y = chain2[it]), colour = "red")
66 | 
67 | hist(chain1[1000:niterations], nclass = 100, prob = TRUE)
68 | hist(chain2[1000:niterations], nclass = 100, prob = TRUE, add = TRUE, col = rgb(1,0,0,0.5))
69 | curve(sapply(x, function(y) exp(target(y))), add = T)
70 | 
71 | ## a meeting
72 | res <- sample_coupled_chains(single_kernel, coupled_kernel, rinit, m = 3)
73 | res$meetingtime
74 | res$iteration
75 | H_bar(res, h = function(x) x, k = 3, m = 3)
76 | # #
77 | qplot(x = 0:res$iteration, y = res$samples1[,1], geom = "line") +
78 |   geom_line(aes(x = 0:(res$iteration-1), y = res$samples2[,1]), col = rgb(1,0,0,0.5)) +
79 |   geom_line(aes(x = 1:res$iteration, y = res$samples2[,1]), col = rgb(1,0,0,1), linetype = 2)
80 | 
81 | ### distribution of meeting times
82 | nsamples <- 500
83 | meetingtime <- foreach(irep = 1:nsamples, .combine = c) %dorng% {
84 |   sample_meetingtime(single_kernel, coupled_kernel, rinit)$meetingtime
85 | }
86 | summary(meetingtime)
87 | hist(meetingtime)
88 | 
89 | 
90 | 


--------------------------------------------------------------------------------
/inst/check/check_vs_mlikelihood.R:
--------------------------------------------------------------------------------
  1 | # This script attempts at optimizing the code that computes the marginal likelihood
  2 | # in the variable selection example, and compares different implementations
  3 | library(unbiasedmcmc)
  4 | rm(list = ls())
  5 | set.seed(1)
  6 | #
  7 | # simulate data
  8 | n <- 100
  9 | p <- 5000
 10 | SNR <- 3
 11 | s_star <- 10
 12 | s0 <- 100
 13 | sigma0 <- 1
 14 | beta_star <- SNR * sqrt(sigma0^2 * log(p) / n) * c(2,-3,2,2,-3,3,-2,3,-2,3, rep(0, p-10))
 15 | # independent design
 16 | X <- matrix(rnorm(n * p), nrow = n, ncol = p) # fast_rmvnorm_chol(n, rep(0, p), diag(1, p, p))
 17 | X <- scale(X)
 18 | Y <- X %*% matrix(beta_star, ncol = 1) + rnorm(n, 0, sigma0)
 19 | Y <- scale(Y)
 20 | Y2 <- (t(Y) %*% Y)[1,1]
 21 | g <- p^3
 22 | kappa <- 1
 23 | 
 24 | rinit <- function(){
 25 |   x <- rep(0, p)
 26 |   x[sample(1:p, min(s0, p), replace = F)] <- (runif(min(s0, p)) < 0.5)
 27 |   return(x)
 28 | }
 29 | 
 30 | marginal_likelihood <- function(selection){
 31 |   Xselected <- as.matrix(X[, selection==1,drop=F])
 32 |   if (sum(selection) != 0){
 33 |     P <- Xselected %*% tcrossprod(x = solve(crossprod(x = Xselected, y = Xselected)), y = Xselected)
 34 |   } else {
 35 |     P <- matrix(0, n, n)
 36 |   }
 37 |   R2_gamma <- t(Y) %*% P %*% Y/ Y2
 38 |   return((-sum(selection)/2 * log(1+g) - (n/2) * log(1 + g*(1-R2_gamma)))[1,1])
 39 | }
 40 | 
 41 | marginal_likelihood_alt <- function(selection){
 42 |   Xselected <- as.matrix(X[, selection==1,drop=F])
 43 |   if (sum(selection) != 0){
 44 |     #P1 <- Xselected %*% solve(t(Xselected) %*% Xselected) %*% t(Xselected)
 45 |     # using crossprod, it's a bit faster
 46 |     P <- Xselected %*% tcrossprod(x = solve(crossprod(x = Xselected, y = Xselected)), y = Xselected)
 47 |   } else {
 48 |     P <- matrix(0, n, n)
 49 |   }
 50 |   posteriorvalue <- -(sum(selection) + 1) / 2 * log(g + 1)
 51 |   posteriorvalue <- posteriorvalue - n / 2 * log(Y2 - g / (g + 1) * t(Y) %*% P %*% Y)
 52 |   return(posteriorvalue[1,1])
 53 | }
 54 | 
 55 | cppFunction('
 56 | double marginal_likelihood_c_(Eigen::VectorXd selection, const Eigen::MatrixXd & X, const Eigen::VectorXd & Y, double Y2, double g){
 57 |   double l = 0.;
 58 |   int n = X.rows();
 59 |   int p = X.cols();
 60 |   int s = selection.sum();
 61 |   if (s > 0){
 62 |     Eigen::MatrixXd Xselected(n,s);
 63 |     int counter = 0;
 64 |     for (int column = 0; column < p; column++){
 65 |       if (selection(column)){
 66 |       Xselected.col(counter) = X.col(column);
 67 |       counter++;
 68 |       }
 69 |     }
 70 |     l = Y.transpose() * Xselected * ((Xselected.transpose() * Xselected).inverse()) * Xselected.transpose() * Y;
 71 |   } else {
 72 |     l = 0.;
 73 |   }
 74 |   l = -((double) s + 1.) / 2. * log(g + 1.) - (double) n / 2. * log(Y2 - g / (g + 1.) * l);
 75 |   return l;
 76 | }
 77 |             ', depends="RcppEigen")
 78 | 
 79 | sel <- rinit()
 80 | marginal_likelihood_c_(sel, X, Y, Y2, g)
 81 | marginal_likelihood_alt(sel)
 82 | 
 83 | marginal_likelihood_c_(rep(0, p), X, Y, Y2, g)
 84 | marginal_likelihood_alt(rep(0, p))
 85 | 
 86 | selection1 <- rinit()
 87 | selection2 <- rinit()
 88 | 
 89 | marginal_likelihood(selection1)
 90 | marginal_likelihood(selection2)
 91 | marginal_likelihood(selection1) - marginal_likelihood(selection2)
 92 | 
 93 | marginal_likelihood_alt(selection1)
 94 | marginal_likelihood_alt(selection2)
 95 | marginal_likelihood_alt(selection1) - marginal_likelihood_alt(selection2)
 96 | 
 97 | 
 98 | library(microbenchmark)
 99 | microbenchmark(marginal_likelihood(rinit()),
100 |                marginal_likelihood_alt(rinit()),
101 |                marginal_likelihood_c_(rinit(), X, Y, Y2, g))
102 | 


--------------------------------------------------------------------------------
/inst/check/check_weirdbug.R:
--------------------------------------------------------------------------------
 1 | # This script illustrates a weird phenomenon when using
 2 | # nrow/ncol to get a matrix' dimension instead of dim.
 3 | 
 4 | # The mystery has been elucidated thanks to Louis Aslett,
 5 | # and explanations are given in the blog post
 6 | # https://statisfaction.wordpress.com/2017/12/10/nrow-references-and-copies/
 7 | 
 8 | # The question is: why is the following code so slow?
 9 | dimstate = 100
10 | nmcmc = 1e4
11 | chain = matrix(0, nrow = nmcmc, ncol = dimstate)
12 | for (imcmc in 1:nmcmc){
13 |   if (imcmc == nrow(chain)){
14 |   }
15 |   x = rnorm(dimstate, mean = 0, sd = 1)
16 |   chain[imcmc,] = x
17 | }
18 | 
19 | # Attempts at finding the reason, identifying "nrow" as the problem
20 | # in combination to changing the matrix chain.
21 | dimstate = 100
22 | nmcmc = 1e4
23 | chain = matrix(0, nrow = nmcmc, ncol = dimstate)
24 | for (imcmc in 1:nmcmc){
25 |   if (imcmc == nrow(chain)){
26 |   }
27 |   x = rnorm(dimstate, mean = 0, sd = 1)
28 |   # chain[imcmc,] = x
29 | }
30 | 
31 | dimstate = 100
32 | nmcmc = 1e4
33 | chain = matrix(0, nrow = nmcmc, ncol = dimstate)
34 | for (imcmc in 1:nmcmc){
35 |   if (imcmc == nmcmc){
36 |   }
37 |   x = rnorm(dimstate, mean = 0, sd = 1)
38 |   chain[imcmc,] = x
39 | }
40 | 
41 | # illustration that dim behaves very differently compared to nrow
42 | dimstate = 100
43 | nmcmc = 1e4
44 | chain = matrix(0, nrow = nmcmc, ncol = dimstate)
45 | for (imcmc in 1:nmcmc){
46 |   if (imcmc == dim(chain)[1]){
47 |   }
48 |   x = rnorm(dimstate, mean = 0, sd = 1)
49 |   chain[imcmc,] = x
50 | }
51 | 
52 | #
53 | x <- matrix(0, nrow=1e5, ncol=100) # matrix has ref count 1
54 | x[1,1] <- 1 # ref count is 1, so write with no copy
55 | nrow(x) # ref count is 2 even though nothing was touched
56 | x[1,1] <- 1 # ref count still 2, so R copies before writing first element. Now the ref count drops to 1 again
57 | x[2,2] <- 1 # this writes without a copy as ref count got reset on last line
58 | nrow(x) # ref count jumps
59 | x[3,3] <- 1 # copy invoked again! Aaaargh!
60 | 


--------------------------------------------------------------------------------
/inst/check/to_check_signed_measure.R:
--------------------------------------------------------------------------------
  1 | # This script plays with the computation of estimators
  2 | # using signed measure representations
  3 | 
  4 | library(unbiasedmcmc)
  5 | library(doParallel)
  6 | library(doRNG)
  7 | library(dplyr)
  8 | registerDoParallel(cores = detectCores()-2)
  9 | rm(list = ls())
 10 | set.seed(1)
 11 | 
 12 | #### Bivariate target
 13 | target <- function(x) fast_dmvnorm(matrix(x, nrow = 1), mean = c(0,0.5), covariance = diag(c(0.5, 0.2)))
 14 | Sigma_proposal <- diag(1, 2)
 15 | rinit <- function(){
 16 |   x <- fast_rmvnorm(1, c(10, 5), diag(3, 2, 2))
 17 |   return(list(chain_state = x, current_pdf = target(x)))
 18 | }
 19 | kernels <- get_mh_kernels(target, Sigma_proposal)
 20 | 
 21 | cx <- sample_coupled_chains(kernels$single_kernel, kernels$coupled_kernel, rinit)
 22 | cx$samples1 %>% head
 23 | cx$meetingtime
 24 | dim(cx$samples1)[1]
 25 | dim(cx$samples1)[2]
 26 | 
 27 | ms <- c_chains_to_measure_as_list(cx, 0, 0)
 28 | tail(ms$atoms)
 29 | cat(sum(ms$weights * ms$atoms[,1]), sum(ms$weights * ms$atoms[,ncol(ms$atoms)]), "\n")
 30 | H_bar(cx, h = function(x) x, k = 0, m = 0)
 31 | 
 32 | ms$MCMC
 33 | sum(ms$weights[ms$MCMC==1])
 34 | sum(ms$weights[ms$MCMC==0])
 35 | 
 36 | cx <- sample_coupled_chains(kernels$single_kernel, kernels$coupled_kernel, rinit, m = 100)
 37 | cx$meetingtime
 38 | ms <- c_chains_to_measure_as_list(cx, 30, 100)
 39 | tail(ms$atoms)
 40 | cat(sum(ms$weights * ms$atoms[,1]), sum(ms$weights * ms$atoms[,ncol(ms$atoms)]), "\n")
 41 | H_bar(cx, h = function(x) x, k = 30, m = 100)
 42 | ms$MCMC
 43 | sum(ms$weights[ms$MCMC])
 44 | sum(ms$weights[!ms$MCMC])
 45 | 
 46 | cx <- sample_coupled_chains(kernels$single_kernel, kernels$coupled_kernel, rinit, m = 100, lag = 5)
 47 | cx$meetingtime
 48 | ms <- c_chains_to_measure_as_list(cx, 3, 100)
 49 | ms$MCMC
 50 | sum(ms$weights[ms$MCMC])
 51 | sum(ms$weights[!ms$MCMC])
 52 | 
 53 | cat(sum(ms$weights * ms$atoms[,1]), sum(ms$weights * ms$atoms[,ncol(ms$atoms)]), "\n")
 54 | H_bar(cx, h = function(x) x, k = 3, m = 100)
 55 | 
 56 | c_chains_to_dataframe(cx, k = 3, m = 100)
 57 | 
 58 | nrepeats <- 10
 59 | coupledchains_ <- foreach(rep = 1:nrepeats) %dorng% {
 60 |   sample_coupled_chains(kernels$single_kernel, kernels$coupled_kernel, rinit, m = 100, lag = 1)
 61 | }
 62 | summary(sapply(coupledchains_, function(l) l$meetingtime))
 63 | 
 64 | k <- 20
 65 | m <- 100
 66 | df_ <- c_chains_to_dataframe(coupledchains_, k, m, dopar = T)
 67 | head(df_)
 68 | colSums(df_$weight * df_[,4:ncol(df_)])
 69 | rowMeans(sapply(X = coupledchains_, FUN = function(x) H_bar(x, h = function(v) v, k = k, m = m)))
 70 | 
 71 | cat(sum(df_$weight * df_$atom.1 * cos(df_$atom.2)), "\n")
 72 | mean(sapply(X = coupledchains_, FUN = function(x) H_bar(x, h = function(v) v[1] * cos(v[2]), k = k, m = m)))
 73 | 
 74 | 
 75 | 
 76 | 
 77 | ##### Univariate target
 78 | target <- function(x){
 79 |   evals <- log(0.5) + dnorm(x, mean = c(-2, 2), sd = 0.5, log = TRUE)
 80 |   return(max(evals) + log(sum(exp(evals - max(evals)))))
 81 | }
 82 | Sigma_proposal <- diag(2, 1, 1)
 83 | rinit <- function(){
 84 |   x <- rnorm(1, 5, 5)
 85 |   return(list(chain_state = x, current_pdf = target(x)))
 86 | }
 87 | kernels <- get_mh_kernels(target, Sigma_proposal)
 88 | 
 89 | nrepeats <- 100
 90 | m <- 200
 91 | coupledchains_ <- foreach(rep = 1:nrepeats) %dorng% {
 92 |   sample_coupled_chains(kernels$single_kernel, kernels$coupled_kernel, rinit, m = m, lag = 50)
 93 | }
 94 | summary(sapply(coupledchains_, function(l) l$meetingtime))
 95 | k <- 20
 96 | df_unpruned <- c_chains_to_dataframe(coupledchains_, k, m, dopar = F, prune = FALSE)
 97 | df_pruned <- c_chains_to_dataframe(coupledchains_, k, m, dopar = F, prune = TRUE)
 98 | head(df_pruned)
 99 | dim(df_pruned)
100 | dim(df_unpruned)
101 | 
102 | 
103 | sum(df_pruned$weight * df_pruned$atom.1)
104 | colSums(df_pruned$weight * df_pruned[,4:ncol(df_pruned),drop=F])
105 | mean(sapply(X = coupledchains_, FUN = function(x) H_bar(x, h = function(v) v, k = k, m = m)))
106 | 
107 | df_pruned %>% summarise(mean1 = sum(weight * atom.1))
108 | df_pruned %>% group_by(MCMC) %>% summarise(mean1 = sum(weight * atom.1), sumweight = sum(weight), len = n(), lenuniqueweight = length(unique(weight))) %>% ungroup() %>% as.data.frame
109 | MCMC_traj <- sapply(coupledchains_, function(x) x$samples1[(k+1):(m+1),])
110 | length(as.numeric(MCMC_traj))
111 | mean(MCMC_traj)
112 | 
113 | 
114 | hist1 <- histogram_c_chains(coupledchains_, component = 1, k = 150, m = m)
115 | setmytheme()
116 | plot_histogram(hist1)
117 | 
118 | 


--------------------------------------------------------------------------------
/inst/reproducebaseball/baseball.mcmc.run.R:
--------------------------------------------------------------------------------
 1 | # load packages
 2 | library(unbiasedmcmc)
 3 | setmytheme()
 4 | rm(list = ls())
 5 | set.seed(21)
 6 | library(doParallel)
 7 | library(doRNG)
 8 | registerDoParallel(cores = detectCores())
 9 | 
10 | #
11 | # this example is taken from Rosenthal "Parallel Computing and Monte Carlo algorithms", 2000.
12 | # The data are baseball player's batting averages, as in Efron and Morris 1975 or Morris 1983
13 | # In particular, the data are listed in Morris 1983, and available as part of Rosenthal's online code:
14 | # http://probability.ca/jeff/comp/james.c
15 | 
16 | Y <- c(0.395, 0.375, 0.355, 0.334, 0.313, 0.313, 0.291, 0.269, 0.247, 0.247, 0.224, 0.224,
17 |        0.224, 0.224, 0.224, 0.200, 0.175, 0.148)
18 | ndata <- length(Y)
19 | 
20 | # The data are Y_i for i in {1,...,K}, with here K = 18.
21 | # The model specifies Y_i ~ Normal(theta_i, V), with V known and fixed to 0.00434
22 | V <- 0.00434
23 | # The prior is theta_i ~ Normal(mu, A), with mu ~ flat prior, and A ~ InverseGamma(a,b)
24 | # with density proportional to exp(-b/x) x^{-a-1}; Rosenthal chooses a = -1, b = 2
25 | a <- -1
26 | b <- 2
27 | # To target the posterior distribution, we follow Rosenthal and consider the following Gibbs sampler.
28 | # A given rest: IG(a + (K-1)/2, b + (sum_i=1^K (theta_i - theta_bar)^2)/2)
29 | # mu given rest: Normal(theta_bar, A / K)
30 | # theta_i given rest: Normal( (mu * V + Y_i * A) / (V + A), A * V / (V + A))
31 | # ... where theta_bar is the average of the theta_i, for i in {1,...,K}
32 | 
33 | 
34 | # we store the parameters as (mu, A, theta_1, ..., theta_K) so the parameter space is of dimension 20
35 | # the initialization comes from Rosenthal (except the initialization of mu and A which is irrelevant)
36 | rinit <- function(){
37 |   return(list(chain_state = c(0,1,rep(mean(Y), ndata))))
38 | }
39 | # here is the code for the Gibbs sampler
40 | single_kernel <- function(state){
41 |   theta <- state$chain_state[3:(ndata+2)]
42 |   theta_bar <- mean(theta)
43 |   # update of A given rest
44 |   A <- rinversegamma(1, a + 0.5 * (ndata-1), b + 0.5 * sum((theta - theta_bar)^2))
45 |   # update of mu given rest
46 |   mu <- rnorm(1, theta_bar, sqrt(A/ndata))
47 |   # update of each theta_i
48 |   theta <- rnorm(ndata, (mu * V + Y * A) / (V + A), sqrt(A * V / (V + A)))
49 |   return(list(chain_state = c(mu, A, theta)))
50 | }
51 | 
52 | ## Modify niterations
53 | niterations <- 5e5
54 | chain <- matrix(nrow = niterations, ncol = ndata+2)
55 | state <- rinit()
56 | chain[1,] <- state$chain_state
57 | for (iteration in 2:niterations){
58 |   state <- single_kernel(state)
59 |   chain[iteration,] <- state$chain_state
60 | }
61 | save(niterations, chain, file = "baseball.mcmc.RData")
62 | load(file = "baseball.mcmc.RData")
63 | 


--------------------------------------------------------------------------------
/inst/reproducebaseball/run.all.R:
--------------------------------------------------------------------------------
 1 | ## set working directory to the directory containing this script
 2 | 
 3 | print("run long MCMC")
 4 | source("baseball.mcmc.run.R")
 5 | 
 6 | print("run coupled chains with m=100")
 7 | source("baseball.run.R")
 8 | 
 9 | print("produce plots")
10 | source("baseball.plots.R")
11 | 


--------------------------------------------------------------------------------
/inst/reproducebayesianlasso/bayesianlasso.mcmc.run.R:
--------------------------------------------------------------------------------
 1 | # load packages
 2 | library(unbiasedmcmc)
 3 | library(coda)
 4 | #
 5 | rm(list = ls())
 6 | set.seed(1)
 7 | library(doParallel)
 8 | library(doRNG)
 9 | registerDoParallel(cores = detectCores() - 1)
10 | 
11 | data(diabetes)
12 | X <- scale(diabetes$x2)
13 | Y <- matrix(scale(diabetes$y), ncol = 1)
14 | p <- ncol(X)
15 | 
16 | 
17 | mcmc_blasso <- function(nmcmc, burnin, lambda){
18 |   pb <- get_blasso(Y, X, lambda)
19 |   state <- pb$rinit()
20 |   chain <- matrix(nrow = nmcmc, ncol = length(state$chain_state))
21 |   for (imcmc in 1:nmcmc){
22 |     state <- pb$gibbs_kernel(state)
23 |     chain[imcmc,] <- state$chain_state
24 |   }
25 |   return(chain)
26 | }
27 | 
28 | 
29 | ## Modify nmcmc
30 | nmcmc <- 50000
31 | burnin <- floor(nmcmc / 10)
32 | 
33 | # result <- mcmc_blasso(nmcmc, burnin, .1)
34 | # # matplot(result[,1:10], type = "l")
35 | # postmeans <- colMeans(result[burnin:nmcmc,])
36 | # ess <- effectiveSize(result[burnin:nmcmc,1:p])
37 | 
38 | lambdas <- 10^(seq(from = -2, to = 3, length.out = 25))
39 | df <- foreach (ilambda = 1:length(lambdas), .combine = rbind) %dorng% {
40 |   lambda <- lambdas[ilambda]
41 |   print(lambda)
42 |   result <- mcmc_blasso(nmcmc, burnin, lambda)
43 |   postmeans <- colMeans(result[burnin:nmcmc,1:p])
44 |   ess <- effectiveSize(result[burnin:nmcmc,1:p])
45 | 
46 |   data.frame(ilambda = rep(ilambda, p), lambda = rep(lambda, p), component = 1:p,
47 |                    ess = ess, postmeans = postmeans)
48 | }
49 | save(df, nmcmc, burnin, lambdas, file = "bayesianlasso.mcmc.RData")
50 | 
51 | 
52 | 


--------------------------------------------------------------------------------
/inst/reproducebayesianlasso/bayesianlasso.plots.R:
--------------------------------------------------------------------------------
 1 | # load packages
 2 | library(unbiasedmcmc)
 3 | library(latex2exp)
 4 | library(dplyr)
 5 | setmytheme()
 6 | rm(list = ls())
 7 | set.seed(21)
 8 | 
 9 | data(diabetes)
10 | X <- scale(diabetes$x2)
11 | Y <- matrix(scale(diabetes$y), ncol = 1)
12 | p <- ncol(X)
13 | 
14 | # lambdas <- 10^(seq(from = -2, to = 3, length.out = 25))
15 | 
16 | ## meeting times as a function of lambda
17 | load("bayesianlasso.meetings.RData")
18 | head(df)
19 | meetingquantile <- df %>% group_by(ilambda) %>% summarise(lambda = mean(lambda), mean = mean(meetingtime), q99 = quantile(meetingtime, probs = 0.99))
20 | meetingquantile %>% head
21 | g <- ggplot(meetingquantile, aes(x = lambda, y = mean)) + geom_point()
22 | g <- g + scale_x_log10(breaks = c(1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4))
23 | g <- g + scale_y_log10(breaks = c(10, 100, 1000, 5500))
24 | g <- g + xlab(expression(lambda)) + ylab("average meeting time")
25 | g
26 | ggsave(filename = "bayesianlasso.meetings.pdf", plot = g, width = 8, height = 6)
27 | 
28 | ## effective sample sizes as a function of lambda
29 | load("bayesianlasso.mcmc.RData")
30 | g <- ggplot(df, aes(x = lambda, y = ess / (nmcmc - burnin + 1))) + geom_point()
31 | g <- g + scale_x_log10(breaks = c(1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4))
32 | g <- g + xlab(expression(lambda)) + ylab("effective sample sizes")
33 | g
34 | ggsave(filename = "bayesianlasso.ess.pdf", plot = g, width = 8, height = 6)
35 | 
36 | ## posterior mean of beta as function of lambda
37 | load("bayesianlasso.estimators.RData")
38 | summary.df <- df %>% group_by(ilambda, lambda, component) %>% summarise(m = mean(estimator), sd = sd(estimator), nsamples = n())
39 | summary.df$component %>% unique
40 | summary.df$nsamples %>% unique
41 | 
42 | load("bayesianlasso.extraestimators.RData")
43 | summary.extra.df <- rbind(df, extra.df) %>% group_by(ilambda, lambda, component) %>% summarise(m = mean(estimator), sd = sd(estimator), nsamples = n())
44 | 
45 | # g <- ggplot(summary.df %>% filter(component <= 64), aes(x = lambda, y = sd/sqrt(nsamples), group = component)) + geom_line() + geom_point()
46 | # g <- g + scale_x_log10(breaks = c(1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4)) + theme(legend.position = "none")
47 | # g <- g + ylim(0,0.4)
48 | # g
49 | #
50 | # g <- ggplot(summary.extra.df %>% filter(component <= 64), aes(x = lambda, y = sd/sqrt(nsamples), group = component)) + geom_line() + geom_point()
51 | # g <- g + scale_x_log10(breaks = c(1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4)) + theme(legend.position = "none")
52 | # g <- g + ylim(0,0.4)
53 | # g
54 | 
55 | g <- ggplot(summary.df %>% filter(component <= 64), aes(x = lambda, y = m, group = component)) + geom_line() + geom_point()
56 | g <- g + scale_x_log10(breaks = c(1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4)) + theme(legend.position = "none")
57 | g <- g + geom_segment(aes(y = m - 2*sd / sqrt(nsamples), yend = m + 2*sd / sqrt(nsamples), xend = lambda))
58 | g <- g + xlab(expression(lambda)) + ylab(TeX("$E_{\\lambda} \\lbrack \\beta | Y,X \\rbrack $"))
59 | g <- g + ylim(-5,4)
60 | g
61 | ggsave(filename = "bayesianlasso.paths.pdf", plot = g, width = 8, height = 6)
62 | 
63 | g <- ggplot(summary.extra.df %>% filter(component <= 64), aes(x = lambda, y = m, group = component)) + geom_line() + geom_point()
64 | g <- g + scale_x_log10(breaks = c(1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4)) + theme(legend.position = "none")
65 | g <- g + geom_segment(aes(y = m - 2*sd / sqrt(nsamples), yend = m + 2*sd / sqrt(nsamples), xend = lambda))
66 | g <- g + xlab(expression(lambda)) + ylab(TeX("$E_{\\lambda} \\lbrack \\beta | Y,X \\rbrack $"))
67 | g <- g + ylim(-5,4)
68 | g
69 | ggsave(filename = "bayesianlasso.refinedpaths.pdf", plot = g, width = 8, height = 6)
70 | 
71 | 


--------------------------------------------------------------------------------
/inst/reproducebayesianlasso/bayesianlasso.run.R:
--------------------------------------------------------------------------------
  1 | # load packages
  2 | library(unbiasedmcmc)
  3 | setmytheme()
  4 | rm(list = ls())
  5 | set.seed(21)
  6 | library(doParallel)
  7 | library(doRNG)
  8 | library(dplyr)
  9 | 
 10 | registerDoParallel(cores = detectCores()-2)
 11 | 
 12 | data(diabetes)
 13 | X <- scale(diabetes$x2)
 14 | Y <- matrix(scale(diabetes$y), ncol = 1)
 15 | p <- ncol(X)
 16 | 
 17 | ## Modify lambdas
 18 | # lambdas <- 10^(seq(from = -2, to = 3, length.out = 25))
 19 | lambdas <- 10^(seq(from = -2, to = 1, length.out = 15))
 20 | 
 21 | 
 22 | nrep <- 100
 23 | df <- data.frame()
 24 | for (ilambda in 1:length(lambdas)){
 25 |   print(ilambda)
 26 |   lambda <- lambdas[ilambda]
 27 |   pb <- get_blasso(Y, X, lambda)
 28 |   meetings <- foreach(irep = 1:nrep) %dorng% {
 29 |     sample_meetingtime(pb$gibbs_kernel, pb$coupled_gibbs_kernel, pb$rinit)
 30 |   }
 31 |   meetingtimes <- sapply(meetings, function(x) x$meetingtime)
 32 |   df <- rbind(df, data.frame(ilambda = ilambda, lambda = lambda, rep = 1:nrep, meetingtime = meetingtimes))
 33 |   save(df, lambdas, nrep, file = "bayesianlasso.meetings.RData")
 34 | }
 35 | save(df, lambdas, nrep, file = "bayesianlasso.meetings.RData")
 36 | 
 37 | load("bayesianlasso.meetings.RData")
 38 | head(df)
 39 | meetingquantile <- df %>% group_by(ilambda) %>% summarise(mean = mean(meetingtime), q99 = quantile(meetingtime, probs = 0.99))
 40 | meetingquantile$q99
 41 | 
 42 | 
 43 | df <- data.frame()
 44 | for (ilambda in 1:length(lambdas)){
 45 |   print(ilambda)
 46 |   lambda <- lambdas[ilambda]
 47 |   k <- meetingquantile$q99[ilambda]
 48 |   m <- 10 * k
 49 |   pb <- get_blasso(Y, X, lambda)
 50 |   ues <- foreach(irep = 1:nrep) %dorng% {
 51 |     sample_unbiasedestimator(pb$gibbs_kernel, pb$coupled_gibbs_kernel, pb$rinit, h = function(x) x, k = k, m = m)
 52 |   }
 53 |   for (irep in 1:nrep){
 54 |     df <- rbind(df, data.frame(ilambda = ilambda, lambda = lambda, rep = rep(irep, (2*p+1)),
 55 |                                component = 1:(2*p+1), estimator = ues[[irep]]$uestimator))
 56 |   }
 57 |   save(df, lambdas, nrep, file = "bayesianlasso.estimators.RData")
 58 | }
 59 | save(df, lambdas, nrep, file = "bayesianlasso.estimators.RData")
 60 | 
 61 | load("bayesianlasso.estimators.RData")
 62 | summary.df <- df %>% group_by(ilambda, lambda, component) %>% summarise(m = mean(estimator), sd = sd(estimator), nsamples = n())
 63 | summary.df$component %>% unique
 64 | summary.df$nsamples %>% unique
 65 | 
 66 | g <- ggplot(summary.df %>% filter(component <= 64), aes(x = lambda, y = m, group = component)) + geom_line() + scale_x_log10()
 67 | g + geom_segment(aes(y = m - 2*sd / sqrt(nrep), yend = m + 2*sd / sqrt(nrep), xend = lambda))
 68 | 
 69 | 
 70 | # compute extra estimators for first 10 lambdas
 71 | nrep <- 1000
 72 | extra.df <- data.frame()
 73 | for (ilambda in 1:10){
 74 |   print(ilambda)
 75 |   lambda <- lambdas[ilambda]
 76 |   k <- meetingquantile$q99[ilambda]
 77 |   m <- 10 * k
 78 |   pb <- get_blasso(Y, X, lambda)
 79 |   ues <- foreach(irep = 1:nrep) %dorng% {
 80 |     sample_unbiasedestimator(pb$gibbs_kernel, pb$coupled_gibbs_kernel, pb$rinit, h = function(x) x, k = k, m = m)
 81 |   }
 82 |   # meetingtimes <- sapply(ues, function(x) x$meetingtime)
 83 |   for (irep in 1:nrep){
 84 |     extra.df <- rbind(extra.df, data.frame(ilambda = ilambda, lambda = lambda, rep = rep(irep, (2*p+1)),
 85 |                                component = 1:(2*p+1), estimator = ues[[irep]]$uestimator))
 86 |   }
 87 |   save(extra.df, file = "bayesianlasso.extraestimators.RData")
 88 | }
 89 | save(extra.df, file = "bayesianlasso.extraestimators.RData")
 90 | 
 91 | load("bayesianlasso.extraestimators.RData")
 92 | 
 93 | summary.extra.df <- rbind(df, extra.df) %>% group_by(ilambda, lambda, component) %>% summarise(m = mean(estimator), sd = sd(estimator), nsamples = n())
 94 | summary.extra.df$nsamples %>% unique
 95 | 
 96 | g <- ggplot(summary.extra.df %>% filter(component <= 64), aes(x = lambda, y = m, group = component)) + geom_line() + scale_x_log10()
 97 | g + geom_segment(aes(y = m - 2*sd / sqrt(nsamples), yend = m + 2*sd / sqrt(nsamples), xend = lambda))
 98 | 
 99 | # g <- ggplot(summary.extra.df %>% filter(component <= 64), aes(x = ilambda, y = m, group = component)) + geom_line() + scale_x_log10()
100 | # g + geom_segment(aes(y = m - 2*sd / sqrt(nsamples), yend = m + 2*sd / sqrt(nsamples), xend = ilambda))
101 | 


--------------------------------------------------------------------------------
/inst/reproducebayesianlasso/run.all.R:
--------------------------------------------------------------------------------
 1 | # set working directory to folder containing this file
 2 | 
 3 | print("run various coupled MCMC")
 4 | source("bayesianlasso.run.R")
 5 | 
 6 | print("run long MCMC")
 7 | source("bayesianlasso.mcmc.run.R")
 8 | # this is used to get effective sample size as a function of lambda
 9 | 
10 | print("produce plots")
11 | source("bayesianlasso.plots.R")
12 | 
13 | 


--------------------------------------------------------------------------------
/inst/reproducebimodal/bimodal.mcmc.run.R:
--------------------------------------------------------------------------------
 1 | # load packages
 2 | library(unbiasedmcmc)
 3 | rm(list = ls())
 4 | set.seed(21)
 5 | 
 6 | #
 7 | target <- function(x){
 8 |   evals <- log(0.5) + dnorm(x, mean = c(-4, 4), sd = 1, log = TRUE)
 9 |   return(max(evals) + log(sum(exp(evals - max(evals)))))
10 | }
11 | #
12 | get_pb <- function(sd_proposal, initmean, initsd){
13 |   single_kernel <- function(state){
14 |     chain_state <- state$chain_state
15 |     current_pdf <- state$current_pdf
16 |     proposal_value <- rnorm(1, mean=chain_state, sd=sd_proposal)
17 |     proposal_pdf <- target(proposal_value)
18 |     accept <- (log(runif(1)) < (proposal_pdf - current_pdf))
19 |     if (accept){
20 |       return(list(chain_state = proposal_value, current_pdf = proposal_pdf))
21 |     } else {
22 |       return(list(chain_state = chain_state, current_pdf = current_pdf))
23 |     }
24 |   }
25 |   rinit <- function(){
26 |     chain_state <- rnorm(1, initmean, initsd)
27 |     current_pdf <- target(chain_state)
28 |     return(list(chain_state = chain_state, current_pdf = current_pdf))
29 |   }
30 | 
31 |   return(list(rinit = rinit, single_kernel = single_kernel))
32 | }
33 | #
34 | 
35 | # test function
36 | testfunction <- function(x) (x > 3)
37 | ## Modify nmcmc
38 | nmcmc <- 100000
39 | burnin <- 10000
40 | # easy setting: good proposal, but bad init
41 | pb <- get_pb(3, initmean = 10, initsd = 10)
42 | current <- pb$rinit()
43 | chain <- rep(0, nmcmc)
44 | for (imcmc in 1:nmcmc){
45 |   current <- pb$single_kernel(current)
46 |   chain[imcmc] <- current$chain_state
47 | }
48 | 
49 | 
50 | library(coda)
51 | mcmcvar.easy <- spectrum0(sapply(chain[burnin:nmcmc], testfunction))$spec
52 | mcmcvar.easy
53 | mean(sapply(chain[burnin:nmcmc], testfunction))
54 | save(nmcmc, mcmcvar.easy, file = "bimodal.mcmc.RData")
55 | 
56 | 
57 | # intermediate setting: bad proposal, good init
58 | pb <- get_pb(1, initmean = 10, initsd = 10)
59 | current <- pb$rinit()
60 | chain <- rep(0, nmcmc)
61 | for (imcmc in 1:nmcmc){
62 |   current <- pb$single_kernel(current)
63 |   chain[imcmc] <- current$chain_state
64 | }
65 | library(coda)
66 | mcmcvar.intermediate <- spectrum0(sapply(chain[burnin:nmcmc], testfunction))$spec
67 | save(nmcmc, mcmcvar.easy, mcmcvar.intermediate, file = "bimodal.mcmc.RData")
68 | 
69 | 
70 | 


--------------------------------------------------------------------------------
/inst/reproducebimodal/run.all.R:
--------------------------------------------------------------------------------
 1 | ## set working directory to the directory containing this script
 2 | 
 3 | print("runs on bimodal target")
 4 | 
 5 | print("generate meeting times for different MCMC proposals / different initial distribution")
 6 | source("bimodal.run.R")
 7 | 
 8 | print("generate long MCMC run")
 9 | source("bimodal.mcmc.run.R")
10 | 
11 | print("produce plots and tables")
12 | source("bimodal.plots.R")
13 | 


--------------------------------------------------------------------------------
/inst/reproduceepidemiology/plummer.cut.plots.R:
--------------------------------------------------------------------------------
 1 | # load packages
 2 | library(unbiasedmcmc)
 3 | library(latex2exp)
 4 | library(dplyr)
 5 | setmytheme()
 6 | rm(list = ls())
 7 | set.seed(21)
 8 | library(doParallel)
 9 | library(doRNG)
10 | registerDoParallel(cores = detectCores())
11 | 
12 | #
13 | # Plummer's example
14 | dimension <- 2
15 | 
16 | filename <- "plummer.tuning.RData"
17 | load(file = filename)
18 | meetingtime <- sapply(c_chains_1, function(x) x$meetingtime)
19 | summary(meetingtime)
20 | hist(meetingtime)
21 | # from which we have found
22 | k
23 | m
24 | 
25 | # ##
26 | load(file = "plummer.results.RData")
27 | nsamples
28 | k
29 | m
30 | 
31 | meetingtime <- sapply(c_chains_2, function(x) x$meetingtime)
32 | summary(meetingtime)
33 | hist(meetingtime)
34 | #
35 | 
36 | mean_estimators <-  foreach(irep = 1:nsamples) %dorng% {
37 |   H_bar(c_chains_2[[irep]], k = k, m = m)
38 | }
39 | 
40 | square_estimators <-  foreach(irep = 1:nsamples) %dorng% {
41 |   H_bar(c_chains_2[[irep]], h = function(x) x^2, k = k, m = m)
42 | }
43 | 
44 | est_mean <- rep(0, dimension)
45 | est_var <- rep(0, dimension)
46 | for (component in 1:dimension){
47 |   estimators <- sapply(mean_estimators, function(x) x[component])
48 |   est_mean[component] <- mean(estimators)
49 |   cat("estimated mean: ", est_mean[component], "+/- ", 2*sd(estimators)/sqrt(nsamples), "\n")
50 |   s_estimators <- sapply(square_estimators, function(x) x[component])
51 |   cat("estimated second moment: ", mean(s_estimators), "+/- ", 2*sd(s_estimators)/sqrt(nsamples), "\n")
52 |   est_var[component] <- mean(s_estimators) - est_mean[component]^2
53 |   cat("estimated variance: ", est_var[component], "\n")
54 | }
55 | 
56 | nsamples <- length(c_chains_2)
57 | meetingtime <- sapply(c_chains_2, function(x) x$meetingtime)
58 | summary(meetingtime)
59 | 
60 | # ggsave(filename = "plummer.meetingtimes.pdf", plot = g, width = 7, height = 7)
61 | gmeetingtime <- qplot(x = meetingtime, geom = "blank") + geom_histogram(aes(y = ..density..)) + xlab("meeting time") + ylab("density")
62 | gmeetingtime
63 | ggsave(filename = "plummer.meetingtimes.pdf", plot = gmeetingtime, width = 5, height = 5)
64 | 
65 | sum(sapply(c_chains_2, function(x) x$iteration)) / nsamples
66 | 
67 | ### cut distribution from tedious parallel MCMC
68 | histogram1 <- histogram_c_chains(c_chains_2, 1, k, m, nclass = 35)
69 | histogram2 <- histogram_c_chains(c_chains_2, 2, k, m, nclass = 30)
70 | load(file = "plummer.mcmc.RData")
71 | 
72 | hist_mcmc <- hist(theta2s[,1], breaks = histogram1$breaks, plot = F)
73 | # hist_mcmc <- hist(theta2s[,1], plot = F)
74 | g1 <- plot_histogram(histogram1, with_bar = T) + xlab(TeX("$\\theta_{2,1}$")) + ylab("density")
75 | g1 <- g1 + geom_line(data=data.frame(x = hist_mcmc$mids, y = hist_mcmc$density), aes(x = x, y = y, xmin = NULL, xmax = NULL, ymin = NULL, ymax = NULL), colour = "red")
76 | g1 <- g1 + scale_x_continuous(breaks = c(-2.5, -2, -1.5))
77 | g1
78 | ggsave("plummer.histogram1.pdf", plot = g1, width = 5, height = 5)
79 | 
80 | hist_mcmc <- hist(theta2s[,2], breaks = histogram2$breaks, plot = F)
81 | g2 <- plot_histogram(histogram2, with_bar = T) + xlab(TeX("$\\theta_{2,2}$")) + ylab("density")
82 | g2 <- g2 + geom_line(aes(x = hist_mcmc$mids, y = hist_mcmc$density), colour = "red")
83 | g2 <- g2 + scale_x_continuous(breaks = c(10, 15, 20, 25))
84 | g2
85 | ggsave("plummer.histogram2.pdf", plot = g2, width = 5, height = 5)
86 | 


--------------------------------------------------------------------------------
/inst/reproduceepidemiology/run.all.R:
--------------------------------------------------------------------------------
 1 | ## set working directory to the directory containing this script
 2 | 
 3 | print("runs on cut distribution")
 4 | 
 5 | print("producing all results")
 6 | source("plummer.cut.run.R")
 7 | 
 8 | print("producing plots")
 9 | source("plummer.cut.plots.R")
10 | 


--------------------------------------------------------------------------------
/inst/reproducegermancredit/germancredit.mcmc.run.R:
--------------------------------------------------------------------------------
 1 | 
 2 | # load packages
 3 | library(unbiasedmcmc)
 4 | rm(list = ls())
 5 | set.seed(21)
 6 | 
 7 | #
 8 | ##  This example is about the Polya Gamma Gibbs sampler for logistic regression models, as applied to the German credit data of Lichman 2013.
 9 | 
10 | data(germancredit)
11 | n <- nrow(X)
12 | p <- ncol(X)
13 | 
14 | # prior
15 | b <- matrix(0, nrow = p, ncol = 1)
16 | B <- diag(10, p, p)
17 | logistic_setting <- logisticregression_precomputation(Y, X, b, B)
18 | # define MCMC transition kernel
19 | single_kernel <- function(chain_state, logistic_setting){
20 |   zs <- abs(logisticregression_xbeta(logistic_setting$X, t(chain_state)))
21 |   w <- BayesLogit::rpg(logistic_setting$n, h=1, z=zs)
22 |   res <- logisticregression_m_and_sigma(w, X, logistic_setting$invB, logistic_setting$KTkappaplusinvBtimesb)
23 |   chain_state <- t(fast_rmvnorm_chol(1, res$m, res$Cholesky))
24 |   return(chain_state)
25 | }
26 | 
27 | rinit <- function(){
28 |   t(fast_rmvnorm(1, mean = b, covariance = B))
29 | }
30 | 
31 | chain_state <- rinit()
32 | zs <- abs(logisticregression_xbeta(logistic_setting$X, t(chain_state)))
33 | w <- BayesLogit::rpg(logistic_setting$n, h=1, z=zs)
34 | res <- logisticregression_m_and_sigma(w, X, logistic_setting$invB, logistic_setting$KTkappaplusinvBtimesb)
35 | Sigma_ <- solve(res$Sigma_inverse)
36 | Sigma_alt <- t(res$Cholesky) %*% res$Cholesky
37 | 
38 | Sigma_[1:5,1:5]
39 | Sigma_alt[1:5,1:5]
40 | 
41 | mean_ <- res$m
42 | n <- 1e5
43 | xx_2 <- fast_rmvnorm_chol(n, mean_, res$Cholesky)
44 | colMeans(xx_2[,1:5])
45 | mean_[1:5]
46 | cov(xx_2[,1:5])
47 | Sigma_[1:5,1:5]
48 | # xx_3 <- fast_rmvnorm_chol(n, mean_, chol(Sigma_))
49 | # colMeans(xx_3[,1:5])
50 | # cov(xx_3[,11:15])
51 | # Sigma_[11:15,11:15]
52 | # modify niterations
53 | 
54 | niterations <- 100000
55 | 
56 | chain <- matrix(nrow = niterations, ncol = p)
57 | chain[1,] <- rinit()
58 | for (iteration in 2:niterations){
59 |   chain[iteration,] <- single_kernel(chain[iteration-1,], logistic_setting)
60 | }
61 | save(niterations, chain, file = "germancredit.mcmc.RData")
62 | load("germancredit.mcmc.RData")
63 | matplot(chain[100:niterations,1:5], type = "l")
64 | 
65 | idx <- which('Instalment.per.cent' == colnames(X))
66 | hist(chain[100:niterations,idx])
67 | 


--------------------------------------------------------------------------------
/inst/reproducegermancredit/germancredit.plots.R:
--------------------------------------------------------------------------------
  1 | 
  2 | # load packages
  3 | library(unbiasedmcmc)
  4 | library(dplyr)
  5 | setmytheme()
  6 | rm(list = ls())
  7 | set.seed(21)
  8 | library(doParallel)
  9 | library(doRNG)
 10 | registerDoParallel(cores = detectCores())
 11 | data(germancredit)
 12 | load("germancredit.tuning.RData")
 13 | 
 14 | #ndata <- 10
 15 | 
 16 | 
 17 | # Plot histogram of meetingtimes
 18 | nsamples <- meetingtime.list$nsamples
 19 | meetingtimes <- sapply(X = meetingtime.list$meetings_1, FUN = function(x) x$meetingtime)
 20 | #upper quantiles: 90% = 81, 95% = 96, 99% = 135. In the paper we consider 110.
 21 | 
 22 | mt_df = data.frame(meetingtime=meetingtimes)
 23 | head(mt_df)
 24 | #mt_df <- data.frame(meetingtime=sapply(c_chains, function(x) x$meetingtime))
 25 | g <- ggplot(data=mt_df,aes(x=meetingtime))+geom_histogram(aes(y = ..density..), binwidth=10)
 26 | g <- g + xlab("meeting time")
 27 | g
 28 | ggsave(filename="pgg.meetingtime.pdf", plot=g, width=7, height=7)
 29 | 
 30 | 
 31 | 
 32 | # Plot distances and number met for a particular run
 33 | g3 <- ggplot(df_w,aes(x=iter,y=nmet_w))+geom_line() + xlab('iteration') + ylab("count")
 34 | g3
 35 | ggsave(filename = "pgg.nmetw.pdf", plot = g3, width = 7, height = 7)
 36 | 
 37 | g2 <- ggplot(df_w,aes(x=iter,y=dist_w))+geom_line() + xlab('iteration') + ylab('distance') #+ ylab(TeX("$||w_1-w_2||_2$"))
 38 | g2
 39 | ggsave(filename = "pgg.distw.pdf", plot = g2, width = 7, height = 7)
 40 | 
 41 | g1 <- ggplot(df_beta,aes(x=iter,y=dist_beta))+geom_line() + xlab('iteration') + ylab('distance') #ylab(TeX("$||\\beta_1-\\beta_2||_2$"))
 42 | g1
 43 | ggsave(filename = "pgg.distbeta.pdf", plot = g1, width = 7, height = 7)
 44 | 
 45 | 
 46 | 
 47 | 
 48 | # Plot efficiency as a function of k
 49 | tuning.k.list$nsamples
 50 | g <- qplot(x = tuning.k.list$ks, y = 1/(tuning.k.list$cost * tuning.k.list$v), geom = "line") #+ scale_y_continuous(breaks = c(0.2, 0.3, 0.4))
 51 | g <- g + geom_point() + ylab('asymptotic efficiency') + xlab("k")
 52 | g
 53 | ggsave(filename = "pgg.asympt_eff.pdf", plot = g, width = 7, height = 7)
 54 | 
 55 | 
 56 | 
 57 | 
 58 | 
 59 | 
 60 | # Plot histogram
 61 | 
 62 | load(file = "germancredit.c_chain.RData")
 63 | load(file = "germancredit.mcmc.RData")
 64 | 
 65 | idx1 <- which('Instalment.per.cent' == colnames(X))
 66 | idx2 <- which('Duration.in.Current.address' == colnames(X))
 67 | idx <- idx1
 68 | 
 69 | nclass <- 27 #27 for idx1, 24 for idx2
 70 | rng <- range(find_breaks(c_chains_, idx, nclass, k, m, lag = 1))
 71 | breaks = seq(rng[1],rng[2],length=nclass)
 72 | 
 73 | histogram1 <- histogram_c_chains(c_chains_, idx, k, m, nclass = nclass)
 74 | 
 75 | niterations <- nrow(chain)
 76 | hist_mcmc <- hist(chain[1000:niterations, idx], breaks = histogram1$breaks, plot = FALSE)
 77 | 
 78 | g1 <- plot_histogram(histogram1, with_bar = TRUE) + xlab(expression(beta)) + ylab("density")
 79 | g1 <- g1 + geom_line(aes(x = hist_mcmc$mids, y = hist_mcmc$density), colour = "red")
 80 | g1
 81 | ggsave(filename = "pgg.histogram1.pdf", plot = g1, width = 7, height = 7)
 82 | 
 83 | library(coda)
 84 | testfunction <- function(x) x[idx1]
 85 | mcmc_ <- apply(X = chain[1000:nrow(chain),,drop=F], MARGIN = 1, FUN = testfunction)
 86 | mcmcvar <- spectrum0(mcmc_)$spec
 87 | mcmcvar
 88 | 
 89 | nsamples <- length(c_chains_)
 90 | k <- 110
 91 | m <- 1100
 92 | taus <- sapply(c_chains_, FUN = function(x) x$meetingtime)
 93 | estimators <-  foreach(irep = 1:nsamples) %dorng% {
 94 |   H_bar(c_chains_[[irep]], h = testfunction, k = k, m = m)
 95 | }
 96 | v <- var(unlist(estimators))
 97 | c <- mean(2 * taus + pmax(1, m + 1 - taus))
 98 | inef <- c * v
 99 | # loss of efficiency for this choice of k and m
100 | inef / mcmcvar
101 | #
102 | 
103 | 


--------------------------------------------------------------------------------
/inst/reproducegermancredit/germancredit.preparedata.R:
--------------------------------------------------------------------------------
 1 | # German Credit Data
 2 | data <- read.csv('german_credit.csv')
 3 | Y <- data[,"Creditability"]
 4 | x.categorical <- c('Account.Balance', 'Payment.Status.of.Previous.Credit', 'Purpose', 'Value.Savings.Stocks',
 5 |                    'Length.of.current.employment', 'Sex...Marital.Status', 'Guarantors', 'Most.valuable.available.asset',
 6 |                    'Concurrent.Credits', 'Type.of.apartment', 'Occupation', 'Telephone', 'Foreign.Worker')
 7 | x.quant <- c('Duration.of.Credit..month.', 'Credit.Amount', 'Instalment.per.cent', 'Duration.in.Current.address',
 8 |              'Age..years.', 'No.of.Credits.at.this.Bank', 'No.of.dependents')
 9 | for(x in x.categorical){
10 |   data[,x] = as.factor(data[,x])
11 | }
12 | 
13 | 
14 | fmla <- paste('~',paste(c(x.quant,x.categorical),collapse ='+'))
15 | X <- model.matrix(formula(fmla), data=data)
16 | 
17 | save(Y, X, file = "germancredit.RData")
18 | 


--------------------------------------------------------------------------------
/inst/reproducegermancredit/run.all.R:
--------------------------------------------------------------------------------
 1 | # set working directory to folder containing this file
 2 | 
 3 | 
 4 | print("run various coupled MCMC")
 5 | source("germancredit.run.R")
 6 | 
 7 | print("run long MCMC")
 8 | source("germancredit.mcmc.run.R")
 9 | 
10 | print("produce plots")
11 | source("germancredit.plots.R")
12 | 
13 | 


--------------------------------------------------------------------------------
/inst/reproduceisingmodel/ising.gibbs.meetings.run.R:
--------------------------------------------------------------------------------
  1 | ### This script plays with a coupled Gibbs sampler (i.e. single site updates)
  2 | ### for a basic Ising model, with different values of the temperatures
  3 | library(unbiasedmcmc)
  4 | rm(list = ls())
  5 | set.seed(21)
  6 | #
  7 | library(doRNG)
  8 | library(doParallel)
  9 | registerDoParallel(cores = detectCores()-2)
 10 | # library(dplyr)
 11 | # library(abind)
 12 | 
 13 | # size of the grid
 14 | size <- 32
 15 | # possible values of sum of neighbors
 16 | ss_ <- c(-4,-2,0,2,4)
 17 | # inverse temperature
 18 | # beta <- 0.4
 19 | # precomputed probability for single-site flips
 20 | # proba_ <-  exp(ss_*beta) / (exp(ss_*beta) + exp(-ss_*beta))
 21 | # # initialization
 22 | # chain_state1 <- ising_rinit()
 23 | # chain_state2 <- ising_rinit()
 24 | # # MCMC loop
 25 | # niterations <- 1000
 26 | # # store history of sum of states
 27 | # sumstates1 <- rep(0, niterations)
 28 | # sumstates1[1] <- unbiasedmcmc:::ising_sum_(chain_state1)
 29 | # sumstates2 <- rep(0, niterations)
 30 | # sumstates2[1] <- unbiasedmcmc:::ising_sum_(chain_state2)
 31 | #
 32 | # for (iter in 2:niterations){
 33 | #   res_ <- ising_coupled_kernel(chain_state1, chain_state2, proba_)
 34 | #   chain_state1 <- res_$chain_state1
 35 | #   chain_state2 <- res_$chain_state2
 36 | #   sumstates1[iter] <- unbiasedmcmc:::ising_sum_(chain_state1)
 37 | #   sumstates2[iter] <- unbiasedmcmc:::ising_sum_(chain_state2)
 38 | # }
 39 | 
 40 | # matplot(cbind(sumstates1, sumstates2), type = "l")
 41 | 
 42 | ising_meeting <- function(beta, m = 1, max_iterations = Inf){
 43 |   ss_ <- c(-4,-2,0,2,4)
 44 |   proba_ <-  exp(ss_*beta) / (exp(ss_*beta) + exp(-ss_*beta))
 45 |   chain_state1 <- ising_rinit()
 46 |   chain_state2 <- ising_rinit()
 47 |   current_nsamples1 <- 1
 48 |   chain_state1 <- ising_single_kernel(chain_state1, proba_)
 49 |   current_nsamples1 <- current_nsamples1 + 1
 50 |   iter <- 1
 51 |   meet <- FALSE
 52 |   finished <- FALSE
 53 |   meetingtime <- Inf
 54 |   while (!finished && iter < max_iterations){
 55 |     iter <- iter + 1
 56 |     if (meet){
 57 |       chain_state1 <- ising_single_kernel(chain_state1, proba_)
 58 |       chain_state2 <- chain_state1
 59 |     } else {
 60 |       res_coupled_kernel <- ising_coupled_kernel(chain_state1, chain_state2, proba_)
 61 |       chain_state1 <- res_coupled_kernel$chain_state1
 62 |       chain_state2 <- res_coupled_kernel$chain_state2
 63 |       if (all(chain_state1 == chain_state2) && !meet){
 64 |         # recording meeting time tau
 65 |         meet <- TRUE
 66 |         meetingtime <- iter
 67 |       }
 68 |     }
 69 |     current_nsamples1 <- current_nsamples1 + 1
 70 |     # stop after max(m, tau) steps
 71 |     if (iter >= max(meetingtime, m)){
 72 |       finished <- TRUE
 73 |     }
 74 |   }
 75 |   return(list(meetingtime = meetingtime, iteration = iter, finished = finished))
 76 | }
 77 | 
 78 | 
 79 | nrep <- 100
 80 | # betas <- seq(from = 0.3, to = 0.55, length.out = 15)
 81 | betas <- c(0.3, 0.317857142857143, 0.335714285714286, 0.353571428571429,
 82 |   0.371428571428571, 0.389285714285714, 0.407142857142857, 0.425,
 83 |   0.442857142857143, 0.460714285714286, 0.478571428571429)
 84 | singlesite.meetings.df <- data.frame()
 85 | for (ibeta in seq_along(betas)){
 86 |   print(ibeta)
 87 |   beta <- betas[ibeta]
 88 |   ccs_ <- foreach(irep = 1:nrep) %dorng% {
 89 |     ising_meeting(beta, m = 1, max_iterations = Inf)
 90 |   }
 91 |   meetingtimes <- sapply(ccs_, function(x) x$meetingtime)
 92 |   singlesite.meetings.df <- rbind(singlesite.meetings.df,
 93 |                                   data.frame(meeting = meetingtimes, beta = rep(beta, nrep)))
 94 |   print(summary(meetingtimes))
 95 |   save(nrep, betas, singlesite.meetings.df, file = "ising.singlesite.meetings.RData")
 96 | }
 97 | 
 98 | load(file = "ising.singlesite.meetings.RData")
 99 | 
100 | # tail(singlesite.meetings.df)
101 | #
102 | # setmytheme()
103 | # g <- ggplot(singlesite.meetings.df %>% group_by(beta) %>% summarise(m = mean(meeting)),
104 | #        aes(x = beta, y = m)) + geom_line() + geom_point() + scale_y_log10()
105 | # g <- g + xlab(expression(theta)) + ylab("average meeting time")
106 | # g
107 | #
108 | # ggsave(filename = "ising.singlesite.meetings.pdf", plot = g, width = 8, height = 6)
109 | 


--------------------------------------------------------------------------------
/inst/reproduceisingmodel/ising.mcmc.run.R:
--------------------------------------------------------------------------------
 1 | ### This script plays with parallel tempering
 2 | ### for a basic Ising model, with different values of the temperatures
 3 | ### i.e. Gibbs sampler at each temperature and swap moves between chains
 4 | library(unbiasedmcmc)
 5 | rm(list = ls())
 6 | set.seed(21)
 7 | #
 8 | library(doRNG)
 9 | library(doParallel)
10 | registerDoParallel(cores = detectCores()-2)
11 | 
12 | # size of the grid
13 | size <- 32
14 | # possible values of sum of neighbors
15 | ss_ <- c(-4,-2,0,2,4)
16 | 
17 | nchains <- 16
18 | # number of iterations
19 | niterations <- 1e5
20 | # history
21 | history_sumstates <- matrix(0, ncol = nchains, nrow = niterations)
22 | # probability of doing a swap move
23 | proba_swapmove <- 0.01
24 | # inverse temperatures
25 | betas <- seq(from = 0.3, to = 0.55, length.out = nchains)
26 | # precomputed probability for single-site flips
27 | probas_ <-  sapply(betas, function(beta) exp(ss_*beta) / (exp(ss_*beta) + exp(-ss_*beta)))
28 | # initialization
29 | current_states <- ising_pt_rinit(nchains)
30 | sumstates <- unlist(lapply(current_states, unbiasedmcmc:::ising_sum_))
31 | history_sumstates[1,] <- sumstates
32 | nswap_attempts <- 0
33 | nswap_accepts <- rep(0, nchains-1)
34 | # MCMC loop
35 | for (iteration in 2:niterations){
36 |   if (iteration %% 10000 == 1) cat("iteration", iteration, "/", niterations, "\n")
37 |   res_ <- ising_pt_single_kernel(current_states, sumstates, betas, probas_, proba_swapmove)
38 |   current_states <- res_$chain_states
39 |   sumstates <- res_$sumstates
40 |   nswap_accepts <- nswap_accepts + res_$nswap_accepts
41 |   nswap_attempts <- nswap_attempts + res_$nswap_attempts
42 |   history_sumstates[iteration,] <- sumstates
43 | }
44 | save(nswap_accepts, nswap_attempts, history_sumstates, file = "ising.mcmc.RData")
45 | 
46 | # load("ising.mcmc.RData")
47 | # niterations <- nrow(history_sumstates)
48 | # # swap acceptance in %
49 | # cat(paste0(round(100*nswap_accepts/nswap_attempts, 2), " %"), "\n")
50 | # par(mfrow = c(2,1))
51 | # # traceplots
52 | # matplot(apply(history_sumstates[2000:niterations,1:5], 2, function(x) cumsum(x)/(1:(niterations-2000+1))), type = "l", ylab = "sum of states at different temperatures")
53 | # # plot estimated average natural parameters
54 | # plot(betas, colMeans(history_sumstates[2000:niterations,]), type = "l")
55 | #
56 | # par(mfrow = c(1,1))
57 | # matplot(history_sumstates[2000:1e4,1:5], type = "l")
58 | #
59 | # library(coda)
60 | # vars <- sapply(1:nchains, function(i) spectrum0(history_sumstates[1e4:niterations,i])$spec)
61 | # sd_errors <- sqrt(vars/(niterations-1e4))
62 | # sd_errors
63 | # estimates <- colMeans(history_sumstates[1e4:niterations,])
64 | # ggplot(data.frame(betas = betas, estimates = estimates, sd_errors = sd_errors),
65 | #        aes(x = betas, y = estimates, ymin = estimates-2*sd_errors, ymax=estimates+2*sd_errors)) + geom_errorbar() +
66 | #   geom_line()
67 | 


--------------------------------------------------------------------------------
/inst/reproduceisingmodel/run.all.R:
--------------------------------------------------------------------------------
 1 | ## set working directory to the directory containing this script
 2 | 
 3 | print("obtain meeting times for single-site Gibbs, on a grid of values of beta")
 4 | source("ising.gibbs.meetings.run.R")
 5 | 
 6 | print("obtain meeting times for parallel tempering, on a grid of values of beta")
 7 | source("ising.swap.meetings.run.R")
 8 | 
 9 | print("long run of parallel tempering")
10 | source("ising.mcmc.run.R")
11 | 
12 | ## to run the rest of the calculation you need to run "run.batch.ising.R" many times
13 | ## which will populate an output/ subfolder
14 | 
15 | ## See explanations in "run.batch.ising.R" and "run.odyssey.ising.sh".
16 | 
17 | ## Example of command to run the script 10 times, using GNU Parallel and 5 processors:
18 | ## parallel -j5 'R CMD BATCH --no-save "--args {1}" run.batch.ising.R out/meeting.job{1}.out'  ::: {1..10}
19 | 
20 | ## Once the batch scripts are run, we can proceed and create the plots.
21 | 
22 | print("produce plots")
23 | source("ising.plots.R")
24 | 
25 | 


--------------------------------------------------------------------------------
/inst/reproduceisingmodel/run.odyssey.ising.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #SBATCH -J ising_rep                           # Job name
 3 | #SBATCH -n 1                                    # Number of cores
 4 | #SBATCH -N 1                                    # All cores on one machine
 5 | #SBATCH -t 0-24:00                              # Runtime in D-HH:MM
 6 | #SBATCH -p shared                               # Partition to submit to (general, shared, serial_requeue) *************** modify this line
 7 | #SBATCH --mem-per-cpu=2000M                     # Memory pool for all cores (see also --mem-per-cpu)
 8 | #SBATCH --mail-type=END                         # Type of email notification- BEGIN,END,FAIL,ALL
 9 | #SBATCH --mail-user=username@fas.harvard.edu    # Email *************** modify this line
10 | #SBATCH --array=1-100                           # *************** modify this line
11 | 
12 | ## LOAD SOFTWARE ENV ##
13 | source new-modules.sh
14 | module load R/3.4.2-fasrc01
15 | export R_LIBS_USER=$HOME/apps/R:$R_LIBS_USER
16 | input=run.batch.ising.R           # *************** modify this line
17 | cd /n/home12/pjacob/isingmodel/   # *************** modify this line
18 | 
19 | srun R CMD BATCH --no-save "--args $SLURM_ARRAY_TASK_ID" $input out/$input.$SLURM_ARRAY_TASK_ID.out     #****** modify this line
20 | 
21 | 
22 | ### How to run the calculations on the Harvard Odyssey cluster?
23 | ### command to ssh to the cluster, e.g.
24 | # ssh username@odyssey.rc.fas.harvard.edu
25 | 
26 | ###
27 | # First, install the 'unbiasedmcmc' package on the cluster (e.g. upload zip file and R CMD INSTALL).
28 | # Do this in an interactive session, e.g.
29 | ####
30 | # srun --pty -p test -t 20 --mem 3000 /bin/bash
31 | ### to have access to R:
32 | # source new-modules.sh
33 | # module load R/3.4.2-fasrc01
34 | # export R_LIBS_USER=$HOME/apps/R:$R_LIBS_USER
35 | # R CMD INSTALL package.tar.gz
36 | 
37 | 
38 | ### commands such as the following may be useful when it comes to transferring files
39 | # scp /home/bla*bla blabla@odyssey.rc.fas.harvard.edu:~/blabla/
40 | # scp blabla@odyssey.rc.fas.harvard.edu:~/blabla/output/*RData /home/blabla/
41 | ### see more details on: https://www.rc.fas.harvard.edu/resources/documentation/copying-data-to-and-from-odyssey-using-scp/
42 | 
43 | ### Create appropriate folders (subfolders named "output" and "out" in particular).
44 | ### tune run.batch.ising.R appropriately (it should save output in output/ subfolder).
45 | 
46 | ### Make sure path names are OK in run.odyssey.ising.sh.
47 | ### choose partition, size of array (e.g. 1-500 for 500 jobs), runtime
48 | ### To start the jobs:
49 | # sbatch run.odyssey.ising.sh
50 | 
51 | ### To monitor jobs,
52 | # sacct -j JOBID
53 | ### or
54 | # squeue -j 9999999
55 | ### Or go to the website https://portal.rc.fas.harvard.edu/jobs/
56 | ### To cancel a job,
57 | # scancel 9999999
58 | 


--------------------------------------------------------------------------------
/inst/reproducepumpfailures/pumpfailures.mcmc.run.R:
--------------------------------------------------------------------------------
 1 | 
 2 | # load packages
 3 | library(unbiasedmcmc)
 4 | setmytheme()
 5 | rm(list = ls())
 6 | set.seed(21)
 7 | registerDoParallel(cores = detectCores())
 8 | 
 9 | #
10 | ##  This example is about failures of nuclear pumps. It's classic (e.g. Example 10.17 in Robert & Casella Monte Carlo Statistical Methods)
11 | ## It's used as an example in Murdoch and Green's perfect samplers paper and also Reutter and Johnson 1995
12 | ## about using coupled chains to monitor MCMC convergence
13 | 
14 | # The data:
15 | # number of failures
16 | s <- c(5, 1, 5, 14, 3, 19, 1, 1, 4, 22)
17 | # times
18 | t <- c(94.3, 15.7, 62.9, 126, 5.24, 31.4, 1.05, 1.05, 2.1, 10.5)
19 | # the model says s_k ~ Poisson(lambda_k * t_k), for k = 1,...,10
20 | ndata <- 10
21 | # and lambda_k ~ Gamma(alpha,beta), beta ~ Gamma(gamma, delta)
22 | alpha <- 1.802
23 | gamma <- 0.01
24 | delta <- 1
25 | # full conditionasl:
26 | # lambda_k given rest: Gamma(alpha + s_k, beta + t_k)
27 | # beta given rest: Gamma(gamma + 10*alpha, delta + sum_{k=1}^10 lambda_k)
28 | 
29 | single_kernel <- function(state){
30 |   lambda <- state$chain_state[1:ndata]
31 |   beta <- state$chain_state[ndata+1]
32 |   for (k in 1:ndata){
33 |     lambda[k] <- rgamma(1, shape = alpha + s[k], rate = beta + t[k])
34 |   }
35 |   beta <- rgamma(1, shape = gamma + 10*alpha, rate = delta + sum(lambda))
36 |   return(list(chain_state = c(lambda, beta)))
37 | }
38 | 
39 | rinit <- function(){
40 |   return(list(chain_state = rep(1, ndata+1)))
41 | }
42 | 
43 | 
44 | niterations <- 5e5
45 | chain <- matrix(nrow = niterations, ncol = ndata+1)
46 | current <- rinit()
47 | chain[1,] <- current$chain_state
48 | for (iteration in 2:niterations){
49 |   current <- single_kernel(current)
50 |   chain[iteration,] <- current$chain_state
51 | }
52 | save(niterations, chain, file = "pump.mcmc.RData")
53 | # load("pump.mcmc.RData")
54 | # hist(chain[,ndata+1])
55 | 


--------------------------------------------------------------------------------
/inst/reproducepumpfailures/pumpfailures.mykland.run.R:
--------------------------------------------------------------------------------
  1 | # load packages
  2 | 
  3 | ##  This example is about failures of nuclear pumps. It's classic (e.g. Example 10.17 in Robert & Casella Monte Carlo Statistical Methods)
  4 | ## It's used as an example in Murdoch and Green's perfect samplers paper and also Reutter and Johnson 1995
  5 | ## about using coupled chains to monitor MCMC convergence
  6 | 
  7 | # The data:
  8 | # number of failures
  9 | s <- c(5, 1, 5, 14, 3, 19, 1, 1, 4, 22)
 10 | # times
 11 | t <- c(94.3, 15.7, 62.9, 126, 5.24, 31.4, 1.05, 1.05, 2.1, 10.5)
 12 | # the model says s_k ~ Poisson(lambda_k * t_k), for k = 1,...,10
 13 | ndata <- 10
 14 | # and lambda_k ~ Gamma(alpha,beta), beta ~ Gamma(gamma, delta)
 15 | alpha <- 1.802
 16 | gamma <- 0.01
 17 | delta <- 1
 18 | # full conditionasl:
 19 | # lambda_k given rest: Gamma(alpha + s_k, beta + t_k)
 20 | # beta given rest: Gamma(gamma + 10*alpha, delta + sum_{k=1}^10 lambda_k)
 21 | 
 22 | p_D <- function(Lambda,d1,d2){
 23 |   F_d1 <- pgamma(d1,gamma+10*alpha,Lambda+delta)
 24 |   F_d2 <- pgamma(d2,gamma+10*alpha,Lambda+delta)
 25 |   return(F_d2-F_d1)
 26 | }
 27 | 
 28 | d_function <- function(Lambda, Lambda_tilde, d1, d2){
 29 |     if(Lambda<Lambda_tilde){
 30 |       return(d1)
 31 |     } else {
 32 |       return(d2)
 33 |     }
 34 | }
 35 | 
 36 | s_function <- function(Lambda, Lambda_tilde, d1, d2){
 37 |   s <- p_D(Lambda,d1,d2)
 38 |   s <- s * ((Lambda+delta)/(Lambda_tilde+delta))^(gamma+10*alpha)
 39 |   s <- s * exp((Lambda_tilde-Lambda)*d_function(Lambda, Lambda_tilde, d1, d2))
 40 |   return(s)
 41 | }
 42 | 
 43 | r_function <- function(x, y, Lambda_tilde=6.7, d1=2.35-1.1*.69, d2=2.35+1.1*.69){
 44 |   lambda_x <- x[1:ndata]
 45 |   Lambda_x = sum(lambda_x)
 46 |   beta_y <-   x[ndata+1]
 47 | 
 48 |   if(d1 <= beta_y & beta_y <= d2){
 49 |     r <- exp((Lambda_tilde - Lambda_x)*
 50 |                (d_function(Lambda_x,Lambda_tilde,d1,d2) - beta_y))
 51 |     return(r)
 52 |   } else {
 53 |     return(0)
 54 |   }
 55 | }
 56 | 
 57 | # run split Gibbs sampler
 58 | 
 59 | single_kernel <- function(current_state){
 60 |   lambda <- current_state[1:ndata]
 61 |   beta <- current_state[ndata+1]
 62 |   for (k in 1:ndata){
 63 |     lambda[k] <- rgamma(1, shape = alpha + s[k], rate = beta + t[k])
 64 |   }
 65 |   beta <- rgamma(1, shape = gamma + 10*alpha, rate = delta + sum(lambda))
 66 |   return(c(lambda, beta))
 67 | }
 68 | 
 69 | rinit <- function(){
 70 |   return(rep(1, ndata+1))
 71 | }
 72 | 
 73 | n_rep = 1000
 74 | 
 75 | nt_vec = rep(NA,n_rep)
 76 | tl_vec = c()
 77 | beta_hat_vec = rep(NA,n_rep)
 78 | 
 79 | for(jj in 1:n_rep){
 80 |   print(jj)
 81 |   niterations <- 5000
 82 |   chain <- matrix(nrow = niterations, ncol = ndata+1)
 83 |   chain[1,] <- rinit()
 84 |   for (iteration in 2:niterations){
 85 |     chain[iteration,] <- single_kernel(chain[iteration-1,])
 86 |   }
 87 | 
 88 |   S_vec = rep(0, niterations)
 89 |   for(iteration in 1:(niterations-1)){
 90 |     r_xy = r_function(chain[iteration,],chain[iteration+1,])
 91 |     S_vec[[iteration]] <- rbinom(1, 1, r_xy)
 92 |   }
 93 | 
 94 |   T_vec <- which(S_vec==1)
 95 |   n_tours <- length(T_vec)-1
 96 |   t_length = T_vec[2:length(T_vec)] - T_vec[1:(length(T_vec)-1)]
 97 | 
 98 |   tl_vec = c(tl_vec,t_length)
 99 |   nt_vec[jj] = n_tours
100 | 
101 |   wh0 = T_vec[[1]]+1
102 |   wh1 = T_vec[[length(T_vec)]]
103 |   est_n = sum(chain[wh0:wh1,ncol(chain),drop=FALSE])
104 |   est_d = wh1-wh0+1
105 |   beta_hat_vec[jj] = est_n/est_d
106 | }
107 | 
108 | mean(tl_vec)
109 | mean(nt_vec)
110 | mean(beta_hat_vec)
111 | format(var(beta_hat_vec),scientific = TRUE)
112 | (5000*var(beta_hat_vec))^(-1)
113 | 
114 | 
115 | Y_chain <- matrix(nrow = n_tours, ncol = ndata+1)
116 | N_vec <- rep(NA, n_tours)
117 | 
118 | #SRQ plot
119 | #plot((1:(n_tours+1))/(n_tours+1),T_vec/tail(T_vec,1),type='l')
120 | 
121 | # estimators
122 | wh_cv = c()
123 | for(i in 1:n_tours){
124 |   wh <- (T_vec[[i]]+1):T_vec[[i+1]]
125 |   wh_cv = c(wh_cv,wh)
126 |   Y_chain[i,] <- colSums(chain[wh,,drop=FALSE])
127 |   N_vec[[i]] <- length(wh)
128 | }
129 | 
130 | #length(wh0:wh1)
131 | #length(wh_cv)
132 | 
133 | theta_hat <- colSums(Y_chain)/sum(N_vec)
134 | sigma2_hat <- rep(NA, ndata+1)
135 | for(i in 1:(ndata+1)){
136 |   sigma2_hat[[i]] = mean((Y_chain[,i] - theta_hat[i]*N_vec)^2)/mean(N_vec)^2
137 | }
138 | cbind(theta_hat,sigma2_hat)
139 | 
140 | 


--------------------------------------------------------------------------------
/inst/reproducepumpfailures/pumpfailures.run.R:
--------------------------------------------------------------------------------
 1 | 
 2 | # load packages
 3 | library(unbiasedmcmc)
 4 | setmytheme()
 5 | rm(list = ls())
 6 | set.seed(21)
 7 | library(doParallel)
 8 | library(doRNG)
 9 | registerDoParallel(cores = detectCores())
10 | 
11 | #
12 | ##  This example is about failures of nuclear pumps. It's classic (e.g. Example 10.17 in Robert & Casella Monte Carlo Statistical Methods)
13 | ## It's used as an example in Murdoch and Green's perfect samplers paper and also Reutter and Johnson 1995
14 | ## about using coupled chains to monitor MCMC convergence
15 | 
16 | # The data:
17 | # number of failures
18 | s <- c(5, 1, 5, 14, 3, 19, 1, 1, 4, 22)
19 | # times
20 | t <- c(94.3, 15.7, 62.9, 126, 5.24, 31.4, 1.05, 1.05, 2.1, 10.5)
21 | # the model says s_k ~ Poisson(lambda_k * t_k), for k = 1,...,10
22 | ndata <- 10
23 | # and lambda_k ~ Gamma(alpha,beta), beta ~ Gamma(gamma, delta)
24 | alpha <- 1.802
25 | gamma <- 0.01
26 | delta <- 1
27 | # full conditionasl:
28 | # lambda_k given rest: Gamma(alpha + s_k, beta + t_k)
29 | # beta given rest: Gamma(gamma + 10*alpha, delta + sum_{k=1}^10 lambda_k)
30 | 
31 | single_kernel <- function(state){
32 |   lambda <- state$chain_state[1:ndata]
33 |   beta <- state$chain_state[ndata+1]
34 |   for (k in 1:ndata){
35 |     lambda[k] <- rgamma(1, shape = alpha + s[k], rate = beta + t[k])
36 |   }
37 |   beta <- rgamma(1, shape = gamma + 10*alpha, rate = delta + sum(lambda))
38 |   return(list(chain_state = c(lambda, beta)))
39 | }
40 | 
41 | rinit <- function(){
42 |   return(list(chain_state = rep(1, ndata+1)))
43 | }
44 | 
45 | coupled_kernel <- function(state1, state2){
46 |   lambda1 <- state1$chain_state[1:ndata]
47 |   beta1 <- state1$chain_state[ndata+1]
48 |   lambda2 <- state2$chain_state[1:ndata]
49 |   beta2 <- state2$chain_state[ndata+1]
50 |   identical_components <- logical(length = ndata + 1)
51 |   for (k in 1:ndata){
52 |     x <- rgamma_coupled(alpha1 = alpha + s[k], alpha2 = alpha + s[k],
53 |                         beta1 = beta1 + t[k], beta2 = beta2 + t[k])
54 |     lambda1[k] <- x$xy[1]
55 |     lambda2[k] <- x$xy[2]
56 |     identical_components[k] <- x$identical
57 |   }
58 |   x <- rgamma_coupled(alpha1 = gamma + 10*alpha, alpha2 = gamma + 10*alpha,
59 |                       beta1 = delta + sum(lambda1), beta2 = delta + sum(lambda2))
60 |   beta1 <- x$xy[1]
61 |   beta2 <- x$xy[2]
62 |   identical_components[ndata+1] <- x$identical
63 |   chain_state1 = c(lambda1, beta1); chain_state2 = c(lambda2, beta2)
64 |   identical_ <- all(identical_components)
65 |   return(list(state1 = list(chain_state = chain_state1), state2 = list(chain_state = chain_state2), identical = identical_))
66 | }
67 | 
68 | k <- 7
69 | m <- 10*7
70 | 
71 | nsamples <- 1000
72 | c_chains_ <-  foreach(irep = 1:nsamples) %dorng% {
73 |   sample_coupled_chains(single_kernel, coupled_kernel, rinit, m=m)
74 | }
75 | save(nsamples, k, m, c_chains_, file = "pump.c_chain.RData")
76 | 
77 | meetingtime <- sapply(c_chains_, function(x) x$meetingtime)
78 | table(meetingtime)
79 | 


--------------------------------------------------------------------------------
/inst/reproducepumpfailures/run.all.R:
--------------------------------------------------------------------------------
 1 | ## set working directory to the directory containing this script
 2 | 
 3 | print("runs on pump failure example")
 4 | 
 5 | print("generate estimators for different k and m")
 6 | source("pumpfailures.tuning.run.R")
 7 | 
 8 | print("generate estimators")
 9 | source("pumpfailures.run.R")
10 | 
11 | print("generate long MCMC run")
12 | source("pumpfailures.mcmc.run.R")
13 | 
14 | print("regeneration approach")
15 | source("pumpfailures.mykland.run.R")
16 | 
17 | print("produce plots and tables")
18 | source("pumpfailures.plots.R")
19 | 


--------------------------------------------------------------------------------
/inst/reproducescalingdimension/run.all.R:
--------------------------------------------------------------------------------
 1 | ## set working directory to the directory containing this script
 2 | 
 3 | print("runs on scaling of meeting times with respect to the dimension of a Normal target")
 4 | 
 5 | print("produce random walk MH with maximal coupling runs")
 6 | source("scaling.rwmh.maximalcoupling.R")
 7 | print("produce random walk MH with reflection-maximal coupling runs")
 8 | source("scaling.rwmh.reflectionmaxcoupling.R")
 9 | print("produce Gibbs sampler runs")
10 | source("scaling.gibbs.R")
11 | print("produce Hamiltonian Monte Carlo runs")
12 | source("scaling.hmc.R")
13 | print("produce plots")
14 | source("scaling.plots.R")
15 | 


--------------------------------------------------------------------------------
/inst/reproducescalingdimension/scaling.plots.R:
--------------------------------------------------------------------------------
 1 | # load packages
 2 | library(unbiasedmcmc)
 3 | library(dplyr)
 4 | setmytheme()
 5 | rm(list = ls())
 6 | set.seed(21)
 7 | #
 8 | 
 9 | load(file = "scalingdimension.rwmh.maxcoupling.RData")
10 | df
11 | g <- ggplot(df %>% filter(target_type == "dense"), aes(x = d, y = mean_time, group = init_type, linetype = factor(init_type))) + geom_line() + ylab("average meeting time")
12 | g <- g + scale_linetype("initialization:") + scale_x_continuous(breaks = sort(unique((df %>% filter(target_type == "dense"))$d)))
13 | g <- g + scale_y_log10(breaks = c(10,100,1e3, 1e4)) + xlab("dimension")
14 | g
15 | ggsave(filename = "scalingdimension.rwmh.maxcoupling.pdf", plot = g, width = 8, height = 6)
16 | 
17 | load(file = "scalingdimension.rwmh.reflmaxcoupling.RData")
18 | g <- ggplot(df, aes(x = d, y = mean_time, group = init_type, linetype = init_type)) + geom_line() + ylab("average meeting time")
19 | g <- g + scale_x_continuous(breaks = sort(unique((df %>% filter(target_type == "dense"))$d)))
20 | g <- g + xlab("dimension") + scale_linetype("initialization:")
21 | g
22 | ggsave(filename = "scalingdimension.rwmh.reflmaxcoupling.pdf", plot = g, width = 8, height = 6)
23 | 
24 | load(file = "scalingdimension.gibbs.sparse.RData")
25 | df.sparse <- df
26 | load("scalingdimension.gibbs.dense.RData")
27 | df.dense <- df
28 | 
29 | g <- ggplot(df.sparse, aes(x = d, y = mean_time, group = init_type, linetype = factor(init_type))) + geom_line() + ylab("average meeting time")
30 | g <- g + scale_linetype("initialization:") + scale_x_continuous(breaks = sort(unique(df.sparse$d))) + xlab("dimension")
31 | g
32 | ggsave(filename = "scalingdimension.gibbs.sparse.pdf", plot = g, width = 8, height = 6)
33 | 
34 | g <- ggplot(df.dense, aes(x = d, y = median_time, group = init_type, linetype = factor(init_type))) + geom_line() + ylab("median meeting time")
35 | g <- g + scale_linetype("initialization:") + scale_x_continuous(breaks = sort(unique(df.dense$d))) + xlab("dimension")
36 | # g <- g + scale_y_log10(breaks = c(1e1, 1e2, 1e3, 1e4, 1e5))
37 | g
38 | ggsave(filename = "scalingdimension.gibbs.dense.pdf", plot = g, width = 8, height = 6)
39 | 
40 | load("scalingdimension.wishart.hmc.meetings.RData")
41 | df.summary <- df %>% group_by(dimension, init_type) %>% summarise(mean_time = mean(meetingtimes))
42 | g <- ggplot(df.summary, aes(x = dimension, y = mean_time, group = init_type, linetype = factor(init_type))) + geom_line() + ylab("average meeting time")
43 | g <- g + scale_linetype("initialization:") + scale_x_continuous(breaks = sort(unique(df.summary$dimension))) + xlab("dimension") + ylim(0, 60)
44 | g
45 | 
46 | ggsave(filename = "scalingdimension.hmc.dense.pdf", plot = g, width = 8, height = 6)
47 | 
48 | 


--------------------------------------------------------------------------------
/inst/reproducevarselection/run.all.R:
--------------------------------------------------------------------------------
 1 | ## set working directory to the directory containing this script
 2 | 
 3 | 
 4 | print("generate synthetic data sets")
 5 | source("varselection.generatedata.run.R")
 6 | 
 7 | print("generate long MCMC chain")
 8 | source("varselection.mcmc.run.R")
 9 | 
10 | 
11 | ## The rest requires running batch scripts.
12 | 
13 | ## The tables were obtained by running varselection.script.R using GNU parallel.
14 | ## The script takes 8 arguments:
15 | # JOB_ID
16 | # NRUNS
17 | # design  # 0 for independent design, 1 for correlated
18 | # n       # number of rows
19 | # p       # number of columns
20 | # SNR     # Signal to noise ratio, e.g. 0.5, 1, or 2
21 | # k       # set to zero if only interested in meeting times
22 | # m       # set to zero if only interested in meeting times
23 | 
24 | # for instance to run the script on 10 machines, 10 times on each machine,
25 | # with either design=0 and design=1, with n=500, with p=1000 and p=5000, and with k=m=0
26 | 
27 | # parallel -j10 'Rscript varselection.script.R {}' ::: {1..10} ::: 10 ::: {0,1} ::: 500 ::: {1000,5000} ::: {0.5,1,2} ::: 0 ::: 0
28 | 
29 | ## Once this has been run with the desired values of n and p
30 | print("producing tables")
31 | source("varselection.tables.R")
32 | 
33 | ## The figures were obtained as follows.
34 | 
35 | ## For the impact of dimension
36 | print("producing results on the impact of dimension")
37 | source("varselection.differentp.R")
38 | source("varselection.differentp.plots.R")
39 | 
40 | 
41 | ## For the impact of the hyperparameter kappa
42 | ## First, the script "varselection.clusterscript.kappas.R" was run on a cluster.
43 | ## This was done by creating a script, say "run.odyssey.varselection.kappas.sh", which contains the following lines
44 | 
45 | # #!/bin/bash
46 | # #SBATCH -J varselection_rep                     # Job name
47 | # #SBATCH -n 1                                    # Number of cores
48 | # #SBATCH -N 1                                    # All cores on one machine
49 | # #SBATCH -t 0-24:00                              # Runtime in D-HH:MM
50 | # #SBATCH -p shared                               # Partition to submit to (general, shared, serial_requeue)
51 | # #SBATCH --mem-per-cpu=2000M                     # Memory pool for all cores (see also --mem-per-cpu)
52 | # #SBATCH --mail-type=END                         # Type of email notification- BEGIN,END,FAIL,ALL
53 | # #SBATCH --mail-user=username@provider.com       # Email
54 | # #SBATCH --array=1-600                           # Requesting 100 jobs
55 | #
56 | # ## LOAD SOFTWARE ENV ##
57 | # source new-modules.sh
58 | # module load R/3.4.2-fasrc01
59 | # export R_LIBS_USER=$HOME/apps/R:$R_LIBS_USER
60 | # input=varselection.differentkapps.R                                  #****** modify this line
61 | # cd /n/home12/pjacob/                                     #****** modify this line
62 | #
63 | # srun R CMD BATCH --no-save $input out/$input.$SLURM_ARRAY_TASK_ID.out     #****** modify this line
64 | 
65 | 
66 | ## and then running in a command line
67 | ## sbatch run.odyssey.varselection.kappas.sh
68 | 
69 | ## Once these are produced, it remains to run long MCMC chains for comparison,
70 | ## and to produce the plots. This is done as follows.
71 | print("producing results on the impact of hyperparameter kappa")
72 | source("varselection.differentkappas.R")
73 | source("varselection.differentkappas.plots.R")
74 | 


--------------------------------------------------------------------------------
/inst/reproducevarselection/varselection.differentp.R:
--------------------------------------------------------------------------------
 1 | library(unbiasedmcmc)
 2 | setmytheme()
 3 | rm(list = ls())
 4 | set.seed(21)
 5 | #
 6 | library(doRNG)
 7 | library(doParallel)
 8 | library(tidyr)
 9 | library(viridis)
10 | library(dplyr)
11 | registerDoParallel(cores = detectCores()-2)
12 | #
13 | 
14 | # later put n = 500 and p = 1000
15 | n <- 500
16 | SNR <- 1
17 | #
18 | # load data
19 | load(paste0("varselection.dataSNR", SNR, ".RData"))
20 | X_full <- X
21 | Y_full <- Y
22 | kappa <- 2
23 | # subset data to desired size
24 | #
25 | s0 <- 100
26 | proportion_singleflip <- 0.5
27 | 
28 | nrep <- 1000
29 | nps <- 5
30 | ps <- c(100, 250, 500, 750, 1000)
31 | df.ue <- data.frame()
32 | meetingsfilepath <- paste0("varselection.meetings.differentp.n", n, ".RData")
33 | 
34 | for (ip in seq_along(ps)){
35 |   print(ip)
36 |   p <- ps[ip]
37 |   Y <- Y_full[1:n]
38 |   X <- X_full[1:n,1:p]
39 |   Y2 <- (t(Y) %*% Y)[1,1]
40 |   g <- p^3
41 |   # load model
42 |   vs <- get_variableselection(Y,X,g,kappa,s0,proportion_singleflip)
43 |   prior <- vs$prior
44 |   marginal_likelihood <- vs$marginal_likelihood
45 |   rinit <- vs$rinit
46 |   single_kernel <- vs$single_kernel
47 |   coupled_kernel <- vs$coupled_kernel
48 |   unbiasedestimator <- vs$unbiasedestimator
49 |   # Unbiased MCMC
50 |   ues <- foreach(i = 1:nrep) %dorng% {
51 |     unbiasedestimator(single_kernel, coupled_kernel, rinit, h = function(x) x, k = 0, m = 0)
52 |   }
53 |   meetings <- sapply(ues, function(x) x$meetingtime)
54 |   df.ue <- rbind(df.ue, data.frame(irep = 1:nrep, ip = rep(ip, nrep), p = rep(p, nrep), meetings = meetings))
55 |   save(df.ue, file = meetingsfilepath)
56 | }
57 | 
58 | load(meetingsfilepath)
59 | df.summary <- df.ue %>% group_by(ip,p) %>% summarise(m = mean(meetings)) %>% mutate(scaledm = m/p)
60 | 
61 | g <- ggplot(df.ue, aes(x = p, y = meetings/p, group = p)) + geom_violin() + ylab("meeting times / p")
62 | g <- g + scale_x_continuous(breaks = ps)
63 | g
64 | 
65 | ggsave(filename = "varselection.meetings.differentp.pdf", plot = g, width = 8, height = 6)
66 | 
67 | 
68 | 


--------------------------------------------------------------------------------
/inst/reproducevarselection/varselection.differentp.plots.R:
--------------------------------------------------------------------------------
 1 | library(unbiasedmcmc)
 2 | setmytheme()
 3 | rm(list = ls())
 4 | set.seed(21)
 5 | #
 6 | library(doRNG)
 7 | library(doParallel)
 8 | library(tidyr)
 9 | library(viridis)
10 | library(dplyr)
11 | registerDoParallel(cores = detectCores()-2)
12 | #
13 | 
14 | # later put n = 500 and p = 1000
15 | n <- 500
16 | SNR <- 1
17 | #
18 | # load data
19 | load(paste0("varselection.dataSNR", SNR, ".RData"))
20 | X_full <- X
21 | Y_full <- Y
22 | kappa <- 2
23 | # subset data to desired size
24 | #
25 | s0 <- 100
26 | proportion_singleflip <- 0.5
27 | 
28 | nrep <- 1000
29 | nps <- 5
30 | ps <- c(100, 250, 500, 750, 1000)
31 | df.ue <- data.frame()
32 | meetingsfilepath <- paste0("varselection.meetings.differentp.n", n, ".RData")
33 | 
34 | load(meetingsfilepath)
35 | df.summary <- df.ue %>% group_by(ip,p) %>% summarise(m = mean(meetings)) %>% mutate(scaledm = m/p)
36 | 
37 | g <- ggplot(df.ue, aes(x = p, y = meetings/p, group = p)) + geom_violin() + ylab("meeting times / p")
38 | g <- g + scale_x_continuous(breaks = ps)
39 | g
40 | 
41 | ggsave(filename = "varselection.meetings.differentp.pdf", plot = g, width = 8, height = 6)
42 | 


--------------------------------------------------------------------------------
/inst/reproducevarselection/varselection.generatedata.run.R:
--------------------------------------------------------------------------------
 1 | library(unbiasedmcmc)
 2 | rm(list = ls())
 3 | set.seed(1)
 4 | # registerDoParallel(cores = detectCores())
 5 | 
 6 | # simulate data
 7 | n <- 1000
 8 | p <- 5000
 9 | for (SNR in c(0.5, 1, 2)){
10 |   s_star <- 10
11 |   s0 <- 100
12 |   sigma0 <- 1
13 |   beta_star <- SNR * sqrt(sigma0^2 * log(p) / n) * c(2,-3,2,2,-3,3,-2,3,-2,3, rep(0, p-10))
14 |   # independent design
15 |   X <- matrix(rnorm(n * p), nrow = n, ncol = p) # fast_rmvnorm_chol(n, rep(0, p), diag(1, p, p))
16 |   X <- scale(X)
17 |   Y <- X %*% matrix(beta_star, ncol = 1) + rnorm(n, 0, sigma0)
18 |   Y <- scale(Y)
19 | 
20 |   save(X, Y, beta_star, file = paste0("varselection.dataSNR", SNR, ".RData"))
21 | }
22 | # correlated design
23 | n <- 1000
24 | p <- 5000
25 | for (SNR in c(0.5, 1, 2)){
26 |   s_star <- 10
27 |   s0 <- 100
28 |   sigma0 <- 1
29 |   beta_star <- SNR * sqrt(sigma0^2 * log(p) / n) * c(2,-3,2,2,-3,3,-2,3,-2,3, rep(0, p-10))
30 |   covariance <- matrix(0, nrow = p, ncol = p)
31 |   for (i in 1:p){
32 |     for (j in 1:p){
33 |       covariance[i,j] <- exp(-abs(i-j))
34 |     }
35 |   }
36 | 
37 |   X <- fast_rmvnorm(n, mean = rep(0, p), covariance)
38 |   # X <- matrix(rnorm(n * p), nrow = n, ncol = p) # fast_rmvnorm_chol(n, rep(0, p), diag(1, p, p))
39 |   X <- scale(X)
40 |   Y <- X %*% matrix(beta_star, ncol = 1) + rnorm(n, 0, sigma0)
41 |   Y <- scale(Y)
42 | 
43 |   save(X, Y, beta_star, file = paste0("varselection.data.correlatedSNR", SNR, ".RData"))
44 | }
45 | 


--------------------------------------------------------------------------------
/inst/reproducevarselection/varselection.mcmc.run.R:
--------------------------------------------------------------------------------
 1 | library(unbiasedmcmc)
 2 | # setmytheme()
 3 | rm(list = ls())
 4 | set.seed(1)
 5 | 
 6 | # file paths
 7 | n <- 500
 8 | SNR <- 1
 9 | p <- 1000
10 | #
11 | load(paste0("varselection.dataSNR", SNR, ".RData"))
12 | 
13 | Y <- Y[1:n]
14 | X <- X[1:n,1:p]
15 | Y2 <- (t(Y) %*% Y)[1,1]
16 | g <- p^3
17 | 
18 | s0 <- 100
19 | kappa <- .1
20 | proportion_singleflip <- 0.5
21 | 
22 | vs <- get_variableselection(Y,X,g,kappa,s0,proportion_singleflip)
23 | 
24 | prior <- vs$prior
25 | marginal_likelihood <- vs$marginal_likelihood
26 | rinit <- vs$rinit
27 | single_kernel <- vs$single_kernel
28 | 
29 | nmcmc <- 2e4
30 | 
31 | current_gamma <- rinit()
32 | current_pdf <- marginal_likelihood(current_gamma) + prior(current_gamma)
33 | chain <- matrix(nrow = nmcmc, ncol = p)
34 | pdfs <- rep(0, nmcmc)
35 | chain[1,] <- current_gamma
36 | pdfs[1] <- current_pdf
37 | for (imcmc in 2:nmcmc){
38 |   result <- single_kernel(current_gamma, current_pdf)
39 |   current_gamma <- result$state
40 |   current_pdf <- result$pdf
41 |   chain[imcmc,] <- current_gamma
42 |   pdfs[imcmc] <- current_pdf
43 | }
44 | #
45 | 
46 | plot(pdfs, type = "l")
47 | 
48 | burnin <- nmcmc/2
49 | 
50 | postburnin <- chain[burnin:nmcmc,]
51 | postmean <- colMeans(postburnin)
52 | 
53 | # plot(postmean, ylim = c(0,1), type = "l")
54 | plot(postmean[1:20], ylim = c(0,1), type = "b")
55 | print(postmean[1:20])
56 | 
57 | library(coda)
58 | mcmcvar <- spectrum0(postburnin[,1:10])
59 | mcmcvar
60 | # save(nmcmc, burnin, pdfs, postmean, mcmcvar, file = "varselection.SNR1.n500.p1000.mcmc.RData")
61 | #
62 | 


--------------------------------------------------------------------------------
/inst/reproducevarselection/varselection.script.R:
--------------------------------------------------------------------------------
 1 | # Script that sets an initial seed using rlecuyer's package
 2 | # and compute a number of estimators using that seed
 3 | #
 4 | arguments <- commandArgs(TRUE)
 5 | if (length(arguments)!=8){
 6 |   cat("needs 8 integers as arguments; JOB_ID, NRUNS, design, n, p, SNR, k, m")
 7 |   q()
 8 | }
 9 | 
10 | ###
11 | # arguments <- c("1", "10", "0", 1000", "500", "2", "0", "0")
12 | ### means JOB_ID=1, NRUNS=10, design=0, n=1000, p=500, SNR=2, k=0, m=0
13 | 
14 | JOB_ID <- as.numeric(arguments[1])
15 | NRUNS <- as.numeric(arguments[2]) # number of runs for this job
16 | design <- as.numeric(arguments[3]) # 0 for independent design, 1 for correlated
17 | n <- as.numeric(arguments[4])
18 | p <- as.numeric(arguments[5])
19 | SNR <- as.numeric(arguments[6])
20 | k <- as.numeric(arguments[7])
21 | m <- as.numeric(arguments[8])
22 | 
23 | # file paths
24 | workingdirectory <- ""
25 | resultpath <- paste0("varselection.design", design, ".SNR", SNR, ".n", n, ".p", p, ".k", k, ".m", m, ".job", JOB_ID, ".RData")
26 | #
27 | setwd(workingdirectory)
28 | # load packages
29 | library(rlecuyer)
30 | library(unbiasedmcmc)
31 | setmytheme()
32 | # initial seed
33 | .lec.SetPackageSeed(c(42, 66, 101, 123454, 7, 54321))
34 | nstream <- 1000 # number larger than total # of processors expected to run this script
35 | stream.names <- paste(1:nstream)
36 | .lec.CreateStream(stream.names)
37 | .lec.CurrentStream(paste(JOB_ID))
38 | 
39 | ### beginning of job
40 | ### load data (generated by "varselection.generatedata.R")
41 | 
42 | if (design == 0){
43 |   load(paste0("varselection.dataSNR", SNR, ".RData"))
44 | } else {
45 |   load(paste0("varselection.data.correlatedSNR", SNR, ".RData"))
46 | }
47 | Ysub <- Y[1:n]
48 | Xsub <- X[1:n,1:p]
49 | Y2 <- (t(Ysub) %*% Ysub)[1,1]
50 | g <- p^3
51 | s0 <- 100
52 | kappa <- 2
53 | proportion_singleflip <- 0.5
54 | 
55 | vs <- get_variableselection(Ysub,Xsub,g,kappa,s0,proportion_singleflip)
56 | 
57 | # load functions to generate meeting times
58 | prior <- vs$prior
59 | marginal_likelihood <- vs$marginal_likelihood
60 | rinit <- vs$rinit
61 | single_kernel <- vs$single_kernel
62 | coupled_kernel <- vs$coupled_kernel
63 | coupled_chains <- vs$coupled_chains
64 | unbiasedestimator <- vs$unbiasedestimator
65 | 
66 | result <- list()
67 | for (irun in 1:NRUNS){
68 |   cat("Run #", irun, "\n")
69 |   ue <- unbiasedestimator(single_kernel, coupled_kernel, rinit, h = function(x) x, k = k, m = m)
70 |   result[[irun]] <- list(irun = irun, JOB_ID = JOB_ID,
71 |                          mcmcestimator = ue$mcmcestimator, correction = ue$correction,
72 |                          uestimator = ue$uestimator, meetingtime = ue$meetingtime, niterations = ue$iteration)
73 |   save(result, file = resultpath)
74 | }
75 | .lec.CurrentStreamEnd()
76 | 


--------------------------------------------------------------------------------
/inst/reproducevarselection/varselection.tables.R:
--------------------------------------------------------------------------------
 1 | library(unbiasedmcmc)
 2 | # setmytheme()
 3 | library(dplyr)
 4 | library(tidyr)
 5 | rm(list = ls())
 6 | set.seed(1)
 7 | 
 8 | # the function gets the results from all the jobs matching the
 9 | # given parameters
10 | get_results <- function(design, SNR, n, p, k){
11 |   resultsfiles <- list.files(pattern = paste0("varselection.design", design, ".SNR", SNR, ".n", n, ".p", p, ".k", k, ".*"))
12 |   results <- list()
13 |   iresult <- 1
14 |   resultsfiles
15 |   nfiles <- length(resultsfiles)
16 |   for (ifile in 1:nfiles){
17 |     load(resultsfiles[ifile])
18 |     cat("file", ifile, "/", nfiles, ", containing", length(result), "runs\n")
19 |     for (ires in 1:length(result)){
20 |       results[[iresult]] <- result[[ires]]
21 |       iresult <- iresult + 1
22 |     }
23 |   }
24 |   return(results)
25 | }
26 | #
27 | 
28 | designs <- c(0,1)
29 | SNRs <- c(0.5, 1, 2)
30 | ns <- c(500,1000)
31 | ps <- c(1000, 5000)
32 | k <- 0
33 | df_ <- data.frame()
34 | for (design in designs){
35 |   for (SNR in SNRs){
36 |     cat("SNR=", SNR, "\n")
37 |     for (n in ns){
38 |       for (p in ps){
39 |         cat("p=", p, "\n")
40 |         results <- get_results(design, SNR, n, p, k)
41 |         meetings <- sapply(results, function(x) x$meetingtime)
42 |         df_ <- rbind(df_, data.frame(design = design, SNR = SNR, n = n, p = p, meanmeeting = mean(meetings), sdmeeting = sd(meetings), nrep = length(meetings)))
43 |       }
44 |     }
45 |   }
46 | }
47 | 
48 | df_$sderror <- df_$sdmeeting/sqrt(df_$nrep)
49 | 
50 | table.independent <- df_ %>% filter(design == 0) %>% select(SNR,n,p,meanmeeting) %>% arrange(n,SNR,p) %>%
51 |   spread(SNR, meanmeeting) %>% setNames(c("n", "p", "SNR = 0.5", "SNR = 1", "SNR = 2"))
52 | 
53 | library(xtable)
54 | cap <- "bla"
55 | formatted.df <- xtable(table.independent, digits = 0, caption = cap)
56 | formatted.df
57 | 
58 | 
59 | table.correlated <- df_ %>% filter(design == 1) %>% select(SNR,n,p,meanmeeting) %>% arrange(n,SNR,p) %>%
60 |   spread(SNR, meanmeeting) %>% setNames(c("n", "p", "SNR = 0.5", "SNR = 1", "SNR = 2"))
61 | formatted.df <- xtable(table.correlated, digits = 0, caption = cap)
62 | formatted.df
63 | 
64 | 


--------------------------------------------------------------------------------
/man/H_bar.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/H_bar.R
 3 | \name{H_bar}
 4 | \alias{H_bar}
 5 | \title{Compute unbiased estimators from coupled chains}
 6 | \usage{
 7 | H_bar(c_chains, h = function(x) x, k = 0, m = 1)
 8 | }
 9 | \arguments{
10 | \item{c_chains}{A list containing coupled chains generated by \code{\link{sample_coupled_chains}}.}
11 | 
12 | \item{h}{A test function of interest, which should take a chain state ("chain_state" entry of the output of "rinit", for instance)
13 | and return a numeric vector}
14 | 
15 | \item{k}{An integer at which to start computing the unbiased estimator; should be less than m}
16 | 
17 | \item{m}{A time horizon: should be less than the length of the chains; typically the same
18 | m that was used in the call to \code{\link{sample_coupled_chains}}, or a smaller value}
19 | }
20 | \value{
21 | A value (or vector of values) of an unbiased estimator of \eqn{\int h(x) \pi(x) dx}
22 | }
23 | \description{
24 | Compute unbiased estimators based on coupled chains.
25 | Presumably generated via \code{\link{sample_coupled_chains}}.
26 | 
27 | The test function h should take a "chain_state" as argument and return a numeric vector.
28 | The estimand of interest is \eqn{\int h(x) \pi(x) dx}, where \eqn{\pi} is the invariant distribution
29 | of the chains.
30 | 
31 | The lag is inferred from the coupled chains, so there's no argument to specify it.
32 | }
33 | 


--------------------------------------------------------------------------------
/man/c_chains_to_dataframe.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/c_chains_to_dataframe.R
 3 | \name{c_chains_to_dataframe}
 4 | \alias{c_chains_to_dataframe}
 5 | \title{Obtain data frame representation of measure from list of coupled chains}
 6 | \usage{
 7 | c_chains_to_dataframe(c_chains, k, m, dopar = FALSE, prune = TRUE)
 8 | }
 9 | \arguments{
10 | \item{c_chains}{A list containing coupled chains generated by \code{\link{sample_coupled_chains}}.}
11 | 
12 | \item{k}{An integer at which to start computing the signed measure; should be less than m}
13 | 
14 | \item{m}{A time horizon: should be less than the length of the chains; typically the same
15 | m that was used in the call to \code{\link{sample_coupled_chains}}, or a smaller value}
16 | 
17 | \item{dopar}{Boolean (default to FALSE) indicating whether to perform calculation using registered parallel cores}
18 | }
19 | \value{
20 | A data frame
21 | }
22 | \description{
23 | From coupled chains,
24 | presumably generated via \code{\link{sample_coupled_chains}},
25 | and a choice of integers k and m, the function constructs
26 | a data frame representation of an empirical signed measure, i.e.
27 | 
28 |  \deqn{\hat{\pi}(dx) = \sum_{n=1}^N \omega_n \delta_{Z_n}(dx)}
29 | 
30 | The function returns a data frame with first column "rep" indicating index of coupled chain,
31 | second column "MCMC" indicating whether atom is part of the "MCMC" part of the signed measure (1) or the bias correction part (0)
32 | third column "weight" indicating weights,
33 | remaining columns "atom.1", "atom.2", etc containing components of the atoms
34 | }
35 | 


--------------------------------------------------------------------------------
/man/c_chains_to_measure_as_list.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/c_chains_to_measure_as_list.R
 3 | \name{c_chains_to_measure_as_list}
 4 | \alias{c_chains_to_measure_as_list}
 5 | \title{Obtain empirical measure (as list) from coupled chains}
 6 | \usage{
 7 | c_chains_to_measure_as_list(c_chains, k, m)
 8 | }
 9 | \arguments{
10 | \item{c_chains}{A list containing coupled chains generated by \code{\link{sample_coupled_chains}}.}
11 | 
12 | \item{k}{An integer at which to start computing the signed measure; should be less than m}
13 | 
14 | \item{m}{A time horizon: should be less than the length of the chains; typically the same
15 | m that was used in the call to \code{\link{sample_coupled_chains}}, or a smaller value}
16 | }
17 | \value{
18 | A list with "weights" and "atoms"
19 | }
20 | \description{
21 | From coupled chains,
22 | presumably generated via \code{\link{sample_coupled_chains}},
23 | and a choice of integers k and m, the function constructs
24 | a representation of an empirical signed measure, i.e.
25 | 
26 |  \deqn{\hat{\pi}(dx) = \sum_{n=1}^N \omega_n \delta_{Z_n}(dx)}
27 | 
28 | The function returns the weights \eqn{\omega} and the atoms \eqn{Z},
29 | in a list with two entries, "weights" and "atoms".
30 | }
31 | 


--------------------------------------------------------------------------------
/man/dinversegamma.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/invgamma_couplings.R
 3 | \name{dinversegamma}
 4 | \alias{dinversegamma}
 5 | \title{compute log-density of inverse gamma}
 6 | \usage{
 7 | dinversegamma(x, alpha, beta)
 8 | }
 9 | \description{
10 | at x > 0 and with given parameters alpha, beta, given by
11 |  alpha * log(beta) - lgamma(alpha) - (alpha+1) * log(x) - beta / x
12 | }
13 | 


--------------------------------------------------------------------------------
/man/dinvgaussian.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/invgaussian_couplings.R
 3 | \name{dinvgaussian}
 4 | \alias{dinvgaussian}
 5 | \title{Log-density of inverse Gaussian}
 6 | \usage{
 7 | dinvgaussian(x, mu, lambda)
 8 | }
 9 | \value{
10 | A vector of n log-pdf values, one for each element in the first argument 'x'.
11 | }
12 | \description{
13 | Computes log-pdf at x > 0 of inverse Gaussian with given parameters mu, lambda, given by
14 |  \deqn{0.5 * log(\lambda/(2*\pi)) - 1.5 * log(x) - \lambda * (x-\mu)^2 / (2 * \mu^2 * x)}
15 | }
16 | 


--------------------------------------------------------------------------------
/man/expit.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{expit}
 4 | \alias{expit}
 5 | \title{expit}
 6 | \usage{
 7 | expit(z)
 8 | }
 9 | \description{
10 | expit function
11 | }
12 | 


--------------------------------------------------------------------------------
/man/fast_dmvnorm.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mvnorm.R
 3 | \name{fast_dmvnorm}
 4 | \alias{fast_dmvnorm}
 5 | \title{fast_dmvnorm}
 6 | \usage{
 7 | fast_dmvnorm(x, mean, covariance)
 8 | }
 9 | \arguments{
10 | \item{x}{A matrix of size n times d}
11 | 
12 | \item{mean}{A vector of size d specifying the mean vector of the multivariate Normal}
13 | 
14 | \item{covariance}{A matrix of size d x d specifying the covariance matrix of the multivariate Normal}
15 | }
16 | \value{
17 | A vector of n evaluations of the multivariate Normal log-pdf, one for each row of \code{x}
18 | }
19 | \description{
20 | Compute multivariate Normal density (log-value) evaluated at each row of a given matrix. The function does not check
21 | the arguments, use at your own risk.
22 | }
23 | \examples{
24 | x <- fast_rmvnorm(2, rep(0, 5), diag(1,5,5))
25 | fast_dmvnorm(x, rep(0, 5), diag(1,5,5))
26 | }
27 | 


--------------------------------------------------------------------------------
/man/fast_dmvnorm_chol_inverse.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mvnorm.R
 3 | \name{fast_dmvnorm_chol_inverse}
 4 | \alias{fast_dmvnorm_chol_inverse}
 5 | \title{fast_dmvnorm_chol_inverse}
 6 | \usage{
 7 | fast_dmvnorm_chol_inverse(x, mean, chol_inverse)
 8 | }
 9 | \arguments{
10 | \item{x}{A matrix of size n times d}
11 | 
12 | \item{mean}{A vector of size d specifying the mean vector of the multivariate Normal}
13 | 
14 | \item{chol_inverse}{A matrix of size d x d specifying the inverse of the upper-triangular Cholesky
15 | factor of the covariance matrix of the multivariate Normal,
16 | for instance obtained using \code{solve(chol(Sigma))}}
17 | }
18 | \value{
19 | A vector of n evaluations of the multivariate Normal log-pdf, one for each row of \code{x}
20 | }
21 | \description{
22 | Compute multivariate Normal density (log-value) evaluated at each row of a given matrix. The function does not check
23 | the arguments, use at your own risk.
24 | }
25 | \examples{
26 | Sigma <- diag(1, 5, 5)
27 | Sigma[1,2] <- Sigma[2,1] <- 0.3
28 | Sigma_chol <- chol(Sigma)
29 | x <- fast_rmvnorm_chol(2, rep(0, 5), Sigma_chol)
30 | fast_dmvnorm_chol_inverse(x, rep(0, 5), solve(Sigma_chol))
31 | }
32 | 


--------------------------------------------------------------------------------
/man/fast_rmvnorm.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mvnorm.R
 3 | \name{fast_rmvnorm}
 4 | \alias{fast_rmvnorm}
 5 | \title{fast_rmvnorm}
 6 | \usage{
 7 | fast_rmvnorm(n, mean, covariance)
 8 | }
 9 | \arguments{
10 | \item{n}{An integer >= 1 specifying the desired number of draws}
11 | 
12 | \item{mean}{A vector of size d specifying the mean vector of the multivariate Normal}
13 | 
14 | \item{covariance}{A matrix of size d x d specifying the covariance matrix of the multivariate Normal}
15 | }
16 | \value{
17 | A matrix of size n x d containing n d-dimensional multivariate Normal draws (one per row)
18 | }
19 | \description{
20 | Generate multivariate Normal draws. The function does not check
21 | the arguments, use at your own risk.
22 | }
23 | \examples{
24 |  fast_rmvnorm(2, rep(0, 5), diag(1, 5, 5))
25 | }
26 | 


--------------------------------------------------------------------------------
/man/fast_rmvnorm_chol.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mvnorm.R
 3 | \name{fast_rmvnorm_chol}
 4 | \alias{fast_rmvnorm_chol}
 5 | \title{fast_rmvnorm_chol}
 6 | \usage{
 7 | fast_rmvnorm_chol(nparticles, mean, chol)
 8 | }
 9 | \arguments{
10 | \item{mean}{A vector of size d specifying the mean vector of the multivariate Normal}
11 | 
12 | \item{chol}{A matrix of size d x d specifying the upper triangular Cholesky factor
13 | of the covariance matrix of the multivariate Normal target,
14 | for instance obtained using the \code{\link[base]{chol}}
15 | function of R.}
16 | 
17 | \item{n}{An integer >= 1 specifying the desired number of draws}
18 | }
19 | \value{
20 | A matrix of size n x d containing n d-dimensional multivariate Normal draws (one per row)
21 | }
22 | \description{
23 | Generate multivariate Normal draws. The function does not check
24 | the arguments, use at your own risk.
25 | }
26 | \examples{
27 | Sigma <- diag(1, 5, 5)
28 | Sigma[1,2] <- Sigma[2,1] <- 0.3
29 | fast_rmvnorm_chol(2, rep(0, 5), chol(Sigma))
30 | }
31 | 


--------------------------------------------------------------------------------
/man/get_blasso.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/bayesianlasso.R
 3 | \name{get_blasso}
 4 | \alias{get_blasso}
 5 | \title{Y and X need to be matrices, and lambda non-negative}
 6 | \usage{
 7 | get_blasso(Y, X, lambda)
 8 | }
 9 | \description{
10 | Y and X need to be matrices, and lambda non-negative
11 | }
12 | 


--------------------------------------------------------------------------------
/man/get_max_coupling.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get_max_coupling.R
 3 | \name{get_max_coupling}
 4 | \alias{get_max_coupling}
 5 | \title{Sample from maximal coupling of two distributions p and q}
 6 | \usage{
 7 | get_max_coupling(rp, dp, rq, dq)
 8 | }
 9 | \arguments{
10 | \item{rp}{A function taking n as an argument and returning n samples from the distribution p}
11 | 
12 | \item{dp}{A function taking x as an argument and returning log-pdf of p evaluated at x}
13 | 
14 | \item{rq}{A function taking n as an argument and returning n samples from the distribution q}
15 | 
16 | \item{dq}{A function taking x as an argument and returning log-pdf of q evaluated at x}
17 | }
18 | \value{
19 | Returns a list with
20 | 
21 | \itemize{
22 | \item "xy": the pair of samples \eqn{(x,y)}
23 | 
24 | \item "identical": TRUE if \eqn{x = y}, FALSE otherwise
25 | }
26 | }
27 | \description{
28 | Takes two univariate continuous distributions (specified by random number generator and log-pdf function),
29 | and returns a function to sample from a maximal coupling of these two distributions.
30 | }
31 | \examples{
32 | mu1 <- 0; mu2 <- 1; sigma1 <- 0.5; sigma2 <- 1.2
33 |  f <- get_max_coupling(function(n) rnorm(n, mu1, sigma1),
34 |  function(x) dnorm(x, mu1, sigma1, log = TRUE),
35 |  function(n) rnorm(n, mu2, sigma2),
36 |  function(x) dnorm(x, mu2, sigma2, log = TRUE))
37 | f()
38 | }
39 | 


--------------------------------------------------------------------------------
/man/get_mh_kernel.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get_mh_kernels.R
 3 | \name{get_mh_kernels}
 4 | \alias{get_mh_kernels}
 5 | \title{Get random walk Metropolis-Hastings kernels}
 6 | \usage{
 7 | get_mh_kernels(target, Sigma_proposal)
 8 | }
 9 | \arguments{
10 | \item{target}{function taking a vector as input and returning target log-density evaluation}
11 | 
12 | \item{Sigma_proposal}{covariance of the Normal random walk proposal}
13 | }
14 | \value{
15 | A list containing the keys
16 | \code{single_kernel}, \code{coupled_kernel}.
17 | }
18 | \description{
19 | This function takes a target (specified through its log-pdf)
20 | and a covariance matrix for a Normal random walk proposal, and returns a list containing the keys
21 | \code{single_kernel}, \code{coupled_kernel} corresponding to marginal
22 | and coupled MH kernels.
23 | 
24 | The coupling is done by reflection-maximal coupling of the proposals,
25 | and common uniform variable for the accept/reject step. For reflection-maximal
26 | couplings, see \code{\link{rnorm_reflectionmax}} and \code{\link{rmvnorm_reflectionmax}}.
27 | 
28 | The returned kernels can then be used in the functions \code{\link{sample_meetingtime}} or
29 | \code{\link{sample_coupled_chains}} or \code{\link{sample_unbiasedestimator}}.
30 | }
31 | 


--------------------------------------------------------------------------------
/man/get_variableselection.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/variableselection.R
 3 | \name{get_variableselection}
 4 | \alias{get_variableselection}
 5 | \title{Y and X need to be matrices, and lambda non-negative}
 6 | \usage{
 7 | get_variableselection(Y, X, g, kappa, s0, proportion_singleflip)
 8 | }
 9 | \description{
10 | Y and X need to be matrices, and lambda non-negative
11 | }
12 | 


--------------------------------------------------------------------------------
/man/hello.Rd:
--------------------------------------------------------------------------------
 1 | \name{hello}
 2 | \alias{hello}
 3 | \title{Hello, World!}
 4 | \usage{
 5 | hello()
 6 | }
 7 | \description{
 8 | Prints 'Hello, world!'.
 9 | }
10 | \examples{
11 | hello()
12 | }
13 | 


--------------------------------------------------------------------------------
/man/histogram_c_chains.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/histogram_c_chains.R
 3 | \name{histogram_c_chains}
 4 | \alias{histogram_c_chains}
 5 | \title{histogram_c_chains}
 6 | \usage{
 7 | histogram_c_chains(
 8 |   c_chains,
 9 |   component,
10 |   k,
11 |   m,
12 |   breaks = NULL,
13 |   nclass = 30,
14 |   dopar = FALSE
15 | )
16 | }
17 | \arguments{
18 | \item{c_chains}{A list of coupled chains, each as produced by \code{\link{sample_coupled_chains}}}
19 | 
20 | \item{component}{An integer specifying which marginal to approximate}
21 | 
22 | \item{k}{An integer (see \code{\link{H_bar}})}
23 | 
24 | \item{m}{Another integer (see \code{\link{H_bar}})}
25 | 
26 | \item{breaks}{A vector indicating how to bin the space (optional)}
27 | 
28 | \item{nclass}{An integer specifying the number of bins to aim for, if "breaks" is not specified}
29 | 
30 | \item{dopar}{A boolean indicating whether to parallelize the computation (requires doParallel and having registed parallel cores)}
31 | }
32 | \description{
33 | Compute histogram approximations of marginal distributions based on coupled Markov chains
34 | }
35 | 


--------------------------------------------------------------------------------
/man/logistic_precomputation.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/logistic_regression.R
 3 | \name{logisticregression_precomputation}
 4 | \alias{logisticregression_precomputation}
 5 | \title{Precomputation to prepare for the Polya-Gamma sampler}
 6 | \usage{
 7 | logisticregression_precomputation(Y, X, b, B)
 8 | }
 9 | \description{
10 | This function takes the canonical elements defining the logistic regression
11 | problem (the vector of outcome Y, covariate matrix X, the prior mean b and the prior variance B),
12 | and precomputes some quantities repeatedly used in the Polya-Gamma sampler and variants of it.
13 | The precomputed quantities are returned in a list, which is then meant to be passed to the samplers.
14 | }
15 | 


--------------------------------------------------------------------------------
/man/pg_gibbs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/logistic_regression.R
 3 | \name{pg_gibbs}
 4 | \alias{pg_gibbs}
 5 | \title{Polya-Gamma Gibbs sampler}
 6 | \usage{
 7 | pg_gibbs(niterations, logistic_setting)
 8 | }
 9 | \value{
10 | a matrix where each row corresponds to an iteration of the sampler, and contains
11 | the regression coefficient at that iteration.
12 | }
13 | \description{
14 | This implements the sampler proposed in
15 | Nicholas G Polson, James G Scott, and Jesse Windle. Bayesian inference for logistic models using
16 | Polya–Gamma latent variables. Journal of the American statistical Association, 108(504):1339–1349, 2013.
17 | The arguments are:
18 |  \itemize{
19 |   \item niterations: the number of desired MCMC iterations,
20 |   \item logistic_setting: a list of precomputed quantities obtained via 'logistic_precomputation'.
21 |   }
22 | }
23 | 


--------------------------------------------------------------------------------
/man/rcpp_hello.Rd:
--------------------------------------------------------------------------------
 1 | \name{rcpp_hello}
 2 | \alias{rcpp_hello}
 3 | \title{Hello, Rcpp!}
 4 | \usage{
 5 | rcpp_hello()
 6 | }
 7 | \description{
 8 | Returns an \R \code{list} containing the character vector
 9 | \code{c("foo", "bar")} and the numeric vector \code{c(0, 1)}.
10 | }
11 | \examples{
12 | rcpp_hello()
13 | }
14 | 


--------------------------------------------------------------------------------
/man/rgamma_coupled.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gamma_couplings.R
 3 | \name{rgamma_coupled}
 4 | \alias{rgamma_coupled}
 5 | \title{Sample from maximally coupled Gamma variables}
 6 | \usage{
 7 | rgamma_coupled(alpha1, alpha2, beta1, beta2)
 8 | }
 9 | \arguments{
10 | \item{alpha1}{First shape}
11 | 
12 | \item{alpha2}{Second shape}
13 | 
14 | \item{beta1}{First rate}
15 | 
16 | \item{beta2}{Second rate}
17 | }
18 | \value{
19 | A list with entry 'xy' for the pair of values, and boolean 'identical' indicating whether the two values
20 | are identical.
21 | }
22 | \description{
23 | Draws a pair of variables, respectively Gamma(alpha1, beta1) and Gamma(alpha2, beta2)
24 | where the parametrization is that beta is the rate, i.e. the log-pdf of Gamma(alpha,beta) evaluated at x is
25 | \deqn{\alpha * log(\beta) - lgamma(\alpha) + (\alpha-1) * log(x) - \beta x}
26 | where \eqn{lgamma} stands for the logarithm of the Gamma function.
27 | }
28 | 


--------------------------------------------------------------------------------
/man/rinversegamma.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/invgamma_couplings.R
 3 | \name{rinversegamma}
 4 | \alias{rinversegamma}
 5 | \title{Sample from inverse gamma}
 6 | \usage{
 7 | rinversegamma(n, alpha, beta)
 8 | }
 9 | \description{
10 | with given parameters alpha, beta, with log-density given by
11 | alpha * log(beta) - lgamma(alpha) - (alpha+1) * log(x) - beta / x
12 | }
13 | 


--------------------------------------------------------------------------------
/man/rinversegamma_coupled.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/invgamma_couplings.R
 3 | \name{rinversegamma_coupled}
 4 | \alias{rinversegamma_coupled}
 5 | \title{Sample from maximally coupled inverse gamma}
 6 | \usage{
 7 | rinversegamma_coupled(alpha1, alpha2, beta1, beta2)
 8 | }
 9 | \description{
10 | with given parameters alpha1, alpha2, beta1, beta2,
11 | where the parametrization is that the log-density of IG(alpha, beta) is
12 | alpha * log(beta) - lgamma(alpha) - (alpha+1) * log(x) - beta / x
13 | }
14 | 


--------------------------------------------------------------------------------
/man/rinvgaussian.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/invgaussian_couplings.R
 3 | \name{rinvgaussian}
 4 | \alias{rinvgaussian}
 5 | \title{Sample from inverse Gaussian}
 6 | \usage{
 7 | rinvgaussian(n, mu, lambda)
 8 | }
 9 | \value{
10 | A vector of n draws, where n is the first argument.
11 | }
12 | \description{
13 | Parametrized by mu, lambda, with log-density given by
14 |  \deqn{0.5 * log(\lambda/(2*\pi)) - 1.5 * log(x) - \lambda * (x-\mu)^2 / (2 * \mu^2 * x)}
15 | 
16 |  The procedure goes as follows.
17 | 
18 |  \itemize{
19 |  \item Generate nu ~ Normal(0,1).
20 |  \item Define y = nu^2.
21 |  \item Define x = mu + mu^2 * y / (2 * lambda) - mu / (2 * lambda) * sqrt(4 * mu * lambda * y + mu^2 * y^2).
22 |  \item Generate Z ~ Uniform(0,1).
23 |  \item If z <= mu / (mu + x), output x, otherwise output mu^2 / x.
24 |  }
25 | }
26 | 


--------------------------------------------------------------------------------
/man/rinvgaussian_coupled.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/invgaussian_couplings.R
 3 | \name{rinvgaussian_coupled}
 4 | \alias{rinvgaussian_coupled}
 5 | \title{Sample from maximally coupled inverse Gaussian}
 6 | \usage{
 7 | rinvgaussian_coupled(mu1, mu2, lambda1, lambda2)
 8 | }
 9 | \value{
10 | A pair of values in a vector of size two.
11 | }
12 | \description{
13 | with parameters mu1, mu2, lambda1, lambda2; see \code{\link{rinvgaussian}}.
14 | }
15 | 


--------------------------------------------------------------------------------
/man/rmvnorm_max.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mvnorm_couplings.R
 3 | \name{rmvnorm_max}
 4 | \alias{rmvnorm_max}
 5 | \title{Maximal coupling of two multivariate Normal distributions}
 6 | \usage{
 7 | rmvnorm_max(mu1, mu2, Sigma1, Sigma2)
 8 | }
 9 | \arguments{
10 | \item{mu1}{First mean}
11 | 
12 | \item{mu2}{First mean}
13 | 
14 | \item{Sigma1}{First covariance matrix}
15 | 
16 | \item{Sigma2}{Second covariance matrix}
17 | }
18 | \value{
19 | A list containing 'xy', a matrix with 2 columns (one for each draw),
20 | and a boolean indicator 'identical' indicating whether the two draws
21 | are identical.
22 | }
23 | \description{
24 | Sample from maximal coupling of two multivariate Normal distributions,
25 | specified through their means and covariance matrices. See \code{\link{rmvnorm_max_chol}}
26 | for a version using Cholesky factors and their inverses.
27 | See \code{\link{rmvnorm_reflectionmax}}
28 | for a reflection-maximal coupling, in the case Sigma1=Sigma2.
29 | }
30 | \examples{
31 | p <- 3
32 | mu1 <- rep(0, p)
33 | mu2 <- rep(1, p)
34 | Sigma1 <- diag(0.4, p, p)
35 | Sigma1[1,2] <- Sigma1[2,1] <- 0.2
36 | Sigma2 <- diag(1.4, p, p)
37 | Sigma2[1,2] <- Sigma2[2,1] <- -0.5
38 | rmvnorm_max(mu1, mu2, Sigma1, Sigma2)
39 | }
40 | 


--------------------------------------------------------------------------------
/man/rmvnorm_max_chol.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mvnorm_couplings.R
 3 | \name{rmvnorm_max_chol}
 4 | \alias{rmvnorm_max_chol}
 5 | \title{Maximal coupling of two multivariate Normal distributions}
 6 | \usage{
 7 | rmvnorm_max_chol(
 8 |   mu1,
 9 |   mu2,
10 |   Cholesky1,
11 |   Cholesky2,
12 |   Cholesky_inverse1,
13 |   Cholesky_inverse2
14 | )
15 | }
16 | \arguments{
17 | \item{mu1}{First mean}
18 | 
19 | \item{mu2}{First mean}
20 | 
21 | \item{Cholesky1}{First Cholesky factor, e.g. obtained with \code{\link[base]{chol}}}
22 | 
23 | \item{Cholesky2}{Second Cholesky factor}
24 | 
25 | \item{Cholesky_inverse1}{First inverse of Cholesky factor, e.g. obtained with \code{solve(chol(Sigma))}}
26 | 
27 | \item{Cholesky_inverse2}{Second inverse of Cholesky factor}
28 | }
29 | \value{
30 | A list containing 'xy', a matrix with 2 columns (one for each draw),
31 | and a boolean indicator 'identical' indicating whether the two draws
32 | are identical.
33 | }
34 | \description{
35 | Sample from maximal coupling of two multivariate Normal distributions,
36 | specified through their means, the Cholesky factors of their covariance matrices,
37 | and the inverse of the Cholesky factors of the covariance matrices.
38 | }
39 | \examples{
40 | p <- 3
41 | mu1 <- rep(0, p)
42 | mu2 <- rep(1, p)
43 | Sigma1 <- diag(0.4, p, p)
44 | Sigma1[1,2] <- Sigma1[2,1] <- 0.2
45 | Sigma2 <- diag(1.4, p, p)
46 | Sigma2[1,2] <- Sigma2[2,1] <- -0.5
47 | Sigma1_chol <- chol(Sigma1)
48 | Sigma2_chol <- chol(Sigma2)
49 | Sigma1_chol_inv <- solve(Sigma1_chol)
50 | Sigma2_chol_inv <- solve(Sigma2_chol)
51 | rmvnorm_max_chol(mu1, mu2, Sigma1_chol, Sigma2_chol, Sigma1_chol_inv, Sigma2_chol_inv)
52 | }
53 | 


--------------------------------------------------------------------------------
/man/rmvnorm_reflectionmax.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mvnorm_couplings.R
 3 | \name{rmvnorm_reflectionmax}
 4 | \alias{rmvnorm_reflectionmax}
 5 | \title{Reflection-Maximal coupling of two multivariate Normal distributions}
 6 | \usage{
 7 | rmvnorm_reflectionmax(mu1, mu2, Cholesky, Cholesky_inverse)
 8 | }
 9 | \arguments{
10 | \item{mu1}{First mean}
11 | 
12 | \item{mu2}{First mean}
13 | 
14 | \item{Cholesky}{Cholesky factor, e.g. obtained with \code{\link[base]{chol}}}
15 | 
16 | \item{Cholesky_inverse}{Inverse of Cholesky factor, e.g. obtained with \code{solve(chol(Sigma))}}
17 | }
18 | \value{
19 | A list containing 'xy', a matrix with 2 columns (one for each draw),
20 | and a boolean indicator 'identical' indicating whether the two draws
21 | are identical.
22 | }
23 | \description{
24 | Sample from reflection-maximal coupling of two multivariate Normal distributions,
25 | specified through their means, with the same covariance matrix, specified
26 | through its Cholesky factor and inverse of Cholesky factor.
27 | 
28 | The idea is that a multivariate Normal is drawn around the first mean (mu1),
29 | and then reflected with respect to a hyperplane orthogonal to the direction between mu1 and mu2.
30 | 
31 | For univariate Normal distribution, see \code{\link{rnorm_reflectionmax}}.
32 | }
33 | \examples{
34 | p <- 3
35 | mu1 <- rep(0, p)
36 | mu2 <- rep(1, p)
37 | Sigma <- diag(0.4, p, p)
38 | Sigma[1,2] <- Sigma[2,1] <- 0.2
39 | Sigma_chol <- chol(Sigma)
40 | Sigma_chol_inv <- solve(Sigma_chol)
41 | rmvnorm_reflectionmax(mu1, mu2, Sigma_chol, Sigma_chol_inv)
42 | }
43 | 


--------------------------------------------------------------------------------
/man/rnorm_max_coupling.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rnorm_max_coupling.R
 3 | \name{rnorm_max_coupling}
 4 | \alias{rnorm_max_coupling}
 5 | \title{Maximal coupling of two univariate Normal distributions}
 6 | \usage{
 7 | rnorm_max_coupling(mu1, mu2, sigma1, sigma2)
 8 | }
 9 | \arguments{
10 | \item{mu1}{First mean}
11 | 
12 | \item{mu2}{Second mean}
13 | 
14 | \item{sigma1}{First mean}
15 | 
16 | \item{sigma2}{Second mean}
17 | }
18 | \value{
19 | Returns a list with
20 | 
21 | \itemize{
22 | \item "xy": the pair of samples \eqn{(x,y)}
23 | 
24 | \item "identical": TRUE if \eqn{x = y}, FALSE otherwise
25 | }
26 | }
27 | \description{
28 | Sample from maximal coupling of two univariate Normal distributions,
29 | specified through their means and standard deviations. See \code{\link{rmvnorm_max}} for a multivariate version.
30 | }
31 | 


--------------------------------------------------------------------------------
/man/rnorm_reflectionmax.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rnorm_reflectionmax.R
 3 | \name{rnorm_reflectionmax}
 4 | \alias{rnorm_reflectionmax}
 5 | \title{Reflection-maximal coupling of two univariate Normal distributions}
 6 | \usage{
 7 | rnorm_reflectionmax(mu1, mu2, sigma)
 8 | }
 9 | \arguments{
10 | \item{mu1}{First mean}
11 | 
12 | \item{mu2}{Second mean}
13 | 
14 | \item{sigma}{Common standard deviation}
15 | }
16 | \value{
17 | Returns a list with
18 | \itemize{
19 | 
20 | \item "xy": the pair of samples \eqn{(x,y)}
21 | 
22 | \item "identical": TRUE if \eqn{x = y}, FALSE otherwise
23 | }
24 | }
25 | \description{
26 | Sample from reflection-maximal coupling of two univariate Normal distributions,
27 | specified through their means, with common standard deviation.
28 | See \code{\link{rmvnorm_reflectionmax}} for a multivariate version.
29 | }
30 | 


--------------------------------------------------------------------------------
/man/sample_coupled_chains.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/coupled_chains.R
 3 | \name{sample_coupled_chains}
 4 | \alias{sample_coupled_chains}
 5 | \title{Sample coupled Markov chains}
 6 | \usage{
 7 | sample_coupled_chains(
 8 |   single_kernel,
 9 |   coupled_kernel,
10 |   rinit,
11 |   m = 1,
12 |   lag = 1,
13 |   max_iterations = Inf,
14 |   preallocate = 10
15 | )
16 | }
17 | \arguments{
18 | \item{single_kernel}{A list taking a state and returning a state, performing one step of a Markov kernel}
19 | 
20 | \item{coupled_kernel}{A list taking two states and returning two states, performing one step of a coupled Markov kernel;
21 | it also returns a boolean "identical" indicating whether the two states are identical.}
22 | 
23 | \item{rinit}{A list representing the initial state of the chain, that can be given to 'single_kernel'}
24 | 
25 | \item{m}{A time horizon: the chains are sampled until the maximum between m and the meeting time}
26 | 
27 | \item{lag}{A time lag, equal to one by default}
28 | 
29 | \item{max_iterations}{A maximum number of iterations, at which to interrup the while loop; Inf by default}
30 | 
31 | \item{preallocate}{A number of anticipated iterations, used to pre-allocate memory; 10 by default}
32 | }
33 | \value{
34 | A list with
35 | \itemize{
36 | 
37 | \item samples1: the first chain, of length max(m, tau)
38 | 
39 | \item samples2: the second chain, of length max(m, tau) - lag
40 | 
41 | \item meetingtime: the meeting time; equal to Inf if while loop was interrupted
42 | 
43 | \item iteration: final iteration; could be equal to m, to meetingtime, or to max_iterations
44 | 
45 | \item elapsedtime: elapsed wall-clock time, in seconds
46 | 
47 | \item cost: computing cost in terms of calls to Markov kernels (counting coupled kernel as twice the cost)
48 | }
49 | }
50 | \description{
51 | Sample two Markov chains, each following 'single_kernel' marginally,
52 | and 'coupled_kernel' jointly, until min(max(tau, m), max_iterations), where tau
53 | is the first time the two chains meet (the "meeting time").
54 | 
55 | Or more precisely, they meet with a delay of lag, i.e. X_t = Y_{t-lag}, and lag is one by default.
56 | 
57 | Once the coupled chains are obtained, unbiased estimators can be computed for arbitrary test
58 | functions via the function \code{\link{H_bar}}.
59 | 
60 | If you're only interested in sampling meeting times, see \code{\link{sample_meetingtime}}.
61 | }
62 | 


--------------------------------------------------------------------------------
/man/sample_meetingtime.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/meetingtime.R
 3 | \name{sample_meetingtime}
 4 | \alias{sample_meetingtime}
 5 | \title{Sample coupled Markov chains until meeting}
 6 | \usage{
 7 | sample_meetingtime(
 8 |   single_kernel,
 9 |   coupled_kernel,
10 |   rinit,
11 |   lag = 1,
12 |   max_iterations = Inf
13 | )
14 | }
15 | \arguments{
16 | \item{single_kernel}{A list taking a state and returning a state, performing one step of a Markov kernel}
17 | 
18 | \item{coupled_kernel}{A list taking two states and returning two states, performing one step of a coupled Markov kernel;
19 | it also returns a boolean "identical" indicating whether the two states are identical.}
20 | 
21 | \item{rinit}{A list representing the initial state of the chain, that can be given to 'single_kernel'}
22 | 
23 | \item{lag}{A time lag, equal to one by default}
24 | 
25 | \item{max_iterations}{A maximum number of iterations, at which to interrup the while loop; Inf by default}
26 | }
27 | \value{
28 | A list with
29 | \itemize{
30 | 
31 | \item meetingtime: the meeting time; equal to Inf if while loop was interrupted
32 | 
33 | \item elapsedtime: elapsed wall-clock time, in seconds
34 | }
35 | }
36 | \description{
37 | Sample two Markov chains, each following 'single_kernel' marginally,
38 | until they meet, and report the meeting time, as well as the elapsed wall-clock time in seconds.
39 | 
40 | This function does not record the trajectories of the chains, with the goal of being memory-light.
41 | To record these trajectories, see \code{\link{sample_coupled_chains}}. To directly
42 | compute unbiased estimators on the fly, see  \code{\link{sample_unbiasedestimator}}.
43 | }
44 | 


--------------------------------------------------------------------------------
/man/sample_unbiasedestimator.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/unbiasedestimator.R
 3 | \name{sample_unbiasedestimator}
 4 | \alias{sample_unbiasedestimator}
 5 | \title{Unbiased MCMC estimators}
 6 | \usage{
 7 | sample_unbiasedestimator(
 8 |   single_kernel,
 9 |   coupled_kernel,
10 |   rinit,
11 |   h = function(x) x,
12 |   k = 0,
13 |   m = 1,
14 |   lag = 1,
15 |   max_iterations = Inf
16 | )
17 | }
18 | \arguments{
19 | \item{single_kernel}{A list taking a state and returning a state, performing one step of a Markov kernel}
20 | 
21 | \item{coupled_kernel}{A list taking two states and returning two states, performing one step of a coupled Markov kernel;
22 | it also returns a boolean "identical" indicating whether the two states are identical.}
23 | 
24 | \item{rinit}{A list representing the initial state of the chain, that can be given to 'single_kernel'}
25 | 
26 | \item{h}{A test function of interest, which should take a chain state ("chain_state" entry of the output of "rinit", for instance)
27 | and return a numeric vector}
28 | 
29 | \item{k}{An integer at which to start computing the unbiased estimator}
30 | 
31 | \item{m}{A time horizon: the chains are sampled until the maximum between m and the meeting time}
32 | 
33 | \item{lag}{A time lag, equal to one by default}
34 | 
35 | \item{max_iterations}{A maximum number of iterations, at which to interrup the while loop; Inf by default}
36 | }
37 | \value{
38 | A list with
39 | \itemize{
40 | 
41 | \item mcmcestimator: an MCMC estimator computed on the first chain, from step k to m
42 | 
43 | \item correction: the bias correction term
44 | 
45 | \item uestimator: unbiased estimator, equal to the sum of mcmcestimator and correction
46 | 
47 | \item meetingtime: the meeting time; equal to Inf if while loop was interrupted
48 | 
49 | \item iteration: final iteration; could be equal to m, to meetingtime, or to max_iterations
50 | 
51 | \item elapsedtime: elapsed wall-clock time, in seconds
52 | 
53 | \item cost: computing cost in terms of calls to Markov kernels (counting coupled kernel as twice the cost)
54 | }
55 | }
56 | \description{
57 | Sample two Markov chains, each following 'single_kernel' marginally,
58 | and 'coupled_kernel' jointly, until min(max(tau, m), max_iterations), where tau
59 | is the first time the two chains meet (the "meeting time"). An unbiased estimator
60 | of the expectation of a test function h is computed on the fly, between step k and step m, and returned.
61 | 
62 | Allows for an arbitrary lag, i.e. X_t = Y_{t-lag}, and lag is one by default.
63 | 
64 | Compared to \code{\link{sample_coupled_chains}} this function requires specifying
65 | the test function, but does not record the trajectories, and thus is memory-light.
66 | 
67 | If you're only interested in sampling meeting times, see \code{\link{sample_meetingtime}}.
68 | }
69 | 


--------------------------------------------------------------------------------
/man/setmytheme.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{setmytheme}
 4 | \alias{setmytheme}
 5 | \title{Customize graphical settings}
 6 | \usage{
 7 | setmytheme()
 8 | }
 9 | \description{
10 | This function customizes the theme used by ggplot2. Loads the packages ggplot2 and
11 | ggthemes.
12 | }
13 | 


--------------------------------------------------------------------------------
/man/unbiasedmcmc-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/unbiasedmcmc-package.R
 3 | \docType{package}
 4 | \name{unbiasedmcmc-package}
 5 | \alias{unbiasedmcmc-package}
 6 | \alias{unbiasedmcmc}
 7 | \title{unbiasedmcmc}
 8 | \description{
 9 | Unbiased MCMC estimators with couplings
10 | }
11 | \details{
12 | This package contains scripts to reproduce the figures of the
13 | paper "Unbiased Markov chain Monte Carlo with couplings" by
14 | Pierre E. Jacob, John O'Leary, Yves F Atchade, available on arXiv at
15 | https://arxiv.org/abs/1708.03625
16 | }
17 | \author{
18 | Pierre E. Jacob <pierre.jacob.work@gmail.com>, John O'Leary, Yves F. Atchade
19 | }
20 | \keyword{package}
21 | 


--------------------------------------------------------------------------------
/src/PolyaGamma.h:
--------------------------------------------------------------------------------
 1 | // -*- mode: c++; -*-
 2 | 
 3 | ////////////////////////////////////////////////////////////////////////////////
 4 | 
 5 | // Copyright 2014 Nick Polson, James Scott, and Jesse Windle.
 6 | 
 7 | // This file is part of BayesLogit.
 8 | 
 9 | // BayesLogit is free software: you can redistribute it and/or modify it under
10 | // the terms of the GNU General Public License as published by the Free Software
11 | // Foundation, either version 3 of the License, or any later version.
12 | 
13 | // BayesLogit is distributed in the hope that it will be useful, but WITHOUT ANY
14 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
15 | // A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
16 | 
17 | // You should have received a copy of the GNU General Public License along with
18 | // BayesLogit.  If not, see <http://www.gnu.org/licenses/>.
19 | 
20 | ////////////////////////////////////////////////////////////////////////////////
21 | 
22 | // See <http://arxiv.org/abs/1205.0310> for implementation details.
23 | 
24 | #ifndef __POLYAGAMMA__
25 | #define __POLYAGAMMA__
26 | 
27 | #include "RNG.h"
28 | #include <cmath>
29 | #include <vector>
30 | 
31 | using std::vector;
32 | 
33 | // The numerical accuracy of __PI will affect your distribution.
34 | const double __PI = 3.141592653589793238462643383279502884197;
35 | const double HALFPISQ = 0.5 * __PI * __PI;
36 | const double FOURPISQ = 4 * __PI * __PI;
37 | const double __TRUNC = 0.64;
38 | const double __TRUNC_RECIP = 1.0 / __TRUNC;
39 | 
40 | class PolyaGamma
41 | {
42 | 
43 |   // For sum of Gammas.
44 |   int T;
45 |   vector<double> bvec;
46 | 
47 |  public:
48 | 
49 |   // Constructors.
50 |   PolyaGamma(int trunc = 200);
51 | 
52 |   // Draw.
53 |   // double draw(double n, double z, RNG& r);
54 |   double draw(int n, double z, RNG& r);
55 |   double draw_sum_of_gammas(double n, double z, RNG& r);
56 |   double draw_like_devroye(double z, RNG& r);
57 | 
58 |   //void draw(MF x, double a, double z, RNG& r);
59 |   //void draw(MF x, MF a, MF z, RNG& r);
60 | 
61 |   // Utility.
62 |   void set_trunc(int trunc);
63 | 
64 |   // Helper.
65 |   double a(int n, double x);
66 |   double pigauss(double x, double Z);
67 |   double mass_texpon(double Z);
68 |   double rtigauss(double Z, RNG& r);
69 | 
70 |   static double jj_m1(double b, double z);
71 |   static double jj_m2(double b, double z);
72 |   static double pg_m1(double b, double z);
73 |   static double pg_m2(double b, double z);
74 | 
75 | };
76 | 
77 | #endif
78 | 


--------------------------------------------------------------------------------
/src/RRNG.cpp:
--------------------------------------------------------------------------------
  1 | #include "RRNG.h"
  2 | #include <cmath>
  3 | 
  4 | using std::pow;
  5 | using std::fabs;
  6 | using std::sqrt;
  7 | using std::log;
  8 | using std::exp;
  9 | 
 10 | //////////////////////////////////////////////////////////////////////
 11 | 		      // R Random Variates //
 12 | //////////////////////////////////////////////////////////////////////
 13 | 
 14 | //--------------------------------------------------------------------
 15 | // Distributions with one parameter.
 16 | 
 17 | #define ONEP(NAME, CALL, P1)			\
 18 |   double BasicRNG::NAME(double P1)	\
 19 |   {						\
 20 |     return CALL (P1);				\
 21 |   }						\
 22 | 
 23 | ONEP(expon_mean, rexp  , mean)
 24 | ONEP(chisq     , rchisq, df  )
 25 | 
 26 | #undef ONEP
 27 | 
 28 | //--------------------------------------------------------------------
 29 | // Distributions with two parameters.
 30 | 
 31 | #define TWOP(NAME, CALL, P1, P2)			\
 32 |   double BasicRNG::NAME(double P1, double P2)	\
 33 |   {							\
 34 |     return CALL (P1, P2);				\
 35 |   }							\
 36 | 
 37 | TWOP(gamma_scale, rgamma, shape, scale)
 38 | TWOP(norm      , rnorm , mean , sd  )
 39 | TWOP(flat      , runif , a    , b   )
 40 | TWOP(beta      , rbeta , a    , b   )
 41 | 
 42 | // x ~ Gamma(shape=a, scale=b)
 43 | // x ~ x^{a-1} exp(x / b).
 44 | 
 45 | #undef TWOP
 46 | 
 47 | //--------------------------------------------------------------------
 48 | 			    // Uniform //
 49 | 
 50 | double BasicRNG::unif()
 51 | {
 52 |   return unif_rand();
 53 | } // unif
 54 | 
 55 | //--------------------------------------------------------------------
 56 | 			  // Exponential //
 57 | double BasicRNG::expon_rate(double rate)
 58 | {
 59 |   return expon_mean(1.0 / rate);
 60 | }
 61 | 
 62 | //--------------------------------------------------------------------
 63 | 			    // Normal //
 64 | 
 65 | double BasicRNG::norm(double sd)
 66 | {
 67 |   return rnorm(0, sd);
 68 | } // norm
 69 | 
 70 | //--------------------------------------------------------------------
 71 | 			   // gamma_rate //
 72 | 
 73 | double BasicRNG::gamma_rate(double shape, double rate)
 74 | {
 75 |   return gamma_scale(shape, 1.0 / rate);
 76 | }
 77 | 
 78 | //--------------------------------------------------------------------
 79 | 			   // Inv-Gamma //
 80 | 
 81 | // a = shape, b = scale
 82 | // x ~ IG(shape, scale) ~ x^{-a-1} exp(b / x).
 83 | // => 1/x ~ Ga(shape, scale*=1/scale).
 84 | 
 85 | double BasicRNG::igamma(double shape, double scale)
 86 | {
 87 |   return 1.0/rgamma(shape, 1.0 / scale);
 88 | } // igamma
 89 | 
 90 | ////////////////////////////////////////////////////////////////////////////////
 91 | 
 92 | double BasicRNG::p_norm(double x, int use_log)
 93 | {
 94 |   return pnorm(x, 0.0, 1.0, 1, use_log);
 95 | }
 96 | 
 97 | double BasicRNG::p_gamma_rate(double x, double shape, double rate, int use_log)
 98 | {
 99 |   double scale = 1.0 / rate;
100 |   return pgamma(x, shape, scale, 1, use_log);
101 | }
102 | 
103 | ////////////////////////////////////////////////////////////////////////////////
104 | 
105 | double BasicRNG::Gamma (double x, int use_log)
106 | {
107 |   double y = lgammafn(x);
108 |   if (!use_log) y = exp(y);
109 |   return y;
110 | }
111 | 
112 | ////////////////////////////////////////////////////////////////////////////////
113 | 
114 | double BasicRNG::d_beta(double x, double a, double b)
115 | {
116 |   return dbeta(x, a, b, false);
117 | }
118 | 
119 | ////////////////////////////////////////////////////////////////////////////////
120 | 
121 | 
122 | 


--------------------------------------------------------------------------------
/src/RRNG.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2012 Jesse Windle - jesse.windle@gmail.com
 2 | 
 3 | // This program is free software: you can redistribute it and/or
 4 | // modify it under the terms of the GNU General Public License as
 5 | // published by the Free Software Foundation, either version 3 of the
 6 | // License, or (at your option) any later version.
 7 | 
 8 | // This program is distributed in the hope that it will be useful, but
 9 | // WITHOUT ANY WARRANTY; without even the implied warranty of
10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11 | // General Public License for more details.
12 | 
13 | // You should have received a copy of the GNU General Public License
14 | // along with this program.  If not, see
15 | // <http://www.gnu.org/licenses/>.
16 | 
17 | //////////////////////////////////////////////////////////////////////
18 | 
19 | // YOU MUST ALWAYS CALL GetRNGSeed() and PutRNGSeed() WHEN USING THESE FUNCTIONS!!!
20 | 
21 | //////////////////////////////////////////////////////////////////////
22 | 
23 | #ifndef __BASICRNG__
24 | #define __BASICRNG__
25 | 
26 | #include "R.h"
27 | #include "Rmath.h"
28 | // #include "Matrix.h"
29 | 
30 | class BasicRNG {
31 | 
32 |  public:
33 | 
34 |   // Random variates.
35 |   double unif  ();                             // Uniform
36 |   double expon_mean(double mean);                  // Exponential
37 |   double expon_rate(double rate);                  // Exponential
38 |   double chisq (double df);                    // Chisq
39 |   double norm  (double sd);                    // Normal
40 |   double norm  (double mean , double sd);      // Normal
41 |   double gamma_scale (double shape, double scale); // Gamma_Scale
42 |   double gamma_rate  (double shape, double rate);  // Gamma_Rate
43 |   double igamma(double shape, double scale);   // Inv-Gamma
44 |   double flat  (double a=0  , double b=1  );   // Flat
45 |   double beta  (double a=1.0, double b=1.0);   // Beta
46 | 
47 |   int bern  (double p);                     // Bernoulli
48 | 
49 |   // CDF
50 |   static double p_norm (double x, int use_log=0);
51 |   static double p_gamma_rate(double x, double shape, double rate, int use_log=0);
52 | 
53 |   // Density
54 |   static double d_beta(double x, double a, double b);
55 | 
56 |   // Utility
57 |   static double Gamma (double x, int use_log=0);
58 | 
59 | }; // BasicRNG
60 | 
61 | #endif
62 | 


--------------------------------------------------------------------------------
/src/blassoutil.cpp:
--------------------------------------------------------------------------------
 1 | #include <RcppEigen.h>
 2 | #include "mvnorm.h"
 3 | using namespace Rcpp;
 4 | using namespace std;
 5 | using namespace Eigen;
 6 | 
 7 | 
 8 | //   D_tau_inv <- diag(1/tau2, p, p)
 9 | //   A <- XtX + D_tau_inv
10 | //   A_inv <- solve(A)
11 | //   beta <- t(fast_rmvnorm(1, (A_inv %*% XtY)[,1], sigma2 * A_inv))
12 | //   norm <- sum((Y - X %*% beta)^2)
13 | //   betaDbeta <- sum(beta^2 / tau2)
14 | //
15 | // [[Rcpp::export]]
16 | List blassoconditional(const Eigen::VectorXd & Y, const Eigen::MatrixXd & X,
17 |                        const Eigen::VectorXd & XtY, const Eigen::MatrixXd & XtX,
18 |                        const NumericVector tau2, const double sigma2){
19 |   int p = X.cols();
20 | 
21 |   Eigen::MatrixXd D_tau_inv(p, p);
22 |   D_tau_inv.setIdentity();
23 |   for (int i=0; i < p; i++){
24 |     D_tau_inv(i,i) /= tau2(i);
25 |   }
26 |   Eigen::MatrixXd A_inv(p, p);
27 |   A_inv = (XtX + D_tau_inv).inverse();
28 |   NumericVector mean(wrap(A_inv * XtY));
29 |   NumericMatrix Sigma(wrap(A_inv.array() * sigma2));
30 |   NumericMatrix beta = fast_rmvnorm_(1, mean, Sigma);
31 |   Eigen::VectorXd beta_eigen = as<Eigen::VectorXd >(beta);
32 |   double norm = (Y - X * beta_eigen).array().square().sum();
33 |   double betaDbeta = 0;
34 |   for (int i=0; i < p; i++){
35 |     betaDbeta += beta_eigen(i) * beta_eigen(i) / tau2(i);
36 |   }
37 |   return List::create(Named("beta") = beta.row(0), Named("norm") = norm,
38 |                       Named("betaDbeta") = betaDbeta);
39 | }
40 | 
41 | 
42 | // [[Rcpp::export]]
43 | List blassoconditional_coupled(const Eigen::VectorXd & Y, const Eigen::MatrixXd & X,
44 |                        const Eigen::VectorXd & XtY, const Eigen::MatrixXd & XtX,
45 |                        const NumericVector & tau21, const NumericVector & tau22, const double sigma21, const double sigma22){
46 |   int p = X.cols();
47 |   Eigen::MatrixXd D_tau_inv1(p, p);
48 |   D_tau_inv1.setIdentity();
49 |   for (int i=0; i < p; i++){
50 |     D_tau_inv1(i,i) /= tau21(i);
51 |   }
52 |   Eigen::MatrixXd A_inv1(p, p);
53 |   A_inv1 = (XtX + D_tau_inv1).inverse();
54 |   NumericVector mean1(wrap(A_inv1 * XtY));
55 |   NumericMatrix Sigma1(wrap(A_inv1.array() * sigma21));
56 |   Eigen::MatrixXd D_tau_inv2(p, p);
57 |   D_tau_inv2.setIdentity();
58 |   for (int i=0; i < p; i++){
59 |     D_tau_inv2(i,i) /= tau22(i);
60 |   }
61 |   Eigen::MatrixXd A_inv2(p, p);
62 |   A_inv2 = (XtX + D_tau_inv2).inverse();
63 |   NumericVector mean2(wrap(A_inv2 * XtY));
64 |   NumericMatrix Sigma2(wrap(A_inv2.array() * sigma22));
65 | 
66 | 
67 |   List betas_coupled = rmvnorm_max_coupling_(mean1, mean2, Sigma1, Sigma2);
68 |   NumericMatrix betas = betas_coupled["xy"];
69 |   // NumericMatrix beta = rmvnorm(1, mean, Sigma);
70 |   Eigen::VectorXd beta_eigen1(p);
71 |   Eigen::VectorXd beta_eigen2(p);
72 |   for (int i=0; i < p; i++){
73 |     beta_eigen1(i) = betas(i,0);
74 |     beta_eigen2(i) = betas(i,1);
75 |   }
76 |   // = as<Eigen::VectorXd >(betas.col(1));
77 |   // = as<Eigen::VectorXd >(betas.col(0));
78 |   double norm1 = (Y - X * beta_eigen1).array().square().sum();
79 |   double norm2 = (Y - X * beta_eigen2).array().square().sum();
80 |   double betaDbeta1 = 0.;
81 |   double betaDbeta2 = 0.;
82 |   for (int i=0; i < p; i++){
83 |     betaDbeta1 += beta_eigen1(i) * beta_eigen1(i) / tau21(i);
84 |     betaDbeta2 += beta_eigen2(i) * beta_eigen2(i) / tau22(i);
85 |   }
86 |   return List::create(Named("beta1") = betas(_,0), Named("beta2") = betas(_,1),
87 |                       Named("norm1") = norm1, Named("norm2") = norm2,
88 |                       Named("betaDbeta1") = betaDbeta1, Named("betaDbeta2") = betaDbeta2);
89 | }
90 | 


--------------------------------------------------------------------------------
/src/c_chains_to_measure.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rcpp.h>
 2 | #include <math.h>
 3 | using namespace Rcpp;
 4 | 
 5 | // This function takes coupled chains, containing chains X (samples1) and Y (samples2)
 6 | // and returns a signed empirical measure with atoms and weights.
 7 | // lag is an integer >= 1.
 8 | 
 9 | // The chains are generated as follows:
10 | // X_0 ~ pi_0, then X_t | X_{t-1} ~ P(X_{t-1}, dot) for t=1,...,lag,
11 | // Y_0 ~ pi_0.
12 | 
13 | // Then they are coupled, to produce
14 | // (X_{t},Y_{t-lag})|(X_{t-1},Y_{t-lag-1}) ~ bar{P}((X_{t-1},Y_{t-lag-1}), dot)
15 | // for t = lag+1,...,max(m,tau)
16 | // The meeting time is denoted by tau; it is the smallest t such that X_{t} = Y_{t-lag}.
17 | 
18 | // Thus upon completion, we have
19 | // X_0,..., X_{max(m,tau)}  (of length 1+max(m, tau))
20 | // Y_0,..., Y_{max(m,tau)-lag} (of length 1+max(m, tau) - lag)
21 | 
22 | // The signed measure approximation, for a general lag, is given by
23 | // (MCMC):             (m-k+1)^{-1} \sum_{t=k}^m delta_{X_t}
24 | // (bias correction):  (m-k+1)^{-1} \sum_{t=k+lag}^{tau-1} (floor((t-k) / lag) - ceil(max(lag, t-m)/lag) + 1) {delta_{X_t} - delta_{Y_{t-lag}}}
25 | 
26 | // Thus total number of atoms is
27 | // m-k+1                    from the MCMC approximation
28 | // + 2 * max(0, (tau-1) - (k+lag) + 1) from the bias correction term
29 | //  the max above simplifies to max(0, tau - (k + lag))
30 | 
31 | // Of course some of the atoms are identical but we can deal with that separately,
32 | // by pruning the generated data set later.
33 | 
34 | // [[Rcpp::export]]
35 | List c_chains_to_measure_as_list_(const List & c_chains, int k, int m){
36 |   int meetingtime = c_chains["meetingtime"];
37 |   NumericMatrix samples1 = c_chains["samples1"];
38 |   NumericMatrix samples2 = c_chains["samples2"];
39 |   int lag = samples1.nrow() - samples2.nrow();
40 |   int size;
41 |   if (((meetingtime-1) - (k+lag) + 1) > 0){
42 |     size = (m - k + 1) + 2 * ((meetingtime-1) - (k+lag) + 1);
43 |   } else {
44 |     size = (m - k + 1);
45 |   }
46 |   // equal weight for each atom in the MCMC part
47 |   double eqweight = 1. / (double) (m - k + 1);
48 |   // number of columns in samples1
49 |   // (should be equal to number of columns in samples2)
50 |   int dimension = samples1.cols();
51 |   // matrix of all atoms
52 |   NumericMatrix atoms(size, dimension);
53 |   // vector of all weights
54 |   NumericVector weights(size);
55 |   // vector indicating which atoms are part of the MCMC measure and which are part of the bias correction
56 |   // TRUE refers to MCMC, FALSE to bias correction
57 |   LogicalVector whichpart(size);
58 |   // fill in MCMC part
59 |   for (int i = 0; i < (m-k+1); i ++){
60 |     atoms(i,_) = samples1(k+i,_);
61 |     weights(i) = eqweight;
62 |     whichpart(i) = true;
63 |   }
64 |   // index keeps track of which row to fill in "atoms"
65 |   int index = m-k+1;
66 |   // now bias correction part
67 |   if (((meetingtime-1) - (k+lag) + 1) > 0){
68 |     for (int time = k+lag; time <= meetingtime-1; time ++){
69 |       atoms(index,_) = samples1(time,_);
70 |       atoms(index+1,_) = samples2(time-lag,_);
71 |       // weights(index) = ceil(((double) time - k)/((double) lag));
72 |       //if ((m - k + 1) < weights(index)){
73 |       //  weights(index) = m - k + 1;
74 |       //}
75 |       weights(index) = floor(((double) time - k) / (double) lag) - ceil(std::max<double>(lag, (double) time - m)/ (double) lag) + 1.;
76 |       weights(index) = weights(index) * eqweight;
77 |       weights(index+1) = - weights(index);
78 |       whichpart(index) = false;
79 |       whichpart(index+1) = false;
80 |       index += 2;
81 |     }
82 |   }
83 |   return List::create(Named("atoms") = atoms, Named("weights") = weights, Named("MCMC") = whichpart);
84 | }
85 | 
86 | 


--------------------------------------------------------------------------------
/src/estimator_bin.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rcpp.h>
 2 | #include <math.h>
 3 | 
 4 | using namespace Rcpp;
 5 | using namespace std;
 6 | 
 7 | // given c_chains, a list produced by the function 'coupled_chains',
 8 | // returns estimator of probability of component being between lower and upper
 9 | // [[Rcpp::export]]
10 | double estimator_bin_(List c_chains, int component, double lower, double upper, int k, int m, int lag){
11 |   int meetingtime = c_chains["meetingtime"];
12 |   NumericMatrix samples1 = c_chains["samples1"];
13 |   NumericMatrix samples2 = c_chains["samples2"];
14 |   double estimator = 0;
15 |   for (int isample = k; isample <= m; isample ++){
16 |     if (samples1(isample,component-1) > lower && samples1(isample,component-1) < upper){
17 |       estimator += 1;
18 |     }
19 |   }
20 |   // next, add bias correction terms
21 |   if (meetingtime > k + lag){
22 |     double coefficient = 0.;
23 |     double increment = 0.;
24 |     for (int time = k+lag; time <= meetingtime-1; time ++){
25 |       increment = 0.;
26 |       coefficient = floor(((double) time - k) / (double) lag) - ceil(std::max<double>(lag, (double) time - m)/ (double) lag) + 1.;
27 |       // compute min between m - k + 1 and ceiling ((t - k) / L)
28 |       // coefficient = ceil(((double) time - k)/((double) lag));
29 |       // if ((m - k + 1) < coefficient){
30 |       //   coefficient = m - k + 1.;
31 |       // }
32 |       if (samples1(time,component-1) > lower && samples1(time,component-1) < upper){
33 |         increment += coefficient;
34 |       }
35 |       if (samples2(time-lag,component-1) > lower && samples2(time-lag,component-1) < upper){
36 |         increment -= coefficient;
37 |       }
38 |       estimator += increment;
39 |     }
40 |   }
41 |   return estimator / (m - k + 1.);
42 | }
43 | 


--------------------------------------------------------------------------------
/src/inversegaussian.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rcpp.h>
 2 | 
 3 | using namespace Rcpp;
 4 | 
 5 | // [[Rcpp::export]]
 6 | NumericVector rinvgaussian_c(int n, double mu, double lambda){
 7 |   RNGScope scope;
 8 |   NumericVector results(n);
 9 |   GetRNGstate();
10 |   NumericVector nu = rnorm(n);
11 |   NumericVector z = runif(n);
12 |   PutRNGstate();
13 |   NumericVector x = nu * nu;
14 |   x = mu + mu*mu * x / (2. * lambda) - mu / (2. * lambda) * sqrt(4. * mu * lambda * x + mu*mu * x*x);
15 |   for (int i = 0; i < n; i++){
16 |     if (z(i) <= mu / (mu + x(i))){
17 |       results(i) = x(i);
18 |     } else {
19 |       results(i) = mu * mu / x(i);
20 |     }
21 |   }
22 |   return results;
23 | }
24 | 
25 | double dinvgaussian_c(double x, double mu, double lambda){
26 |   return 0.5 * log(lambda/6.283185) - 1.5 * log(x) - lambda * (x - mu) * (x - mu) / (2 * mu * mu * x);
27 | }
28 | 
29 | 
30 | // [[Rcpp::export]]
31 | NumericVector rinvgaussian_coupled_c(double mu1, double mu2, double lambda1, double lambda2){
32 |   RNGScope scope;
33 |   NumericVector results(2);
34 |   NumericVector x = rinvgaussian_c(1, mu1, lambda1);
35 |   GetRNGstate();
36 |   NumericVector u = runif(1);
37 |   PutRNGstate();
38 |   if (x(0) < 1e-20){
39 |     x(0) = 1e-20;
40 |   }
41 |   if (dinvgaussian_c(x(0), mu1, lambda1) + log(u(0)) < dinvgaussian_c(x(0), mu2, lambda2)){
42 |     results(0) = x(0);
43 |     results(1) = x(0);
44 |   } else {
45 |     bool reject = true;
46 |     NumericVector y(1);
47 |     while (reject){
48 |       y = rinvgaussian_c(1, mu2, lambda2);
49 |       if (y(0) < 1e-20){
50 |         y(0) = 1e-20;
51 |       }
52 |       GetRNGstate();
53 |       u = runif(1);
54 |       PutRNGstate();
55 |       reject = (dinvgaussian_c(y(0), mu2, lambda2) + log(u(0)) < dinvgaussian_c(y(0), mu1, lambda1));
56 |     }
57 |     results(0) = x(0);
58 |     results(1) = y(0);
59 |   }
60 | 
61 |   return results;
62 | }
63 | 
64 | 


--------------------------------------------------------------------------------
/src/ising.cpp:
--------------------------------------------------------------------------------
 1 | #include <RcppEigen.h>
 2 | #include <cmath>
 3 | using namespace Rcpp;
 4 | 
 5 | // given a square grid x = {x_i}, e.g. 32x32 or something
 6 | // with each x_i in {-1,+1}
 7 | // the following computes sum_{i~j} x_i x_j
 8 | // where i~j denotes the neighboring relation; here we use perodic boundary condition
 9 | // so (0,0) is neighbor with (32,0) in a 32x32 grid, for instance
10 | // [[Rcpp::export]]
11 | int ising_sum_(const IntegerMatrix & state){
12 |   int size = state.rows();
13 |   int s = 0;
14 |   // below, (i1,j1) will index a neighbor of (i,j)
15 |   int i1;
16 |   int j1;
17 |   for (int i = 0; i < size; i++){
18 |     if (i == (size - 1)){
19 |       i1 = 0;
20 |     } else {
21 |       i1 = i+1;
22 |     }
23 |     for (int j = 0; j < size; j++){
24 |       if (j == (size - 1)){
25 |         j1=0;
26 |       } else {
27 |         j1 = j+1;
28 |       }
29 |       // sums over two neighbors ("top right" neighbors
30 |       // of current location, if counting from bottom left corner)
31 |       s += state(i,j) * (state(i,j1) + state(i1,j));
32 |     }
33 |   }
34 |   return s;
35 | }
36 | 
37 | // given a square grid 'state'
38 | // and a vector proba_beta of values between 0 and 1
39 | // corresponding to probabilities of drawing a +1 given the neighbors sum to
40 | // -4, -2, 0, 2, 4 (respectively, so proba_beta should contain 5 values)
41 | // then the following functions perform a sweep over all locations in the grid and performs a flip
42 | // [[Rcpp::export]]
43 | IntegerMatrix ising_gibbs_sweep_(IntegerMatrix state, NumericVector proba_beta){
44 |   RNGScope scope;
45 |   int size = state.rows();
46 |   int s;
47 |   int itop, ibottom, jright, jleft;
48 |   for (int i = 0; i < size; i++){
49 |     for (int j = 0; j < size; j++){
50 |       s = 0;
51 |       itop = (i+1) % size;
52 |       ibottom = ((i + size - 1) % size);
53 |       jright = (j+1) % size;
54 |       jleft = (j + size - 1) % size;
55 |       s += state(itop, j) + state(ibottom, j) + state(i, jright) + state(i, jleft);
56 |       GetRNGstate();
57 |       state(i,j) = 2*((runif(1))(0) < proba_beta((s+4)/2)) - 1;
58 |       PutRNGstate();
59 |     }
60 |   }
61 |   return state;
62 | }
63 | 
64 | // coupled version of single-site Gibbs update,
65 | // where the strategy is to maximally couple each conditional update
66 | // [[Rcpp::export]]
67 | List ising_coupled_gibbs_sweep_(IntegerMatrix state1, IntegerMatrix state2, NumericVector proba_beta){
68 |   RNGScope scope;
69 |   int size = state1.rows();
70 |   int s1;
71 |   int s2;
72 |   int itop,ibottom,jright,jleft;
73 |   IntegerVector x(2);
74 |   for (int i = 0; i < size; i++){
75 |     for (int j = 0; j < size; j++){
76 |       s1 = 0;
77 |       s2 = 0;
78 |       itop = (i+1) % size;
79 |       ibottom = ((i + size - 1) % size);
80 |       jright = (j+1) % size;
81 |       jleft = (j + size - 1) % size;
82 |       s1 += state1(itop, j) + state1(ibottom, j) + state1(i, jright) + state1(i, jleft);
83 |       s2 += state2(itop, j) + state2(ibottom, j) + state2(i, jright) + state2(i, jleft);
84 |       GetRNGstate();
85 |       double u_ = (runif(1))(0);
86 |       PutRNGstate();
87 |       state1(i,j) = 2*(u_ < proba_beta((s1+4)/2)) - 1;
88 |       state2(i,j) = 2*(u_ < proba_beta((s2+4)/2)) - 1;
89 |     }
90 |   }
91 |   return List::create(Named("state1") = state1, Named("state2") = state2);
92 | }
93 | 
94 | 
95 | 


--------------------------------------------------------------------------------
/src/logisticregression.cpp:
--------------------------------------------------------------------------------
 1 | #include <RcppEigen.h>
 2 | using namespace Rcpp;
 3 | 
 4 | // [[Rcpp::export]]
 5 | NumericMatrix sigma_(const NumericMatrix & X, const NumericVector & w){
 6 |   int n = X.rows();
 7 |   int p = X.cols();
 8 |   NumericMatrix inv_sigma(p,p);
 9 |   for (int j1 = 0; j1 < p; j1 ++){
10 |     for (int j2 = j1; j2 < p; j2 ++){
11 |       inv_sigma(j1,j2) = 0;
12 |       for (int i = 0; i < n; i++){
13 |         inv_sigma(j1,j2) = inv_sigma(j1,j2) + X(i,j1) * X(i,j2) * w(i);
14 |       }
15 |     }
16 |   }
17 |   for (int j1 = 1; j1 < p; j1 ++){
18 |     for (int j2 = 0; j2 < j1; j2 ++){
19 |       inv_sigma(j1,j2) = inv_sigma(j2,j1);
20 |     }
21 |   }
22 |   return inv_sigma;
23 | }
24 | 
25 | // The following function computes m(omega) and Sigma(omega)... (or what we really need instead)
26 | // it returns m (= m(omega)), Sigma_inverse = Sigma(omega)^{-1},
27 | // as well as Cholesky_inverse and Cholesky that are such that
28 | // Cholesky_inverse is the lower triangular matrix L, in the decomposition Sigma^{-1} = L L^T
29 | // whereas Cholesky is the lower triangular matrix Ltilde in the decomposition Sigma = Ltilde^T Ltilde
30 | 
31 | // [[Rcpp::export]]
32 | List m_sigma_function_(const Eigen::Map<Eigen::MatrixXd>  & omega,
33 |                        const Eigen::Map<Eigen::MatrixXd>  & X,
34 |                        const Eigen::Map<Eigen::MatrixXd>  & invB,
35 |                        const Eigen::Map<Eigen::VectorXd>  & KTkappaplusinvBtimesb){
36 |   int n = X.rows();
37 |   int p = X.cols();
38 |   // The matrix A stores XT Omega X + B^{-1}, that is, Sigma^{-1}
39 |   Eigen::MatrixXd A(p,p);
40 |   for (int j1 = 0; j1 < p; j1 ++){
41 |     for (int j2 = j1; j2 < p; j2 ++){
42 |       A(j1,j2) = invB(j1, j2);
43 |       for (int i = 0; i < n; i++){
44 |         A(j1,j2) = A(j1,j2) + X(i,j1) * X(i,j2) * omega(i);
45 |       }
46 |       A(j2,j1) = A(j1,j2);
47 |     }
48 |   }
49 |   Eigen::LLT<Eigen::MatrixXd> lltofA(A);
50 |   Eigen::MatrixXd lower = lltofA.matrixL();
51 |   Eigen::VectorXd x = lltofA.solve(KTkappaplusinvBtimesb);
52 |   return List::create(Named("m")=x,
53 |                       Named("Sigma_inverse") = A,
54 |                       Named("Cholesky_inverse") = lower,
55 |                       Named("Cholesky") = lower.inverse());
56 | }
57 | 


--------------------------------------------------------------------------------
/src/logisticregressioncoupling.cpp:
--------------------------------------------------------------------------------
  1 | #include <Rcpp.h>
  2 | #include "RNG.h"
  3 | #include "PolyaGamma.h"
  4 | 
  5 | using namespace Rcpp;
  6 | 
  7 | 
  8 | // [[Rcpp::export]]
  9 | double logcosh(double x){
 10 |   double result = 0.;
 11 |   if (x > 0.){
 12 |     result = x + log(1.0 + exp(-2.0*x)) - 0.6931472;
 13 |   } else {
 14 |     result = -x + log(1.0 + exp(2.0*x)) - 0.6931472;
 15 |   }
 16 |   return result;
 17 | }
 18 | 
 19 | 
 20 | 
 21 | // [[Rcpp::export]]
 22 | NumericVector xbeta_(const NumericMatrix & X, const NumericVector & beta){
 23 |   int n = X.rows();
 24 |   NumericVector xbeta(n);
 25 |   for (int i = 0; i < n; i++){
 26 |     xbeta(i) = 0;
 27 |     for (int j = 0; j < X.cols(); j++){
 28 |       xbeta(i) = xbeta(i) + X(i,j) * beta(j);
 29 |     }
 30 |   }
 31 |   return(xbeta);
 32 | }
 33 | 
 34 | double rpg_devroye(int n, double z){
 35 |   RNG r;
 36 |   PolyaGamma pg(1);
 37 |   double x = pg.draw(n, z, r);
 38 |   return(x);
 39 | }
 40 | 
 41 | // [[Rcpp::export]]
 42 | NumericMatrix w_rejsamplerC(const NumericVector & beta1,
 43 |                             const NumericVector & beta2,
 44 |                             const NumericMatrix & X){
 45 |   RNGScope scope;
 46 |   int n = X.rows();
 47 |   NumericMatrix w(n,2);
 48 |   NumericVector z1s = abs(xbeta_(X, beta1));
 49 |   NumericVector z2s = abs(xbeta_(X, beta2));
 50 |   for(int i = 0; i < n; ++i){
 51 |     double z1 = z1s(i);
 52 |     double z2 = z2s(i);
 53 |     double z_min = std::min(z1,z2);
 54 |     double z_max = std::max(z1,z2);
 55 |     double w_max;
 56 |     double w_min = rpg_devroye(1,z_min);
 57 |     GetRNGstate();
 58 |     double log_u = log(runif(1,0,1)(0));
 59 |     PutRNGstate();
 60 |     double log_ratio = - 0.5 * w_min * (pow(z_max,2.)-pow(z_min,2.));
 61 |     if(log_u < log_ratio){
 62 |       w_max = w_min;
 63 |     } else {
 64 |       w_max = rpg_devroye(1,z_max);
 65 |     }
 66 |     if(z1<z2){
 67 |       w(i,0) = w_min;
 68 |       w(i,1) = w_max;
 69 |     } else {
 70 |       w(i,0) = w_max;
 71 |       w(i,1) = w_min;
 72 |     }
 73 |   }
 74 |   return w;
 75 | }
 76 | 
 77 | 
 78 | // [[Rcpp::export]]
 79 | NumericMatrix w_max_couplingC(const NumericVector & beta1,
 80 |                               const NumericVector & beta2,
 81 |                               const NumericMatrix & X){
 82 |   RNGScope scope;
 83 |   int n = X.rows();
 84 |   NumericMatrix w(n,2);
 85 |   NumericVector z1s = abs(xbeta_(X, beta1));
 86 |   NumericVector z2s = abs(xbeta_(X, beta2));
 87 |   for(int i = 0; i < n; ++i){
 88 |     double z1 = z1s(i);
 89 |     double z2 = z2s(i);
 90 |     double w1 = rpg_devroye(1,z1);
 91 |     w(i,0) = w1;
 92 |     double w2;
 93 |     GetRNGstate();
 94 |     double u1 = runif(1)(0);
 95 |     PutRNGstate();
 96 |     double logaccept1 = logcosh(z2/2.) - 0.5*z2*z2*w1 - (logcosh(z1/2.) - 0.5*z1*z1*w1);
 97 |     if(log(u1) <= logaccept1){
 98 |       w2 = w1;
 99 |     } else {
100 |       bool accept = FALSE;
101 |       while (accept==FALSE){
102 |         w2 = rpg_devroye(1,z2);
103 |         GetRNGstate();
104 |         double u2 = runif(1)(0);
105 |         PutRNGstate();
106 |         double logaccept2 = logcosh(z1/2.) - 0.5*z1*z1*w2 - (logcosh(z2/2.) - 0.5*z2*z2*w2);
107 |         if (log(u2) > logaccept2){
108 |           accept = TRUE;
109 |         }
110 |       }
111 |     }
112 |     w(i,1) = w2;
113 |   }
114 |   return w;
115 | }
116 | 
117 | 
118 | 


--------------------------------------------------------------------------------
/src/mvnorm.h:
--------------------------------------------------------------------------------
 1 | #ifndef _INCL_MVNORM_
 2 | #define _INCL_MVNORM_
 3 | #include <RcppEigen.h>
 4 | using namespace Rcpp;
 5 | 
 6 | // generate samples from a multivariate Normal distribution
 7 | NumericMatrix fast_rmvnorm_(int nsamples, const NumericVector & mean, const NumericMatrix & covariance);
 8 | NumericMatrix fast_rmvnorm_cholesky_(int nsamples, const NumericVector & mean, const Eigen::MatrixXd & cholesky);
 9 | 
10 | // evaluate probability density function of a multivariate Normal distribution
11 | NumericVector fast_dmvnorm_(const NumericMatrix & x, const NumericVector & mean, const NumericMatrix & covariance);
12 | NumericVector fast_dmvnorm_cholesky_inverse_(const NumericMatrix & x, const NumericVector & mean, const Eigen::MatrixXd & cholesky_inverse);
13 | 
14 | // Couplings --
15 | // The following functions sample from couplings of multivariate Normals.
16 | // They return a list with two entries:
17 | // "xy" is a matrix with two columns, one for x and one for y, which are Normally distributed vectors.
18 | // "identical" which is a boolean indicating whether x = y.
19 | 
20 | // sample from maximum coupling of two multivariate Normals
21 | Rcpp::List rmvnorm_max_coupling_(const NumericVector & mu1, const NumericVector & mu2, const NumericMatrix & Sigma1, const NumericMatrix & Sigma2);
22 | 
23 | 
24 |  // sample from maximum coupling of two multivariate Normals
25 | Rcpp::List rmvnorm_max_coupling_cholesky(const NumericVector & mu1, const NumericVector & mu2,
26 |                                              const Eigen::MatrixXd & Cholesky1, const Eigen::MatrixXd & Cholesky2,
27 |                                              const Eigen::MatrixXd & Cholesky_inverse1, const Eigen::MatrixXd & Cholesky_inverse2);
28 | 
29 | 
30 | // sample from reflection-maximum coupling of two multivariate Normals with common covariance matrix
31 | Rcpp::List rmvnorm_reflection_max_coupling_(const Eigen::VectorXd & mu1, const Eigen::VectorXd & mu2,
32 |                                 const Eigen::MatrixXd & Sigma_chol, const Eigen::MatrixXd & inv_Sigma_chol);
33 | 
34 | #endif
35 | 
36 | 


--------------------------------------------------------------------------------
/src/propensityscore.cpp:
--------------------------------------------------------------------------------
 1 | #include <RcppEigen.h>
 2 | using namespace Rcpp;
 3 | 
 4 | // [[Rcpp::export]]
 5 | NumericVector beta2e_(const NumericVector & beta, const NumericMatrix & C){
 6 |   double betatimescovar;
 7 |   int nbeta = beta.size();
 8 |   int nobservations = C.nrow();
 9 |   NumericVector e(nobservations);
10 |   for (int i = 0; i < nobservations; i++){
11 |     betatimescovar = beta(0);
12 |     for (int j = 0; j < nbeta-1; j++){
13 |       betatimescovar += beta(j+1) * C(i,j);
14 |     }
15 |     e(i) = 1. / (1. + exp(-betatimescovar));
16 |   }
17 |   return e;
18 | }
19 | 
20 | // function that takes a vector of doubles
21 | // compute the quintiles
22 | // and return a vector of indicators of quintile memberships,
23 | // ie with values in {1,2,3,4,5}.
24 | // [[Rcpp::export]]
25 | IntegerVector cut_in_fifth_(const NumericVector & x){
26 |   NumericVector y = clone(x);
27 |   std::sort(y.begin(), y.end());
28 |   NumericVector q = NumericVector::create(0.2, 0.4, 0.6, 0.8);
29 |   int s = x.size();
30 |   int nqs = q.size();
31 |   NumericVector quintiles(nqs);
32 |   for (int i = 0; i < nqs; i ++){
33 |     quintiles(i) = y[(int) (s * q[i])];
34 |   }
35 |   IntegerVector indicators(s);
36 |   int indicator;
37 |   for (int i = 0; i < s; i ++){
38 |     indicator = 0;
39 |     while ((x(i) >= quintiles(indicator)) && (indicator < (nqs - 1))){
40 |       indicator ++;
41 |     }
42 |     if (x(i) >= quintiles(indicator)){ indicator ++; }
43 |     indicators(i) = indicator + 1; // so that the result is between 1 and 5.
44 |   }
45 |   return indicators;
46 | }
47 | 
48 | 
49 | // [[Rcpp::export]]
50 | NumericVector propensity_module2_loglik2_(NumericMatrix theta1s, NumericMatrix theta2s, const NumericVector & X, const NumericMatrix & C, const NumericVector & Y){
51 |   int n = theta1s.nrow();
52 |   NumericVector evals(n);
53 |   std::fill(evals.begin(), evals.end(), 0);
54 |   int theta_dim = 7;
55 |   double logpr, betatimescovar;
56 |   for (int itheta = 0; itheta < n; itheta++){
57 |     NumericVector beta = theta1s.row(itheta);
58 |     int nobservations = X.size();
59 |     NumericVector e = beta2e_(beta, C);
60 |     IntegerVector indicators = cut_in_fifth_(e);
61 |     NumericVector theta = theta2s.row(itheta);
62 |     double z;
63 |     for (int i = 0; i < nobservations; i++){
64 |       z = theta(0) + theta(1) * X(i);
65 |       if (indicators(i) > 1){
66 |         z = z + theta(0 + indicators(i));
67 |       }
68 |       logpr = - log(1 + exp(-z));
69 |       evals(itheta) += logpr + (1 - Y(i)) * (-z);
70 |     }
71 |   }
72 |   return evals;
73 | }
74 | 


--------------------------------------------------------------------------------
/src/prune.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rcpp.h>
 2 | 
 3 | using namespace Rcpp;
 4 | 
 5 | // [[Rcpp::export]]
 6 | NumericMatrix prune_measure_(const NumericMatrix & df){
 7 |   // df is meant to have a first column with repeat numbers
 8 |   // second column made of 1 and 0, indicating (1) whether atom is part of MCMC part or bias correction part
 9 |   // third columns with weights
10 |   // and remaining columns (4, 5, ...) with atoms
11 |   // sorted in increasing/lexicographic order
12 |   NumericMatrix newdf(df.nrow(), df.ncol());
13 |   std::fill(newdf.begin(), newdf.end(), 0.);
14 |   // keep track of index of row in newdf in which to write
15 |   int index_row = 0;
16 |   // s will serve as a measure of difference between atoms
17 |   double s = 0;
18 |   newdf(0,_) = df(0,_);
19 |   // loop over rows of df
20 |   for (int i=1; i < df.nrow(); i++){
21 |     // do not prune if different repeat (column 0 contains repeat)
22 |     // if same repeat
23 |     if (df(i,0) == df(i-1,0)){
24 |       // sum absolute value of difference over each component of atom
25 |       s = 0;
26 |       for (int j = 3; j < df.ncol(); j++){
27 |         s += std::abs(df(i,j) - df(i-1,j));
28 |       }
29 |       // if this sum is very small then the atoms are considered equal
30 |       if (s < 1e-20){
31 |         // if so, add up the weights to current weight (column 2 contains weights)
32 |         newdf(index_row, 2)  += df(i, 2);
33 |       } else {
34 |         // new atom, add it to newdf
35 |         index_row++;
36 |         newdf(index_row,_) = df(i,_);
37 |       }
38 |     } else {
39 |       // new atom, add it to newdf
40 |       index_row++;
41 |       newdf(index_row,_) = df(i,_);
42 |     }
43 |   }
44 |   return newdf(Range(0,index_row),_);
45 | }
46 | 


--------------------------------------------------------------------------------
/src/sample_pairs01.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rcpp.h>
 2 | 
 3 | using namespace Rcpp;
 4 | using namespace std;
 5 | 
 6 | // This function takes a vector of zeros and ones,
 7 | // with at least one zero and one one
 8 | // and samples uniformly among the indices pointing to zeros,
 9 | // and uniformly among the indices pointing to ones, and
10 | // outputs the pair
11 | // [[Rcpp::export]]
12 | IntegerVector sample_pair01(const NumericVector & selection){
13 |   RNGScope scope;
14 |   int l = selection.size();
15 |   int sumones = sum(selection);
16 |   IntegerVector indices(2);
17 |   indices(0) = -1;
18 |   indices(1) = -1;
19 |   GetRNGstate();
20 |   NumericVector us = runif(2);
21 |   PutRNGstate();
22 |   double u0 = us(0);
23 |   double u1 = us(1);
24 |   double w0 = 1. / (l - sumones);
25 |   double w1 = 1. / sumones;
26 |   double csw0 = 0.;
27 |   double csw1 = 0.;
28 |   for (int k = 0; k < l; k++){
29 |     if (selection(k) == 0){
30 |       csw0 += w0;
31 |       if (indices(0) < 0 && csw0 > u0){
32 |         indices(0) = k+1;
33 |       }
34 |     } else {
35 |       csw1 += w1;
36 |       if (indices(1) < 0 && csw1 > u1){
37 |         indices(1) = k+1;
38 |       }
39 |     }
40 |   }
41 |   return indices;
42 | }
43 | 
44 | 


--------------------------------------------------------------------------------
/src/vs_mlikelihood.cpp:
--------------------------------------------------------------------------------
 1 | #include <RcppEigen.h>
 2 | using namespace Rcpp;
 3 | 
 4 | // [[Rcpp::export]]
 5 | double marginal_likelihood_c_2(Eigen::VectorXf selection, const Eigen::MatrixXf & X, const Eigen::VectorXf & Y, double Y2, double g){
 6 |   double l = 0.;
 7 |   int n = X.rows();
 8 |   int p = X.cols();
 9 |   int s = selection.sum();
10 |   Eigen::MatrixXf temp;
11 |   if (s > 0){
12 |     Eigen::MatrixXf Xselected(n,s);
13 |     int counter = 0;
14 |     for (int column = 0; column < p; column++){
15 |       if (selection(column)){
16 |         Xselected.col(counter) = X.col(column);
17 |         counter++;
18 |       }
19 |     }
20 |     temp = Y.transpose() * Xselected;
21 |     temp = temp * ((Xselected.transpose() * Xselected).inverse()) * temp.transpose();
22 |     l = temp(0,0);
23 |   } else {
24 |     l = 0.;
25 |   }
26 |   l = -((double) s + 1.) / 2. * log(g + 1.) - (double) n / 2. * log(Y2 - g / (g + 1.) * l);
27 |   return l;
28 | }
29 | 
30 | 
31 | 


--------------------------------------------------------------------------------
/unbiasedmcmc.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
22 | 


--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.png
3 | *.R
4 | *files
5 | 


--------------------------------------------------------------------------------