├── .Rbuildignore
├── .gitignore
├── CRAN-SUBMISSION
├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── NEWS
├── R
│   ├── bayesboot.R
│   └── plotPost.R
├── README.Rmd
├── README.md
├── bayesboot.Rproj
├── cran-comments.md
├── man
│   ├── as.bayesboot.Rd
│   ├── bayesboot.Rd
│   ├── figures
│   │   ├── README-car_plot-1.png
│   │   ├── README-height_comparison-1.png
│   │   ├── README-president_summary-1.png
│   │   ├── plotPost1.jpg
│   │   └── plotPost2.jpg
│   ├── plot.bayesboot.Rd
│   ├── plotPost.Rd
│   ├── print.bayesboot.Rd
│   ├── rudirichlet.Rd
│   └── summary.bayesboot.Rd
└── tests
    ├── testthat.R
    └── testthat
        ├── Rplots.pdf
        ├── test-bayesboot-deterministic.R
        └── test-bayesboot-non-deterministic.R

/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^README\.Rmd$
4 | ^README-.*\.png$
5 | ^testingground$
6 | ^blogposts$
7 | Rplots.pdf
8 | cran-comments.md
9 | ^README-files$
10 | ^CRAN-SUBMISSION$
11 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | testingground
5 | blogposts
6 | 
--------------------------------------------------------------------------------
/CRAN-SUBMISSION:
--------------------------------------------------------------------------------
1 | Version: 0.2.3
2 | Date: 2025-03-30 22:01:24 UTC
3 | SHA: 0dbf2a95589f882cd4385b4c77a5823e84d8b81e
4 | 
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: bayesboot
2 | Type: Package
3 | Title: An Implementation of Rubin's (1981) Bayesian Bootstrap
4 | Version: 0.2.3
5 | Date: 2025-03-30
6 | Authors@R: person("Rasmus", "Bååth", email = "rasmus.baath@gmail.com", role = c("aut", "cre"))
7 | Description: Functions for performing the Bayesian bootstrap as introduced by
8 |     Rubin (1981) and for summarizing the result.
9 |     The implementation can handle both summary statistics that work on a
10 |     weighted version of the data and summary statistics that work on a
11 |     resampled data set.
12 | License: MIT + file LICENSE 13 | URL: https://github.com/rasmusab/bayesboot 14 | BugReports: https://github.com/rasmusab/bayesboot/issues 15 | RoxygenNote: 7.3.2 16 | Imports: plyr (>= 1.8.3), 17 | HDInterval(>= 0.1.1) 18 | Depends: R (>= 3.2.0) 19 | Suggests: 20 | testthat, 21 | foreach, 22 | doParallel, 23 | boot 24 | Encoding: UTF-8 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2016 2 | COPYRIGHT HOLDER: Rasmus Bååth 3 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(plot,bayesboot) 4 | S3method(print,bayesboot) 5 | S3method(print,summary.bayesboot) 6 | S3method(summary,bayesboot) 7 | export(as.bayesboot) 8 | export(bayesboot) 9 | export(plotPost) 10 | export(rudirichlet) 11 | import(grDevices) 12 | import(graphics) 13 | import(stats) 14 | import(utils) 15 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | bayesboot 0.2.3 2 | =================== 3 | 4 | * Fixed issue with Rd \link{} targets missing package anchors. 5 | 6 | bayesboot 0.2.2 7 | =================== 8 | 9 | * Fixed issue with unstated dependencies in the tests. 10 | 11 | bayesboot 0.2.1 12 | ======================= 13 | 14 | * Modified the tests so that they are compatible with upcoming version of the testthat package. 15 | 16 | bayesboot 0.2.0 17 | ======================= 18 | 19 | * Added print.bayesboot method. 20 | * bayesboot now warns when the posterior sample contains NAs, NaNs and NULLs. 21 | * plot.bayesboot now correctly resets graph parameters. 22 | * Removed bayesboot::hdi and instead depend on HDInterval::hdi . 23 | 24 | 25 | bayesboot 0.1.0 26 | ======================= 27 | 28 | * Initial release. 29 | -------------------------------------------------------------------------------- /R/bayesboot.R: -------------------------------------------------------------------------------- 1 | # Importing some of the base packages which are used in the functions below. 2 | #' @import stats 3 | #' @import grDevices 4 | #' @import graphics 5 | #' @import utils 6 | NULL 7 | 8 | #' Produce random draws from a uniform Dirichlet distribution 9 | #' 10 | #' \code{rudirichlet} produces \code{n} draws from a \code{d}-dimensional 11 | #' uniform Dirichlet distribution. Here "uniform" implies that any combination 12 | #' of values on the support of the distribution is equally likely, that is, the 13 | #' \eqn{\alpha} parameters to the Dirichlet distribution are all set to 1.0. 14 | #' 15 | #' In the context of the Bayesian bootstrap \code{rudirichlet} is used to 16 | #' produces the bootstrap weights. Therefore, \code{rudirichlet} can be used if 17 | #' you directly want to generate Bayesian bootstrap weights. 18 | #' 19 | #' 20 | #' @param n the number of draws. 21 | #' @param d the dimension of the Dirichlet distribution. 22 | #' 23 | #' @return An \code{n} by \code{d} matrix. 
24 | #' @export 25 | #' 26 | #' @examples 27 | #' set.seed(123) 28 | #' rudirichlet(2, 3) 29 | #' # Should produce the following matrix: 30 | #' # [,1] [,2] [,3] 31 | #' # [1,] 0.30681 0.2097 0.4834 32 | #' # [2,] 0.07811 0.1390 0.7829 33 | #' 34 | #' # The above could be seen as a sample of two Bayesian bootstrap weights for a 35 | #' # dataset of size three. 36 | rudirichlet <- function(n, d) { 37 | # Using the facts that you can transform gamma distributed draws into 38 | # Dirichlet draws and that rgamma(n, 1) <==> rexp(n, 1) 39 | # See here for explanation: 40 | # https://en.wikipedia.org/wiki/Dirichlet_distribution#Random_number_generation#Gamma_distribution 41 | rexp.mat <- matrix( rexp(d * n, 1) , nrow = n, ncol = d) 42 | dirichlet.weights <- rexp.mat / rowSums(rexp.mat) 43 | dirichlet.weights 44 | } 45 | 46 | # From the examples on the help page for base::is.integer 47 | is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) { 48 | abs(x - round(x)) < tol 49 | } 50 | 51 | 52 | #' The Bayesian bootstrap 53 | #' 54 | #' Performs a Bayesian bootstrap and returns a \code{data.frame} with a sample 55 | #' of size \code{R} representing the posterior distribution of the (possibly 56 | #' multivariate) summary \code{statistic}. 57 | #' 58 | #' The summary statistic is a function of the data that represents a feature of 59 | #' interest, where a typical statistic is the mean. In \code{bayesboot} it is 60 | #' most efficient to define the statistic as a function taking the data as the 61 | #' first argument and a vector of weights as the second argument. An example of 62 | #' such a function is \code{\link{weighted.mean}}. Indicate that you are using a 63 | #' statistic defined in this way by setting \code{use.weights = TRUE}. 64 | #' 65 | #' It is also possible to define the statistic as a function only taking data 66 | #' (and no weights) by having \code{use.weights = FALSE} (the default). This 67 | #' will, for each of the \code{R} Bayesian bootstrap draws, give a resampled 68 | #' version of the \code{data} of size \code{R2} to \code{statistic}. This will 69 | #' be much slower than using \code{use.weights = TRUE} but will work with a 70 | #' larger range of statistics (the \code{\link{median}}, for example) 71 | #' 72 | #' For more information regarding this implementation of the Bayesian bootstrap 73 | #' see the blog post 74 | #' \href{https://www.sumsar.net/blog/2015/07/easy-bayesian-bootstrap-in-r/}{Easy 75 | #' Bayesian Bootstrap in R}. For more information about the model behind the 76 | #' Bayesian bootstrap see the blog post 77 | #' \href{https://www.sumsar.net/blog/2015/04/the-non-parametric-bootstrap-as-a-bayesian-model/}{The 78 | #' Non-parametric Bootstrap as a Bayesian Model} and, of course, 79 | #' \href{https://projecteuclid.org/euclid.aos/1176345338}{the original Bayesian 80 | #' bootstrap paper by Rubin (1981)}. 81 | #' 82 | #' @note \itemize{ 83 | #' \item While \code{R} and \code{R2} are set to \code{4000} by 84 | #' default, that should not be taken to indicate that a sample of size 4000 is 85 | #' sufficient nor recommended. 86 | #' 87 | #' \item When using \code{use.weights = FALSE} it is important to use a summary 88 | #' statistic that does not depend on the sample size. That is, doubling the size 89 | #' of a dataset by cloning data should result in the same statistic as when 90 | #' using the original dataset. 
An example of a statistic that depends on the 91 | #' sample size is the sample standard deviation (that is, \code{\link{sd}}), and 92 | #' when using \code{bayesboot} it would make more sense to use the population 93 | #' standard deviation (as in the example below). } 94 | #' 95 | #' @param data Either a vector or a list, or a matrix or a data.frame with one 96 | #' datapoint per row. The format of \code{data} should be compatible with the 97 | #' first argument of \code{statistic} 98 | #' @param statistic A function implementing the summary statistic of interest 99 | #' where the first argument should take the data. If \code{use.weights = TRUE} 100 | #' then the second argument should take a vector of weights. 101 | #' @param R The size of the posterior sample from the Bayesian bootstrap. 102 | #' @param R2 When \code{use.weights = FALSE} this is the size of the resample of 103 | #' the data used to approximate the weighted statistic. 104 | #' @param use.weights When \code{TRUE} the data will be reweighted, like in the 105 | #' original Bayesian bootstrap. When \code{FALSE} (the default) the 106 | #' reweighting will be approximated by resampling the data. 107 | #' @param .progress The type of progress bar ("none", "text", "tk", and "win"). 108 | #' See the \code{.progress} argument to \code{\link[plyr]{adply}} in the plyr 109 | #' package. 110 | #' @param .parallel If \code{TRUE} enables parallel processing. See the 111 | #' \code{.parallel} argument to \code{\link[plyr]{adply}} in the plyr package. 112 | #' @param ... Other arguments passed on to \code{statistic} 113 | #' 114 | #' @return A \code{data.frame} with \code{R} rows, each row being a draw from 115 | #' the posterior distribution of the Bayesian bootstrap. The number of columns 116 | #' is decided by the length of the output from \code{statistic}. If 117 | #' \code{statistic} does not return a vector or data frame with named values 118 | #' then the columns will be given the names \code{V1}, \code{V2}, \code{V3}, 119 | #' etc. While the output is a \code{data.frame} it has subclass 120 | #' \code{bayesboot} which enables specialized \code{\link{summary}} and 121 | #' \code{\link{plot}} functions for the result of a \code{bayesboot} call. 122 | #' 123 | #' @examples 124 | #' 125 | #' ### A Bayesian bootstrap analysis of a mean ### 126 | #' 127 | #' # Heights of the last ten American presidents in cm (Kennedy to Obama). 128 | #' heights <- c(183, 192, 182, 183, 177, 185, 188, 188, 182, 185); 129 | #' b1 <- bayesboot(heights, mean) 130 | #' # But it's more efficient to use the a weighted statistic. 131 | #' b2 <- bayesboot(heights, weighted.mean, use.weights = TRUE) 132 | #' 133 | #' # The result of bayesboot can be plotted and summarized 134 | #' plot(b2) 135 | #' summary(b2) 136 | #' 137 | #' # It can also be easily post processed. 138 | #' # Here the probability that the mean is > 182 cm. 139 | #' mean( b2[,1] > 182) 140 | #' 141 | #' ### A Bayesian bootstrap analysis of a SD ### 142 | #' 143 | #' # When use.weights = FALSE it is important that the summary statistics 144 | #' # does not change as a function of sample size. This is the case with 145 | #' # the sample standard deviation, so here we have to implement a 146 | #' # function calculating the population standard deviation. 
147 | #' pop.sd <- function(x) { 148 | #' n <- length(x) 149 | #' sd(x) * sqrt( (n - 1) / n) 150 | #' } 151 | #' 152 | #' b3 <- bayesboot(heights, pop.sd) 153 | #' summary(b3) 154 | #' 155 | #' ### A Bayesian bootstrap analysis of a correlation coefficient ### 156 | #' 157 | #' # Data comparing two methods of measuring blood flow. 158 | #' # From Table 1 in Miller (1974) and used in an example 159 | #' # by Rubin (1981, p. 132). 160 | #' blood.flow <- data.frame( 161 | #' dye = c(1.15, 1.7, 1.42, 1.38, 2.80, 4.7, 4.8, 1.41, 3.9), 162 | #' efp = c(1.38, 1.72, 1.59, 1.47, 1.66, 3.45, 3.87, 1.31, 3.75)) 163 | #' 164 | #' # Using the weighted correlation (corr) from the boot package. 165 | #' library(boot) 166 | #' b4 <- bayesboot(blood.flow, corr, R = 1000, use.weights = TRUE) 167 | #' hist(b4[,1]) 168 | #' 169 | #' ### A Bayesian bootstrap analysis of lm coefficients ### 170 | #' 171 | #' # A custom function that returns the coefficients of 172 | #' # a weighted linear regression on the blood.flow data 173 | #' lm.coefs <- function(d, w) { 174 | #' coef( lm(efp ~ dye, data = d, weights = w) ) 175 | #' } 176 | #' 177 | #' b5 <- bayesboot(blood.flow, lm.coefs, R = 1000, use.weights = TRUE) 178 | #' 179 | #' # Plotting the marginal posteriors 180 | #' plot(b5) 181 | #' 182 | #' # Plotting a scatter of regression lines from the posterior 183 | #' plot(blood.flow) 184 | #' for(i in sample(nrow(b5), size = 20)) { 185 | #' abline(coef = b5[i, ], col = "grey") 186 | #' } 187 | #' @references 188 | #' Miller, R. G. (1974) The jackknife - a review. \emph{Biometrika}, 189 | #' \bold{61(1)}, 1--15. 190 | #' 191 | #' Rubin, D. B. (1981). The Bayesian bootstrap. \emph{The annals of statistics}, 192 | #' \bold{9(1)}, 130--134. 193 | #' @export 194 | bayesboot <- function(data, statistic, R = 4000, R2 = 4000, use.weights = FALSE, 195 | .progress = "none", .parallel = FALSE, ...) { 196 | call <- match.call() 197 | # Pick out the first part of statistic matching a legal variable name, 198 | # just to be used as a label when plotting later. 199 | statistic.label <- deparse(substitute(statistic)) 200 | match <- regexpr("^(\\w|\\.|_)*", statistic.label[1]) 201 | statistic.label <- regmatches(statistic.label[1], match) 202 | 203 | # Doing some error checks 204 | if(length(R) != 1 || !is.wholenumber(R) || R < 1) { 205 | stop("R should be a single whole number >= 1.") 206 | } 207 | 208 | if ((NROW(data) == 0) || is.null(NROW(data))) { 209 | stop("no data in call to 'bayesboot'") 210 | } 211 | 212 | if(use.weights) { 213 | if (length(formals(statistic)) < 2) { 214 | stop("If use.weights == TRUE then statistic should take a weight vector as the second argument.") 215 | } 216 | w <- rep(1 / NROW(data), NROW(data)) 217 | tryCatch(stat.result <- statistic(data, w, ...), 218 | error = function(e) { 219 | message("Applying the statistic on the original data and with uniform weights resulted in an error") 220 | stop(e) 221 | } 222 | ) 223 | # TODO: Should I add some more checks to stat.result? Like, that it contains no NA, values? 224 | # or should I maybe do these tests to the final posterior samples and issue a varning if 225 | # there are NAs, NULLs and similar? 
226 | 227 | } else { # use.weights == FALSE 228 | if(length(R2) != 1 || is.na(R2) || !is.wholenumber(R2) || R2 < 1) { 229 | stop("If use.weights == FALSE then R2 should be a single whole number >= 1.") 230 | } 231 | tryCatch(stat.result <- statistic(data, ...), 232 | error = function(e) { 233 | message("Applying the statistic on the original data resulted in an error") 234 | stop(e) 235 | } 236 | ) 237 | } 238 | 239 | if(! (is.atomic(stat.result) || is.data.frame(stat.result) || is.matrix(stat.result)) && 240 | NROW(stat.result) != 1) { 241 | stop(paste( 242 | "Applying the statistic on the original data should return a vector, or a", 243 | "data.frame or matrix with one row, but an object with the following", 244 | "structure was returned instead:\n", 245 | paste(capture.output(str(stat.result)), collapse = "\n") 246 | )) 247 | } 248 | 249 | dirichlet.weights <- rudirichlet(R, NROW(data)) 250 | 251 | if(use.weights) { 252 | boot.sample <- plyr::adply( 253 | dirichlet.weights, 1, .progress = .progress, .parallel = .parallel, .id = NULL, 254 | .fun = function(weights) { 255 | statistic(data, weights, ...) 256 | } 257 | ) 258 | 259 | } else { 260 | if(is.null(dim(data)) || length(dim(data)) < 2) { # data is a list type of object 261 | boot.sample <- plyr::adply( 262 | dirichlet.weights, 1, .progress = .progress, .parallel = .parallel, .id = NULL, 263 | .fun = function(weights) { 264 | data.sample <- sample(data, size = R2, replace = TRUE, prob = weights) 265 | statistic(data.sample, ...) 266 | } 267 | ) 268 | } else { # assume data can be subsetted like a matrix or data.frame 269 | boot.sample <- plyr::adply( 270 | dirichlet.weights, 1, .progress = .progress, .parallel = .parallel, .id = NULL, 271 | .fun = function(weights) { 272 | index.sample <- sample(nrow(data), size = R2, replace = TRUE, prob = weights) 273 | statistic(data[index.sample, ,drop = FALSE], ...) 274 | } 275 | ) 276 | } 277 | } 278 | class(boot.sample) <- c("bayesboot", class(boot.sample)) 279 | attr(boot.sample, "statistic.label") <- statistic.label 280 | attr(boot.sample, "call") <- call 281 | # Warn if boot.sample contains "non-values". 282 | col.should.warn <- plyr::laply(boot.sample, function(boot.col) { 283 | any(is.na(boot.col) |is.nan(boot.col) | is.null(boot.col)) 284 | }) 285 | if(any(col.should.warn)) { 286 | warning(paste( 287 | "The sample from bayesboot contains either NAs, NaNs or NULLs.", 288 | "Make sure that your statistic function only return actual values.")) 289 | } 290 | boot.sample 291 | } 292 | 293 | 294 | 295 | #' Summarize the result of \code{bayesboot} 296 | #' 297 | #' Summarizes the result of a call to \code{bayesboot} by calculating means, SDs, 298 | #' highest density intervals and quantiles of the posterior marginals. 299 | #' 300 | #' @param object The bayesboot object to summarize. 301 | #' @param cred.mass The probability mass to include in the highest density intervals. 302 | #' @param ... Not used. 303 | #' 304 | #' @return A data frame with three columns: (1) \bold{statistic} the name of the 305 | #' statistic, (2) \bold{measure} the name of the summarizing measure, and (3) 306 | #' \bold{value} the value of the summarizing measure. 307 | #' 308 | #' @seealso \code{\link[HDInterval]{hdi}} in the HDInterval package for directly calculating 309 | #' highest density intervals from \code{bayesboot} objects. 310 | #' 311 | #' @export 312 | summary.bayesboot <- function(object, cred.mass = 0.95, ...) 
{ 313 | bootsum <- plyr::ldply(seq_len(ncol(object)), function(i) { 314 | s <- object[,i] 315 | if(!is.numeric(s)) { 316 | warning(paste("The statistic", names(object)[i] , "was skipped as", 317 | "summary.bayesboot can't handle non-numeric statistics.")) 318 | return(data.frame()) 319 | } 320 | data.frame(statistic = names(object)[i], 321 | measure = c("mean", "sd", "hdi.low", "hdi.high","q2.5%", "q25%", "median" ,"q75%", "q97.5%"), 322 | value = c(mean(s), sd(s), HDInterval::hdi(s, cred.mass), quantile(s, c(0.025, 0.25, 0.5, 0.75, 0.975)))) 323 | }) 324 | attr(bootsum, "statistic.label") <- attr(object, "statistic.label") 325 | attr(bootsum, "call") <- attr(object, "call") 326 | attr(bootsum, "R") <- nrow(object) 327 | attr(bootsum, "cred.mass") <- cred.mass 328 | class(bootsum) <- c("summary.bayesboot", class(bootsum)) 329 | bootsum 330 | } 331 | 332 | #' Print the first number of draws from the Bayesian bootstrap 333 | #' 334 | #' @param x The bayesboot object to print. 335 | #' @param n The number of draws to print. 336 | #' @param ... Not used. 337 | #' @export 338 | print.bayesboot <- function(x, n = 10, ...) { 339 | cat(paste0("The first ", n," draws (out of ", nrow(x) ,") from the Bayesian bootstrap:\n")) 340 | cat("\n") 341 | print(as.data.frame(head(x, n))) 342 | cat(".. ...\n") 343 | cat("\n") 344 | cat("Use summary() to produce a summary of the posterior distribution.\n") 345 | invisible(x) 346 | } 347 | 348 | #' @method print summary.bayesboot 349 | #' @export 350 | print.summary.bayesboot <- function(x, ...) { 351 | stat.table <- plyr::ddply(x, "statistic", function(s) { 352 | stats <- s$value 353 | names(stats) <- s$measure 354 | stats 355 | }) 356 | cat("Bayesian bootstrap\n") 357 | cat("\n") 358 | cat("Number of posterior draws:", attr(x, "R") , "\n") 359 | cat("\n") 360 | if(nrow(x) > 0) { 361 | hdi.percentage <- paste0(round(attr(x, "cred.mass") * 100), "%") 362 | cat("Summary of the posterior (with", hdi.percentage,"Highest Density Intervals):\n") 363 | print(stat.table[,c("statistic","mean", "sd", "hdi.low", "hdi.high")], row.names = FALSE) 364 | cat("\n") 365 | cat("Quantiles:\n") 366 | print(stat.table[,c("statistic", "q2.5%", "q25%", "median" ,"q75%", "q97.5%")], row.names = FALSE) 367 | cat("\n") 368 | } 369 | cat("Call:\n", format(attr(x, "call"))) 370 | cat("\n") 371 | invisible(x) 372 | } 373 | 374 | #' Coerce to a \code{bayesboot} object 375 | #' 376 | #' This converts an object into a data frame and adds the class 377 | #' \code{bayesboot}. Doing this is only useful in the case you would want to use 378 | #' the \code{plot} and \code{summary} methods for \code{bayesboot} objects. 379 | #' 380 | #' @param object Any object that can be converted to a data frame. 381 | #' 382 | #' @return A \code{data.frame} with subclass \code{bayesboot}. 383 | #' @export 384 | as.bayesboot <- function(object) { 385 | object <- as.data.frame(object) 386 | class(object) <- c("bayesboot", class(object)) 387 | if(is.null(attr(object, "statistic.label"))) { 388 | attr(object, "statistic.label") <- "" 389 | } 390 | if(is.null(attr(object, "call"))) { 391 | attr(object, "call") <- "" 392 | } 393 | object 394 | } 395 | 396 | #' Plot the result of \code{bayesboot} 397 | #' 398 | #' Produces histograms showing the marginal posterior distributions from a 399 | #' \code{bayesboot} call. Uses the \code{\link{plotPost}} function to produce 400 | #' the individual histograms. 401 | #' 402 | #' @param x The bayesboot object to plot. 
403 | #' @param cred.mass the probability mass to include in credible intervals, or 404 | #' NULL to suppress plotting of credible intervals. 405 | #' @param plots.per.page The maximum numbers of plots per page. 406 | #' @param cex,cex.lab,... Further parameters passed on to 407 | #' \code{\link{plotPost}}. 408 | #' 409 | #' @export 410 | plot.bayesboot <- function(x, cred.mass = 0.95, plots.per.page = 3, cex = 1.2, cex.lab=1.3, ...) { 411 | old.devAskNewPage <- devAskNewPage() 412 | old.par <- par(mfrow = c(min(plots.per.page, ncol(x)) , 1) , mar = c(4.1, 4.1, 0.5, 4.1), mgp = c(2.5, 1, 0)) 413 | on.exit({ # revert graphical parameters 414 | par(old.par) 415 | devAskNewPage(old.devAskNewPage) 416 | }) 417 | n.plots <- 0 418 | for(i in seq_len(ncol(x))) { 419 | if(!is.numeric(x[, i])) { 420 | warning(paste("The statistic", names(x)[i] , "was skipped as", 421 | "plot.bayesboot can't handle non-numeric statistics.")) 422 | next 423 | } 424 | n.plots <- n.plots + 1 425 | if(n.plots > plots.per.page) { 426 | devAskNewPage(TRUE) 427 | } 428 | if(ncol(x) == 1 && names(x)[i] == "V1" && attr(x, "statistic.label") != "") { 429 | # There is only one statistic and it has an uninformative default name 430 | # so use the begining of the function call instead as a statistic, unless 431 | # it is empty. 432 | statistic_name <- attr(x, "statistic.label") 433 | } else { # use the column name 434 | statistic_name <- names(x)[i] 435 | } 436 | plotPost(x[, i], credMass = cred.mass, xlab = statistic_name, cex = cex, cex.lab = cex.lab, ...) 437 | } 438 | invisible(NULL) 439 | } 440 | 441 | 442 | -------------------------------------------------------------------------------- /R/plotPost.R: -------------------------------------------------------------------------------- 1 | #' Graphic display of a posterior probability distribution 2 | #' 3 | #' Plot the posterior probability distribution for a single parameter from a 4 | #' vector of samples, typically from an MCMC process, with appropriate summary 5 | #' statistics. 6 | #' 7 | #' The data are plotted either as a histogram (above) or, if \code{showCurve = 8 | #' TRUE}, as a fitted kernel density curve (below). Either the mean or the mode 9 | #' of the distribution is displayed, depending on the parameter \code{showMode.} 10 | #' The Highest Density Interval (HDI) is shown as a horizontal bar, with labels 11 | #' for the ends of the interval. 12 | #' 13 | #' \if{html}{\figure{plotPost1.jpg} } 14 | #' \if{latex}{\figure{plotPost1.jpg}{options: width=5cm}} 15 | #' \cr 16 | #' \cr 17 | #' \if{html}{\figure{plotPost2.jpg} } 18 | #' \if{latex}{\figure{plotPost2.jpg}{options: width=5cm}} 19 | #' 20 | #' If values for a ROPE are supplied, these are shown as dark red vertical 21 | #' dashed lines, together with the percentage of probability mass within the 22 | #' ROPE. If a comparison value (\code{compVal}) is supplied, this is shown as a 23 | #' vertical green dotted line, together with the probability mass below and 24 | #' above this value. 25 | #' 26 | #' @param paramSampleVec A vector of samples drawn from the target distribution. 27 | #' @param credMass the probability mass to include in credible intervals, or 28 | #' NULL to suppress plotting of credible intervals. 29 | #' @param compVal a value for comparison with those plotted. 30 | #' @param ROPE a two element vector, such as \code{c(-1, 1)}, specifying the 31 | #' limits of the Region Of Practical Equivalence. 
32 | #' @param HDItextPlace a value in [0,1] that controls the horizontal position of 33 | #' the labels at the ends of the HDI bar. 34 | #' @param showMode logical: if TRUE, the mode is displayed instead of the mean. 35 | #' @param showCurve logical: if TRUE, the posterior density will be represented 36 | #' by a kernel density function instead of a histogram. 37 | #' @param \dots graphical parameters and the \code{breaks} parameter for the 38 | #' histogram. 39 | #' @return Returns an object of class \code{histogram} invisibly. Used for its 40 | #' plotting side-effect. 41 | #' 42 | #' @note The origin of this function is 43 | #' \href{https://cran.r-project.org/package=BEST}{the BEST 44 | #' package} which is based on Kruschke(2015, 2013). 45 | #' 46 | #' @author John Kruschke, modified by Mike Meredith 47 | #' @seealso For details of the HDI calculation, see \code{\link[HDInterval]{hdi}}. 48 | #' @examples 49 | #' # Generate some data 50 | #' tst <- rnorm(1e5, 3, 1) 51 | #' plotPost(tst) 52 | #' plotPost(tst, col='wheat', border='magenta') 53 | #' plotPost(tst, credMass=0.8, ROPE=c(-1,1), xlab="Response variable") 54 | #' plotPost(tst, showMode=TRUE, showCurve=TRUE, compVal=5.5) 55 | #' 56 | #' # For integers: 57 | #' tst <- rpois(1e5, 12) 58 | #' plotPost(tst) 59 | #' 60 | #' # A severely bimodal distribution: 61 | #' tst2 <- c(rnorm(1e5), rnorm(5e4, 7)) 62 | #' plotPost(tst2) # A valid 95% CrI, but not HDI 63 | #' plotPost(tst2, showCurve=TRUE) # Correct 95% HDI 64 | #' @references 65 | #' Kruschke, J. K. (2015) \emph{Doing Bayesian data analysis, second 66 | #' edition: A tutorial with R, JAGS, and Stan.} Waltham, MA: Academic Press / 67 | #' Elsevier. 68 | #' 69 | #' Kruschke, J. K. (2013) Bayesian estimation supersedes the t test. 70 | #' \emph{Journal of Experimental Psychology: General}, \bold{142(2)}, 573. 71 | #' @export 72 | plotPost <- 73 | function(paramSampleVec, credMass = 0.95, compVal = NULL, ROPE = NULL, 74 | HDItextPlace = 0.7, showMode = FALSE, showCurve = FALSE, ...) { 75 | 76 | # Does a plot for a single parameter. Called by plot.BEST but also exported. 77 | # Returns a histogram object invisibly. 78 | # This stuff should be in the ... argument: 79 | # yaxt="n", ylab="", xlab="Parameter", main="", cex.lab=1.5, cex=1.4, 80 | # xlim=range(compVal, paramSampleVec), col="skyblue", border="white", 81 | # breaks=NULL 82 | 83 | # Deal with ... argument: 84 | dots <- list(...) 
85 | if(length(dots) == 1 && is.list(dots[[1]])) 86 | dots <- dots[[1]] 87 | defaultArgs <- list(xlab=deparse(substitute(paramSampleVec)), 88 | yaxt="n", ylab="", main="", cex.lab=1.5, 89 | cex=1.4, col="skyblue", border="white", bty="n", lwd=5, freq=FALSE, 90 | xlim=range(compVal, HDInterval::hdi(paramSampleVec, 0.99))) 91 | useArgs <- modifyList(defaultArgs, dots) 92 | 93 | # Get breaks argument 94 | breaks <- dots$breaks 95 | if (is.null(breaks)) { 96 | if (all(paramSampleVec == round(paramSampleVec))) { # all integers 97 | breaks <- seq(min(paramSampleVec), max(paramSampleVec) + 1) - 0.5 98 | } else { 99 | by <- diff(HDInterval::hdi(paramSampleVec))/18 100 | breaks <- unique(c( seq( from=min(paramSampleVec), to=max(paramSampleVec), 101 | by=by), max(paramSampleVec) )) 102 | } 103 | } 104 | histinfo <- hist(paramSampleVec, breaks=breaks, plot=FALSE) 105 | histinfo$xname <- useArgs$xlab 106 | 107 | oldpar <- par(xpd=TRUE) ; on.exit(par(oldpar)) 108 | 109 | if (showCurve) { 110 | densCurve <- density( paramSampleVec, adjust=2 ) 111 | selPlot <- names(useArgs) %in% 112 | c(names(as.list(args(plot.default))), names(par(no.readonly=TRUE))) 113 | plotArgs <- useArgs[selPlot] 114 | plotArgs$x <- densCurve$x 115 | plotArgs$y <- densCurve$y 116 | plotArgs$type <- "l" 117 | plotArgs$xpd <- FALSE 118 | do.call(plot, plotArgs, quote=TRUE) 119 | abline(h=0, col='grey', xpd=FALSE) 120 | # Display the HDI. 121 | if(!is.null(credMass)) { 122 | HDI <- HDInterval::hdi(densCurve, credMass, allowSplit=TRUE) 123 | ht <- attr(HDI, "height") 124 | segments(HDI[, 1], ht, HDI[, 2], ht, lwd=4, lend='butt') 125 | segments(HDI, 0, HDI, ht, lty=2) 126 | text( mean(HDI), ht, bquote(.(100*credMass) * "% HDI" ), 127 | adj=c(.5,-1.7), cex=useArgs$cex ) 128 | text( HDI, ht, bquote(.(signif(HDI, 3))), 129 | pos=3, cex=useArgs$cex ) 130 | } 131 | } else { 132 | plot.histogram.args.names <- c("freq", "density", "angle", "border", 133 | "main", "sub", "xlab", "ylab", "xlim", "ylim", "axes", "labels", 134 | "add") # plot.histogram not exported, so need to cheat! 135 | selPlot <- names(useArgs) %in% 136 | c(plot.histogram.args.names, names(par(no.readonly=TRUE))) 137 | plotArgs <- useArgs[selPlot] 138 | plotArgs$lwd <- 1 139 | plotArgs$x <- histinfo 140 | do.call(plot, plotArgs, quote=TRUE) 141 | # Display the HDI. 142 | if(!is.null(credMass)) { 143 | HDI <- HDInterval::hdi( paramSampleVec, credMass ) 144 | lines(HDI, c(0,0), lwd=4, lend='butt') 145 | text( mean(HDI), 0, bquote(.(100*credMass) * "% HDI" ), 146 | adj=c(.5,-1.7), cex=useArgs$cex ) 147 | text( HDI[1], 0, bquote(.(signif(HDI[1],3))), 148 | adj=c(HDItextPlace,-0.5), cex=useArgs$cex ) 149 | text( HDI[2], 0, bquote(.(signif(HDI[2],3))), 150 | adj=c(1.0-HDItextPlace,-0.5), cex=useArgs$cex ) 151 | } 152 | } 153 | 154 | 155 | # Display mean or mode: 156 | cenTendHt <- 0.9 * max(histinfo$density) 157 | if ( showMode==FALSE ) { 158 | meanParam <- mean( paramSampleVec ) 159 | text( meanParam, cenTendHt, 160 | bquote(mean==.(signif(meanParam,3))), adj=c(.5,0), cex=useArgs$cex ) 161 | } else { 162 | dres <- density( paramSampleVec ) 163 | modeParam <- dres$x[which.max(dres$y)] 164 | text( modeParam, cenTendHt, 165 | bquote(mode==.(signif(modeParam,3))), adj=c(.5,0), cex=useArgs$cex ) 166 | } 167 | # Display the comparison value. 
168 |   if ( !is.null( compVal ) ) {
169 |     cvHt <- 0.7 * max(histinfo$density)
170 |     cvCol <- "darkgreen"
171 |     pcgtCompVal <- round( 100 * sum( paramSampleVec > compVal )
172 |                           / length( paramSampleVec ) , 1 )
173 |     pcltCompVal <- 100 - pcgtCompVal
174 |     lines( c(compVal,compVal), c(0.96*cvHt,0),
175 |            lty="dotted", lwd=1, col=cvCol )
176 |     text( compVal, cvHt,
177 |           bquote( .(pcltCompVal)*"% < " *
178 |                   .(signif(compVal,3)) * " < "*.(pcgtCompVal)*"%" ),
179 |           adj=c(pcltCompVal/100,0), cex=0.8*useArgs$cex, col=cvCol )
180 |   }
181 |   # Display the ROPE.
182 |   if ( !is.null( ROPE ) ) {
183 |     ROPEtextHt <- 0.55 * max(histinfo$density)
184 |     ropeCol <- "darkred"
185 |     pcInROPE <- ( sum( paramSampleVec > ROPE[1] & paramSampleVec < ROPE[2] )
186 |                   / length( paramSampleVec ) )
187 |     lines( c(ROPE[1],ROPE[1]), c(0.96*ROPEtextHt,0), lty="dotted", lwd=2,
188 |            col=ropeCol )
189 |     lines( c(ROPE[2],ROPE[2]), c(0.96*ROPEtextHt,0), lty="dotted", lwd=2,
190 |            col=ropeCol)
191 |     text( mean(ROPE), ROPEtextHt,
192 |           bquote( .(round(100*pcInROPE))*"% in ROPE" ),
193 |           adj=c(.5,0), cex=1, col=ropeCol )
194 |   }
195 | 
196 |   return(invisible(histinfo))
197 | }
198 | 
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | output:
3 |   md_document:
4 |     variant: markdown_github
5 | ---
6 | 
7 | ```{r, echo = FALSE}
8 | knitr::opts_chunk$set(
9 |   collapse = TRUE,
10 |   comment = "##",
11 |   fig.path = "man/figures/README-"
12 | )
13 | set.seed(123)
14 | ```
15 | 
16 | `bayesboot`: Easy Bayesian Bootstrap in R
17 | ========================================
18 | 
19 | The `bayesboot` package implements a function `bayesboot` that performs the Bayesian bootstrap introduced by Rubin (1981). The implementation can handle both summary statistics that work on a weighted version of the data and summary statistics that work on a resampled data set.
20 | 
21 | `bayesboot` is available on CRAN and can be installed in the usual way:
22 | 
23 | ```{r eval=FALSE}
24 | install.packages("bayesboot")
25 | ```
26 | 
27 | A simple example
28 | ---------------------
29 | 
30 | Here is a Bayesian bootstrap analysis of the mean height of the last ten American presidents:
31 | 
32 | ```{r}
33 | # Heights of the last ten American presidents in cm (Kennedy to Obama).
34 | heights <- c(183, 192, 182, 183, 177, 185, 188, 188, 182, 185)
35 | 
36 | library(bayesboot)
37 | b1 <- bayesboot(heights, mean)
38 | ```
39 | 
40 | The resulting posterior distribution in `b1` can now be `plot`ted and `summary`ized:
41 | 
42 | ```{r president_summary, fig.height= 4, fig.width=5}
43 | summary(b1)
44 | plot(b1)
45 | ```
46 | 
47 | While it is possible to use a summary statistic that works on a resample of the original data, it is more efficient if it's possible to use a summary statistic that works on a *reweighting* of the original dataset. Instead of using `mean` above it would be better to use `weighted.mean` like this:
48 | 
49 | ```{r}
50 | b2 <- bayesboot(heights, weighted.mean, use.weights = TRUE)
51 | ```
52 | 
53 | A call to `bayesboot` always results in a `data.frame` with one column per dimension of the summary statistic. If the summary statistic does not return a named vector the columns will be called `V1`, `V2`, etc. The result of a `bayesboot` call can be further inspected and post processed. For example:
54 | 
55 | ```{r}
56 | # Given the model and the data, this is the probability that the mean
57 | # heights of American presidents is above the mean heights of
58 | # American males as given by www.cdc.gov/nchs/data/series/sr_11/sr11_252.pdf
59 | mean(c(b2[,1] > 175.9, TRUE, FALSE))
60 | ```
61 | 
62 | ### Comparing two groups
63 | 
64 | If we want to compare the means of two groups, we will have to call `bayesboot` twice with each dataset and then use the resulting samples to calculate the posterior difference. For example, let's say we have the heights of the opponents that lost to the presidents in `heights` the first time those presidents were elected. Now we are interested in comparing the mean height of American presidents with the mean height of presidential candidates that lost.
65 | 
66 | ```{r height_comparison, fig.height= 4, fig.width=5}
67 | # The heights of opponents of American presidents (first time they were elected).
68 | # From Richard Nixon to John McCain
69 | heights_opponents <- c(182, 180, 180, 183, 177, 173, 188, 185, 175)
70 | 
71 | # Running the Bayesian bootstrap for both datasets
72 | b_presidents <- bayesboot(heights, weighted.mean, use.weights = TRUE)
73 | b_opponents <- bayesboot(heights_opponents, weighted.mean, use.weights = TRUE)
74 | 
75 | # Calculating the posterior difference
76 | # (here converting back to a bayesboot object for pretty plotting)
77 | b_diff <- as.bayesboot(b_presidents - b_opponents)
78 | plot(b_diff)
79 | ```
80 | 
81 | So there is some evidence that losing opponents could be shorter. (Though, I must add that it is quite unclear what the purpose really is with analyzing the heights of presidents and opponents...)
82 | 
83 | A more advanced example
84 | ----------------------------
85 | 
86 | A slightly more complicated example: a Bayesian bootstrap analysis of LOESS regression applied to the `cars` dataset on the speed of cars and the resulting distance it takes to stop. The `loess` function returns, among other things, a vector of `fitted` *y* values, one value for each *x* value in the data. These *y* values define the smoothed LOESS line and are what you would usually plot after having fitted a LOESS. Now we want to use the Bayesian bootstrap to gauge the uncertainty in the LOESS line. As the `loess` function accepts weighted data, we'll simply create a function that takes the data with weights and returns the `fitted` *y* values. We'll then plug that function into `bayesboot`:
87 | 
88 | ```{r}
89 | boot_fn <- function(cars, weights) {
90 |   loess(dist ~ speed, cars, weights = weights)$fitted
91 | }
92 | 
93 | bb_loess <- bayesboot(cars, boot_fn, use.weights = TRUE)
94 | ```
95 | 
96 | To plot this takes a couple of lines more:
97 | 
98 | ```{r car_plot}
99 | # Plotting the data
100 | plot(cars$speed, cars$dist, pch = 20, col = "tomato4", xlab = "Car speed in mph",
101 |      ylab = "Stopping distance in ft", main = "Speed and Stopping distances of Cars")
102 | 
103 | # Plotting a scatter of Bootstrapped LOESS lines to represent the uncertainty.
104 | for(i in sample(nrow(bb_loess), 20)) { 105 | lines(cars$speed, bb_loess[i,], col = "gray") 106 | } 107 | # Finally plotting the posterior mean LOESS line 108 | lines(cars$speed, colMeans(bb_loess, na.rm = TRUE), type ="l", 109 | col = "tomato", lwd = 4) 110 | ``` 111 | 112 | 113 | More information 114 | ----------------------- 115 | 116 | For more information on the Bayesian bootstrap see [Rubin's (1981) original paper](https://projecteuclid.org/euclid.aos/1176345338) and my blog post [The Non-parametric Bootstrap as a Bayesian Model](https://www.sumsar.net/blog/2015/04/the-non-parametric-bootstrap-as-a-bayesian-model/). The implementation of `bayesboot` is similar to the function outlined in the blog post [Easy Bayesian Bootstrap in R](https://www.sumsar.net/blog/2015/07/easy-bayesian-bootstrap-in-r/), but the interface is slightly different. 117 | 118 | References 119 | ---------------- 120 | 121 | Rubin, D. B. (1981). The Bayesian bootstrap. *The annals of statistics*, 9(1), 130--134. [link to paper](https://projecteuclid.org/euclid.aos/1176345338) 122 | 123 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # `bayesboot`: Easy Bayesian Bootstrap in R 2 | 3 | The `bayesboot` package implements a function `bayesboot` that performs 4 | the Bayesian bootstrap introduced by Rubin (1981). The implementation 5 | can both handle summary statistics that works on a weighted version of 6 | the data or that works on a resampled data set. 7 | 8 | `bayesboot` is available on CRAN and can be installed in the usual way: 9 | 10 | ``` r 11 | install.packages("bayesboot") 12 | ``` 13 | 14 | ## A simple example 15 | 16 | Here is a Bayesian bootstrap analysis of the mean height of the last ten 17 | American presidents: 18 | 19 | ``` r 20 | # Heights of the last ten American presidents in cm (Kennedy to Obama). 21 | heights <- c(183, 192, 182, 183, 177, 185, 188, 188, 182, 185) 22 | 23 | library(bayesboot) 24 | b1 <- bayesboot(heights, mean) 25 | ``` 26 | 27 | The resulting posterior distribution in `b1` can now be `plot`ted and 28 | `summary`ized: 29 | 30 | ``` r 31 | summary(b1) 32 | ## Bayesian bootstrap 33 | ## 34 | ## Number of posterior draws: 4000 35 | ## 36 | ## Summary of the posterior (with 95% Highest Density Intervals): 37 | ## statistic mean sd hdi.low hdi.high 38 | ## V1 184.4979 1.154456 182.3328 186.909 39 | ## 40 | ## Quantiles: 41 | ## statistic q2.5% q25% median q75% q97.5% 42 | ## V1 182.2621 183.742 184.4664 185.2378 186.8737 43 | ## 44 | ## Call: 45 | ## bayesboot(data = heights, statistic = mean) 46 | plot(b1) 47 | ``` 48 | 49 | ![](man/figures/README-president_summary-1.png) 50 | 51 | While it is possible to use a summary statistic that works on a resample 52 | of the original data, it is more efficient if it’s possible to use a 53 | summary statistic that works on a *reweighting* of the original dataset. 54 | Instead of using `mean` above it would be better to use `weighted.mean` 55 | like this: 56 | 57 | ``` r 58 | b2 <- bayesboot(heights, weighted.mean, use.weights = TRUE) 59 | ``` 60 | 61 | The result of a call to `bayesboot` will always result in a `data.frame` 62 | with one column per dimension of the summary statistic. If the summary 63 | statistic does not return a named vector the columns will be called 64 | `V1`, `V2`, etc. The result of a `bayesboot` call can be further 65 | inspected and post processed. 
For example:
66 | 
67 | ``` r
68 | # Given the model and the data, this is the probability that the mean
69 | # heights of American presidents is above the mean heights of
70 | # American males as given by www.cdc.gov/nchs/data/series/sr_11/sr11_252.pdf
71 | mean(c(b2[,1] > 175.9, TRUE, FALSE))
72 | ## [1] 0.9997501
73 | ```
74 | 
75 | ### Comparing two groups
76 | 
77 | If we want to compare the means of two groups, we will have to call
78 | `bayesboot` twice with each dataset and then use the resulting samples
79 | to calculate the posterior difference. For example, let’s say we have
80 | the heights of the opponents that lost to the presidents in `heights` the
81 | first time those presidents were elected. Now we are interested in
82 | comparing the mean height of American presidents with the mean height of
83 | presidential candidates that lost.
84 | 
85 | ``` r
86 | # The heights of opponents of American presidents (first time they were elected).
87 | # From Richard Nixon to John McCain
88 | heights_opponents <- c(182, 180, 180, 183, 177, 173, 188, 185, 175)
89 | 
90 | # Running the Bayesian bootstrap for both datasets
91 | b_presidents <- bayesboot(heights, weighted.mean, use.weights = TRUE)
92 | b_opponents <- bayesboot(heights_opponents, weighted.mean, use.weights = TRUE)
93 | 
94 | # Calculating the posterior difference
95 | # (here converting back to a bayesboot object for pretty plotting)
96 | b_diff <- as.bayesboot(b_presidents - b_opponents)
97 | plot(b_diff)
98 | ```
99 | 
100 | ![](man/figures/README-height_comparison-1.png)
101 | 
102 | So there is some evidence that losing opponents could be shorter.
103 | (Though, I must add that it is quite unclear what the purpose really is
104 | with analyzing the heights of presidents and opponents…)
105 | 
106 | ## A more advanced example
107 | 
108 | A slightly more complicated example: a Bayesian bootstrap analysis of
109 | LOESS regression applied to the `cars` dataset on the speed of cars and
110 | the resulting distance it takes to stop. The `loess`
111 | function returns, among other things, a vector of `fitted` *y* values,
112 | one value for each *x* value in the data. These *y* values define the
113 | smoothed LOESS line and are what you would usually plot after having
114 | fitted a LOESS. Now we want to use the Bayesian bootstrap to gauge the
115 | uncertainty in the LOESS line. As the `loess` function accepts weighted
116 | data, we’ll simply create a function that takes the data with weights
117 | and returns the `fitted` *y* values. We’ll then plug that function into
118 | `bayesboot`:
119 | 
120 | ``` r
121 | boot_fn <- function(cars, weights) {
122 |   loess(dist ~ speed, cars, weights = weights)$fitted
123 | }
124 | 
125 | bb_loess <- bayesboot(cars, boot_fn, use.weights = TRUE)
126 | ```
127 | 
128 | To plot this takes a couple of lines more:
129 | 
130 | ``` r
131 | # Plotting the data
132 | plot(cars$speed, cars$dist, pch = 20, col = "tomato4", xlab = "Car speed in mph",
133 |      ylab = "Stopping distance in ft", main = "Speed and Stopping distances of Cars")
134 | 
135 | # Plotting a scatter of Bootstrapped LOESS lines to represent the uncertainty.
136 | for(i in sample(nrow(bb_loess), 20)) { 137 | lines(cars$speed, bb_loess[i,], col = "gray") 138 | } 139 | # Finally plotting the posterior mean LOESS line 140 | lines(cars$speed, colMeans(bb_loess, na.rm = TRUE), type ="l", 141 | col = "tomato", lwd = 4) 142 | ``` 143 | 144 | ![](man/figures/README-car_plot-1.png) 145 | 146 | ## More information 147 | 148 | For more information on the Bayesian bootstrap see [Rubin’s (1981) 149 | original paper](https://projecteuclid.org/euclid.aos/1176345338) and my 150 | blog post [The Non-parametric Bootstrap as a Bayesian 151 | Model](https://www.sumsar.net/blog/2015/04/the-non-parametric-bootstrap-as-a-bayesian-model/). 152 | The implementation of `bayesboot` is similar to the function outlined in 153 | the blog post [Easy Bayesian Bootstrap in 154 | R](https://www.sumsar.net/blog/2015/07/easy-bayesian-bootstrap-in-r/), 155 | but the interface is slightly different. 156 | 157 | ## References 158 | 159 | Rubin, D. B. (1981). The Bayesian bootstrap. *The annals of statistics*, 160 | 9(1), 130–134. [link to 161 | paper](https://projecteuclid.org/euclid.aos/1176345338) 162 | -------------------------------------------------------------------------------- /bayesboot.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageCheckArgs: --as-cran 22 | PackageRoxygenize: rd,collate,namespace 23 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## R CMD check results 2 | 3 | 0 errors | 0 warnings | 0 notes 4 | 5 | Note: This previous NOTE, has now been resolved: 6 | 7 | > Please provide package anchors for all Rd \link{} targets not in the 8 | > package itself and the base packages. 9 | 10 | ## Downstream dependencies 11 | 12 | This package currently has no downstream dependencies 13 | -------------------------------------------------------------------------------- /man/as.bayesboot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bayesboot.R 3 | \name{as.bayesboot} 4 | \alias{as.bayesboot} 5 | \title{Coerce to a \code{bayesboot} object} 6 | \usage{ 7 | as.bayesboot(object) 8 | } 9 | \arguments{ 10 | \item{object}{Any object that can be converted to a data frame.} 11 | } 12 | \value{ 13 | A \code{data.frame} with subclass \code{bayesboot}. 14 | } 15 | \description{ 16 | This converts an object into a data frame and adds the class 17 | \code{bayesboot}. Doing this is only useful in the case you would want to use 18 | the \code{plot} and \code{summary} methods for \code{bayesboot} objects. 
19 | } 20 | -------------------------------------------------------------------------------- /man/bayesboot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bayesboot.R 3 | \name{bayesboot} 4 | \alias{bayesboot} 5 | \title{The Bayesian bootstrap} 6 | \usage{ 7 | bayesboot( 8 | data, 9 | statistic, 10 | R = 4000, 11 | R2 = 4000, 12 | use.weights = FALSE, 13 | .progress = "none", 14 | .parallel = FALSE, 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{data}{Either a vector or a list, or a matrix or a data.frame with one 20 | datapoint per row. The format of \code{data} should be compatible with the 21 | first argument of \code{statistic}} 22 | 23 | \item{statistic}{A function implementing the summary statistic of interest 24 | where the first argument should take the data. If \code{use.weights = TRUE} 25 | then the second argument should take a vector of weights.} 26 | 27 | \item{R}{The size of the posterior sample from the Bayesian bootstrap.} 28 | 29 | \item{R2}{When \code{use.weights = FALSE} this is the size of the resample of 30 | the data used to approximate the weighted statistic.} 31 | 32 | \item{use.weights}{When \code{TRUE} the data will be reweighted, like in the 33 | original Bayesian bootstrap. When \code{FALSE} (the default) the 34 | reweighting will be approximated by resampling the data.} 35 | 36 | \item{.progress}{The type of progress bar ("none", "text", "tk", and "win"). 37 | See the \code{.progress} argument to \code{\link[plyr]{adply}} in the plyr 38 | package.} 39 | 40 | \item{.parallel}{If \code{TRUE} enables parallel processing. See the 41 | \code{.parallel} argument to \code{\link[plyr]{adply}} in the plyr package.} 42 | 43 | \item{...}{Other arguments passed on to \code{statistic}} 44 | } 45 | \value{ 46 | A \code{data.frame} with \code{R} rows, each row being a draw from 47 | the posterior distribution of the Bayesian bootstrap. The number of columns 48 | is decided by the length of the output from \code{statistic}. If 49 | \code{statistic} does not return a vector or data frame with named values 50 | then the columns will be given the names \code{V1}, \code{V2}, \code{V3}, 51 | etc. While the output is a \code{data.frame} it has subclass 52 | \code{bayesboot} which enables specialized \code{\link{summary}} and 53 | \code{\link{plot}} functions for the result of a \code{bayesboot} call. 54 | } 55 | \description{ 56 | Performs a Bayesian bootstrap and returns a \code{data.frame} with a sample 57 | of size \code{R} representing the posterior distribution of the (possibly 58 | multivariate) summary \code{statistic}. 59 | } 60 | \details{ 61 | The summary statistic is a function of the data that represents a feature of 62 | interest, where a typical statistic is the mean. In \code{bayesboot} it is 63 | most efficient to define the statistic as a function taking the data as the 64 | first argument and a vector of weights as the second argument. An example of 65 | such a function is \code{\link{weighted.mean}}. Indicate that you are using a 66 | statistic defined in this way by setting \code{use.weights = TRUE}. 67 | 68 | It is also possible to define the statistic as a function only taking data 69 | (and no weights) by having \code{use.weights = FALSE} (the default). This 70 | will, for each of the \code{R} Bayesian bootstrap draws, give a resampled 71 | version of the \code{data} of size \code{R2} to \code{statistic}. 
This will 72 | be much slower than using \code{use.weights = TRUE} but will work with a 73 | larger range of statistics (the \code{\link{median}}, for example) 74 | 75 | For more information regarding this implementation of the Bayesian bootstrap 76 | see the blog post 77 | \href{https://www.sumsar.net/blog/2015/07/easy-bayesian-bootstrap-in-r/}{Easy 78 | Bayesian Bootstrap in R}. For more information about the model behind the 79 | Bayesian bootstrap see the blog post 80 | \href{https://www.sumsar.net/blog/2015/04/the-non-parametric-bootstrap-as-a-bayesian-model/}{The 81 | Non-parametric Bootstrap as a Bayesian Model} and, of course, 82 | \href{https://projecteuclid.org/euclid.aos/1176345338}{the original Bayesian 83 | bootstrap paper by Rubin (1981)}. 84 | } 85 | \note{ 86 | \itemize{ 87 | \item While \code{R} and \code{R2} are set to \code{4000} by 88 | default, that should not be taken to indicate that a sample of size 4000 is 89 | sufficient nor recommended. 90 | 91 | \item When using \code{use.weights = FALSE} it is important to use a summary 92 | statistic that does not depend on the sample size. That is, doubling the size 93 | of a dataset by cloning data should result in the same statistic as when 94 | using the original dataset. An example of a statistic that depends on the 95 | sample size is the sample standard deviation (that is, \code{\link{sd}}), and 96 | when using \code{bayesboot} it would make more sense to use the population 97 | standard deviation (as in the example below). } 98 | } 99 | \examples{ 100 | 101 | ### A Bayesian bootstrap analysis of a mean ### 102 | 103 | # Heights of the last ten American presidents in cm (Kennedy to Obama). 104 | heights <- c(183, 192, 182, 183, 177, 185, 188, 188, 182, 185); 105 | b1 <- bayesboot(heights, mean) 106 | # But it's more efficient to use the a weighted statistic. 107 | b2 <- bayesboot(heights, weighted.mean, use.weights = TRUE) 108 | 109 | # The result of bayesboot can be plotted and summarized 110 | plot(b2) 111 | summary(b2) 112 | 113 | # It can also be easily post processed. 114 | # Here the probability that the mean is > 182 cm. 115 | mean( b2[,1] > 182) 116 | 117 | ### A Bayesian bootstrap analysis of a SD ### 118 | 119 | # When use.weights = FALSE it is important that the summary statistics 120 | # does not change as a function of sample size. This is the case with 121 | # the sample standard deviation, so here we have to implement a 122 | # function calculating the population standard deviation. 123 | pop.sd <- function(x) { 124 | n <- length(x) 125 | sd(x) * sqrt( (n - 1) / n) 126 | } 127 | 128 | b3 <- bayesboot(heights, pop.sd) 129 | summary(b3) 130 | 131 | ### A Bayesian bootstrap analysis of a correlation coefficient ### 132 | 133 | # Data comparing two methods of measuring blood flow. 134 | # From Table 1 in Miller (1974) and used in an example 135 | # by Rubin (1981, p. 132). 136 | blood.flow <- data.frame( 137 | dye = c(1.15, 1.7, 1.42, 1.38, 2.80, 4.7, 4.8, 1.41, 3.9), 138 | efp = c(1.38, 1.72, 1.59, 1.47, 1.66, 3.45, 3.87, 1.31, 3.75)) 139 | 140 | # Using the weighted correlation (corr) from the boot package. 
141 | library(boot) 142 | b4 <- bayesboot(blood.flow, corr, R = 1000, use.weights = TRUE) 143 | hist(b4[,1]) 144 | 145 | ### A Bayesian bootstrap analysis of lm coefficients ### 146 | 147 | # A custom function that returns the coefficients of 148 | # a weighted linear regression on the blood.flow data 149 | lm.coefs <- function(d, w) { 150 | coef( lm(efp ~ dye, data = d, weights = w) ) 151 | } 152 | 153 | b5 <- bayesboot(blood.flow, lm.coefs, R = 1000, use.weights = TRUE) 154 | 155 | # Plotting the marginal posteriors 156 | plot(b5) 157 | 158 | # Plotting a scatter of regression lines from the posterior 159 | plot(blood.flow) 160 | for(i in sample(nrow(b5), size = 20)) { 161 | abline(coef = b5[i, ], col = "grey") 162 | } 163 | } 164 | \references{ 165 | Miller, R. G. (1974) The jackknife - a review. \emph{Biometrika}, 166 | \bold{61(1)}, 1--15. 167 | 168 | Rubin, D. B. (1981). The Bayesian bootstrap. \emph{The annals of statistics}, 169 | \bold{9(1)}, 130--134. 170 | } 171 | -------------------------------------------------------------------------------- /man/figures/README-car_plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasmusab/bayesboot/514fec369f21468be5476c251c7d4d381f92357f/man/figures/README-car_plot-1.png -------------------------------------------------------------------------------- /man/figures/README-height_comparison-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasmusab/bayesboot/514fec369f21468be5476c251c7d4d381f92357f/man/figures/README-height_comparison-1.png -------------------------------------------------------------------------------- /man/figures/README-president_summary-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasmusab/bayesboot/514fec369f21468be5476c251c7d4d381f92357f/man/figures/README-president_summary-1.png -------------------------------------------------------------------------------- /man/figures/plotPost1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasmusab/bayesboot/514fec369f21468be5476c251c7d4d381f92357f/man/figures/plotPost1.jpg -------------------------------------------------------------------------------- /man/figures/plotPost2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasmusab/bayesboot/514fec369f21468be5476c251c7d4d381f92357f/man/figures/plotPost2.jpg -------------------------------------------------------------------------------- /man/plot.bayesboot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bayesboot.R 3 | \name{plot.bayesboot} 4 | \alias{plot.bayesboot} 5 | \title{Plot the result of \code{bayesboot}} 6 | \usage{ 7 | \method{plot}{bayesboot}(x, cred.mass = 0.95, plots.per.page = 3, cex = 1.2, cex.lab = 1.3, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{The bayesboot object to plot.} 11 | 12 | \item{cred.mass}{the probability mass to include in credible intervals, or 13 | NULL to suppress plotting of credible intervals.} 14 | 15 | \item{plots.per.page}{The maximum number of plots per page.} 16 | 17 | \item{cex, cex.lab, ...}{Further parameters passed on to 18 | \code{\link{plotPost}}.} 19 | } 20 | \description{ 21 | Produces histograms showing the marginal posterior distributions from a 22 | \code{bayesboot} call. Uses the \code{\link{plotPost}} function to produce 23 | the individual histograms. 24 | } 25 | -------------------------------------------------------------------------------- /man/plotPost.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotPost.R 3 | \name{plotPost} 4 | \alias{plotPost} 5 | \title{Graphic display of a posterior probability distribution} 6 | \usage{ 7 | plotPost( 8 | paramSampleVec, 9 | credMass = 0.95, 10 | compVal = NULL, 11 | ROPE = NULL, 12 | HDItextPlace = 0.7, 13 | showMode = FALSE, 14 | showCurve = FALSE, 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{paramSampleVec}{A vector of samples drawn from the target distribution.} 20 | 21 | \item{credMass}{the probability mass to include in credible intervals, or 22 | NULL to suppress plotting of credible intervals.} 23 | 24 | \item{compVal}{a value for comparison with those plotted.} 25 | 26 | \item{ROPE}{a two-element vector, such as \code{c(-1, 1)}, specifying the 27 | limits of the Region Of Practical Equivalence.} 28 | 29 | \item{HDItextPlace}{a value in [0,1] that controls the horizontal position of 30 | the labels at the ends of the HDI bar.} 31 | 32 | \item{showMode}{logical: if TRUE, the mode is displayed instead of the mean.} 33 | 34 | \item{showCurve}{logical: if TRUE, the posterior density will be represented 35 | by a kernel density function instead of a histogram.} 36 | 37 | \item{\dots}{graphical parameters and the \code{breaks} parameter for the 38 | histogram.} 39 | } 40 | \value{ 41 | Returns an object of class \code{histogram} invisibly. Used for its 42 | plotting side-effect. 43 | } 44 | \description{ 45 | Plot the posterior probability distribution for a single parameter from a 46 | vector of samples, typically from an MCMC process, with appropriate summary 47 | statistics. 48 | } 49 | \details{ 50 | The data are plotted either as a histogram (above) or, if \code{showCurve = 51 | TRUE}, as a fitted kernel density curve (below). Either the mean or the mode 52 | of the distribution is displayed, depending on the parameter \code{showMode}. 53 | The Highest Density Interval (HDI) is shown as a horizontal bar, with labels 54 | for the ends of the interval. 55 | 56 | \if{html}{\figure{plotPost1.jpg} } 57 | \if{latex}{\figure{plotPost1.jpg}{options: width=5cm}} 58 | \cr 59 | \cr 60 | \if{html}{\figure{plotPost2.jpg} } 61 | \if{latex}{\figure{plotPost2.jpg}{options: width=5cm}} 62 | 63 | If values for a ROPE are supplied, these are shown as dark red vertical 64 | dashed lines, together with the percentage of probability mass within the 65 | ROPE. If a comparison value (\code{compVal}) is supplied, this is shown as a 66 | vertical green dotted line, together with the probability mass below and 67 | above this value. 68 | } 69 | \note{ 70 | The origin of this function is 71 | \href{https://cran.r-project.org/package=BEST}{the BEST 72 | package}, which is based on Kruschke (2015, 2013).
73 | } 74 | \examples{ 75 | # Generate some data 76 | tst <- rnorm(1e5, 3, 1) 77 | plotPost(tst) 78 | plotPost(tst, col='wheat', border='magenta') 79 | plotPost(tst, credMass=0.8, ROPE=c(-1,1), xlab="Response variable") 80 | plotPost(tst, showMode=TRUE, showCurve=TRUE, compVal=5.5) 81 | 82 | # For integers: 83 | tst <- rpois(1e5, 12) 84 | plotPost(tst) 85 | 86 | # A severely bimodal distribution: 87 | tst2 <- c(rnorm(1e5), rnorm(5e4, 7)) 88 | plotPost(tst2) # A valid 95\% CrI, but not HDI 89 | plotPost(tst2, showCurve=TRUE) # Correct 95\% HDI 90 | } 91 | \references{ 92 | Kruschke, J. K. (2015) \emph{Doing Bayesian data analysis, second 93 | edition: A tutorial with R, JAGS, and Stan.} Waltham, MA: Academic Press / 94 | Elsevier. 95 | 96 | Kruschke, J. K. (2013) Bayesian estimation supersedes the t test. 97 | \emph{Journal of Experimental Psychology: General}, \bold{142(2)}, 573. 98 | } 99 | \seealso{ 100 | For details of the HDI calculation, see \code{\link[HDInterval]{hdi}}. 101 | } 102 | \author{ 103 | John Kruschke, modified by Mike Meredith 104 | } 105 | -------------------------------------------------------------------------------- /man/print.bayesboot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bayesboot.R 3 | \name{print.bayesboot} 4 | \alias{print.bayesboot} 5 | \title{Print the first \code{n} draws from the Bayesian bootstrap} 6 | \usage{ 7 | \method{print}{bayesboot}(x, n = 10, ...) 8 | } 9 | \arguments{ 10 | \item{x}{The bayesboot object to print.} 11 | 12 | \item{n}{The number of draws to print.} 13 | 14 | \item{...}{Not used.} 15 | } 16 | \description{ 17 | Print the first \code{n} draws from the Bayesian bootstrap. 18 | } 19 | -------------------------------------------------------------------------------- /man/rudirichlet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bayesboot.R 3 | \name{rudirichlet} 4 | \alias{rudirichlet} 5 | \title{Produce random draws from a uniform Dirichlet distribution} 6 | \usage{ 7 | rudirichlet(n, d) 8 | } 9 | \arguments{ 10 | \item{n}{the number of draws.} 11 | 12 | \item{d}{the dimension of the Dirichlet distribution.} 13 | } 14 | \value{ 15 | An \code{n} by \code{d} matrix. 16 | } 17 | \description{ 18 | \code{rudirichlet} produces \code{n} draws from a \code{d}-dimensional 19 | uniform Dirichlet distribution. Here "uniform" implies that any combination 20 | of values on the support of the distribution is equally likely, that is, the 21 | \eqn{\alpha} parameters to the Dirichlet distribution are all set to 1.0. 22 | } 23 | \details{ 24 | In the context of the Bayesian bootstrap \code{rudirichlet} is used to 25 | produce the bootstrap weights. Therefore, \code{rudirichlet} can be used if 26 | you directly want to generate Bayesian bootstrap weights. 27 | } 28 | \examples{ 29 | set.seed(123) 30 | rudirichlet(2, 3) 31 | # Should produce the following matrix: 32 | # [,1] [,2] [,3] 33 | # [1,] 0.30681 0.2097 0.4834 34 | # [2,] 0.07811 0.1390 0.7829 35 | 36 | # The above could be seen as a sample of two Bayesian bootstrap weights for a 37 | # dataset of size three.
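
# For example, a single draw of weights can be plugged straight into a
# weighted statistic. A minimal sketch, using some made-up data:
x <- c(2.4, 3.1, 5.7)
weighted.mean(x, rudirichlet(1, length(x))[1, ])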
38 | } 39 | -------------------------------------------------------------------------------- /man/summary.bayesboot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bayesboot.R 3 | \name{summary.bayesboot} 4 | \alias{summary.bayesboot} 5 | \title{Summarize the result of \code{bayesboot}} 6 | \usage{ 7 | \method{summary}{bayesboot}(object, cred.mass = 0.95, ...) 8 | } 9 | \arguments{ 10 | \item{object}{The bayesboot object to summarize.} 11 | 12 | \item{cred.mass}{The probability mass to include in the highest density intervals.} 13 | 14 | \item{...}{Not used.} 15 | } 16 | \value{ 17 | A data frame with three columns: (1) \bold{statistic} the name of the 18 | statistic, (2) \bold{measure} the name of the summarizing measure, and (3) 19 | \bold{value} the value of the summarizing measure. 20 | } 21 | \description{ 22 | Summarizes the result of a call to \code{bayesboot} by calculating means, SDs, 23 | highest density intervals and quantiles of the posterior marginals. 24 | } 25 | \seealso{ 26 | \code{\link[HDInterval]{hdi}} in the HDInterval package for directly calculating 27 | highest density intervals from \code{bayesboot} objects. 28 | } 29 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | Sys.setenv("R_TESTS" = "") 2 | 3 | library(testthat) 4 | library(bayesboot) 5 | 6 | test_check("bayesboot") 7 | -------------------------------------------------------------------------------- /tests/testthat/Rplots.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasmusab/bayesboot/514fec369f21468be5476c251c7d4d381f92357f/tests/testthat/Rplots.pdf -------------------------------------------------------------------------------- /tests/testthat/test-bayesboot-deterministic.R: -------------------------------------------------------------------------------- 1 | context("Deterministic Bayesian bootstrap tests") 2 | library(doParallel) 3 | set.seed(123) 4 | 5 | test_that("rudirichlet produces a valid output", { 6 | rand_mat <- rudirichlet(10, 15) 7 | expect_true(all(rand_mat >= 0 & rand_mat <= 1)) 8 | expect_equivalent(rowSums(rand_mat), rep(1, 10)) 9 | }) 10 | 11 | # TODO: Why does this pass when using test(), but not when 12 | # checking the package? 
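# The test below runs bayesboot with both resampling-based and weight-based
# statistics, on vectors, data frames, and list data, including a statistic
# that returns NAs, and checks the class and dimensions of the results.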
13 | test_that("bayesboot produces a valid output", { 14 | x <- rnorm(10) 15 | 16 | b1 <- bayesboot(x, mean, R = 100, R2 = 90, use.weights = FALSE) 17 | expect_equal(class(b1), c("bayesboot", "data.frame")) 18 | expect_equal(nrow(b1), 100) 19 | expect_equal(ncol(b1), 1) 20 | 21 | b2 <- bayesboot(x, weighted.mean, R = 50, R2 = NULL, use.weights = TRUE) 22 | expect_equal(class(b2), c("bayesboot", "data.frame")) 23 | expect_equal(nrow(b2), 50) 24 | expect_equal(ncol(b2), 1) 25 | 26 | d <- data.frame(x = 1:10, y = rnorm(10)) 27 | 28 | boot_stat <- function(d) { 29 | coef(lm(y ~ x, data = d)) 30 | } 31 | b3 <- bayesboot(d, boot_stat, R = 75, R2 = 1000, use.weights = FALSE) 32 | expect_equal(class(b3), c("bayesboot", "data.frame")) 33 | expect_equal(nrow(b3), 75) 34 | expect_equal(ncol(b3), 2) 35 | 36 | boot_stat <- function(d, w) { 37 | coef(lm(y ~ x, data = d, weights = w)) 38 | } 39 | b4 <- bayesboot(d, boot_stat, R = 130, use.weights = TRUE) 40 | expect_equal(class(b4), c("bayesboot", "data.frame")) 41 | expect_equal(nrow(b4), 130) 42 | expect_equal(ncol(b4), 2) 43 | 44 | # A less typical bootstrap analysis where the data are character strings in a list 45 | # and the statistic is the most common answer. 46 | data_list <- list("Yes", "Yes", "No", "Yes", "No", "Yes", "Maybe") 47 | boot_stat <- function(d) { 48 | t <- table(as.character(d)) 49 | c(most_common_answer = names(t)[which.max(t)]) 50 | } 51 | b5 <- bayesboot(data_list, boot_stat, R = 50, R2 = 20) 52 | expect_equal(class(b5), c("bayesboot", "data.frame")) 53 | expect_equal(nrow(b5), 50) 54 | expect_equal(ncol(b5), 1) 55 | 56 | # Another strange bootstrap with a statistic that outputs NAs. 57 | d <- data.frame(x = 1:15, y = c(1, 2, 3, 4, NA)) 58 | expect_warning({ 59 | b6 <- bayesboot(d, use.weights = TRUE, R = 100, function(d, w) { 60 | c(weighted.mean(d$x, w), weighted.mean(d$y, w)) 61 | }) 62 | }) 63 | 64 | expect_output(print(summary(b1)), ".") 65 | expect_output(print(summary(b2)), ".") 66 | expect_output(print(summary(b3)), ".") 67 | expect_output(print(summary(b4)), ".") 68 | expect_warning(summary(b5)) 69 | expect_true({ 70 | plot(b1) 71 | plot(b2) 72 | plot(b3) 73 | plot(b4) 74 | TRUE 75 | }) 76 | expect_warning(plot(b5)) 77 | }) 78 | 79 | test_that("bayesboot can do parallel processing", { 80 | library(doParallel) 81 | library(foreach) 82 | x <- rnorm(10) 83 | registerDoParallel(cores = 2) 84 | b1 <- bayesboot(x, mean, R = 1000, R2 = 1000, .parallel = TRUE) 85 | expect_equal(class(b1), c("bayesboot", "data.frame")) 86 | expect_equal(nrow(b1), 1000) 87 | expect_equal(ncol(b1), 1) 88 | stopImplicitCluster() 89 | registerDoParallel(cores = 1) 90 | stopImplicitCluster() 91 | 92 | }) 93 | 94 | -------------------------------------------------------------------------------- /tests/testthat/test-bayesboot-non-deterministic.R: -------------------------------------------------------------------------------- 1 | # This file contains some tests that are non-deterministic and that might 2 | # occasionally fail, even if everything is OK. But they shouldn't fail too often... 3 | 4 | context("Non-deterministic Bayesian bootstrap tests") 5 | 6 | x <- runif(5) 7 | blood.flow <- data.frame( 8 | dye = c(1.15, 1.7, 1.42, 1.38, 2.80, 4.7, 4.8, 1.41, 3.9), 9 | efp = c(1.38, 1.72, 1.59, 1.47, 1.66, 3.45, 3.87, 1.31, 3.75)) 10 | 11 | test_that("The weight-based and the resampling-based bayesboot give the same results", { 12 | skip_on_cran() # As these tests might occasionally fail.
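# With many resamples per draw (R2 = 1000), the resampling-based statistic
# should closely approximate the weight-based one, so their posterior
# quantiles are expected to agree within a small tolerance.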
13 | b1 <- bayesboot(x, mean, R = 10000, R2 = 1000) 14 | b2 <- bayesboot(x, weighted.mean, R = 10000) 15 | q1 <- quantile(b1$V1, c(0.1, 0.5, 0.9)) 16 | q2 <- quantile(b2$V1, c(0.1, 0.5, 0.9)) 17 | # Check that a few posterior quantiles are roughly the same 18 | expect_true(all(abs(q1 - q2) < 0.01)) 19 | }) 20 | 21 | test_that("bayesboot replicates the correlation analysis in Rubin (1981)", { 22 | skip_on_cran() # As these tests might occasionally fail. 23 | library(boot) 24 | target_q <- c("0.1" = 0.8962, "0.5" = 0.9519, "0.9" = 0.9788) # quantiles from Rubin's (1981) analysis 25 | b3 <- bayesboot(blood.flow, boot::corr, R = 10000, use.weights = TRUE) 26 | b4 <- bayesboot(blood.flow, function(x) { cor(x[,1], x[,2]) }, R = 10000, R2 = 1000) 27 | q3 <- quantile(b3$V1, c(0.1, 0.5, 0.9)) 28 | q4 <- quantile(b4$V1, c(0.1, 0.5, 0.9)) 29 | expect_true(all(abs(q3 - target_q) < 0.005)) 30 | expect_true(all(abs(q4 - target_q) < 0.005)) 31 | }) 32 | --------------------------------------------------------------------------------