├── .Rbuildignore
├── .gitignore
├── CRAN-SUBMISSION
├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── NEWS
├── R
│   ├── bayesboot.R
│   └── plotPost.R
├── README.Rmd
├── README.md
├── bayesboot.Rproj
├── cran-comments.md
├── man
│   ├── as.bayesboot.Rd
│   ├── bayesboot.Rd
│   ├── figures
│   │   ├── README-car_plot-1.png
│   │   ├── README-height_comparison-1.png
│   │   ├── README-president_summary-1.png
│   │   ├── plotPost1.jpg
│   │   └── plotPost2.jpg
│   ├── plot.bayesboot.Rd
│   ├── plotPost.Rd
│   ├── print.bayesboot.Rd
│   ├── rudirichlet.Rd
│   └── summary.bayesboot.Rd
└── tests
    ├── testthat.R
    └── testthat
        ├── Rplots.pdf
        ├── test-bayesboot-deterministic.R
        └── test-bayesboot-non-deterministic.R

/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^README\.Rmd$
4 | ^README-.*\.png$
5 | ^testingground$
6 | ^blogposts$
7 | Rplots.pdf
8 | cran-comments.md
9 | ^README-files$
10 | ^CRAN-SUBMISSION$
11 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | testingground
5 | blogposts
6 | 
--------------------------------------------------------------------------------
/CRAN-SUBMISSION:
--------------------------------------------------------------------------------
1 | Version: 0.2.3
2 | Date: 2025-03-30 22:01:24 UTC
3 | SHA: 0dbf2a95589f882cd4385b4c77a5823e84d8b81e
4 | 
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: bayesboot
2 | Type: Package
3 | Title: An Implementation of Rubin's (1981) Bayesian Bootstrap
4 | Version: 0.2.3
5 | Date: 2025-03-30
6 | Authors@R: person("Rasmus", "Bååth", email = "rasmus.baath@gmail.com", role = c("aut", "cre"))
7 | Description: Functions for performing the Bayesian bootstrap as introduced by
8 |     Rubin (1981) and for summarizing the result.
9 |     The implementation can handle both summary statistics that work on a
10 |     weighted version of the data and summary statistics that work on a
11 |     resampled data set.
12 | License: MIT + file LICENSE 13 | URL: https://github.com/rasmusab/bayesboot 14 | BugReports: https://github.com/rasmusab/bayesboot/issues 15 | RoxygenNote: 7.3.2 16 | Imports: plyr (>= 1.8.3), 17 | HDInterval(>= 0.1.1) 18 | Depends: R (>= 3.2.0) 19 | Suggests: 20 | testthat, 21 | foreach, 22 | doParallel, 23 | boot 24 | Encoding: UTF-8 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2016 2 | COPYRIGHT HOLDER: Rasmus Bååth 3 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(plot,bayesboot) 4 | S3method(print,bayesboot) 5 | S3method(print,summary.bayesboot) 6 | S3method(summary,bayesboot) 7 | export(as.bayesboot) 8 | export(bayesboot) 9 | export(plotPost) 10 | export(rudirichlet) 11 | import(grDevices) 12 | import(graphics) 13 | import(stats) 14 | import(utils) 15 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | bayesboot 0.2.3 2 | =================== 3 | 4 | * Fixed issue with Rd \link{} targets missing package anchors. 5 | 6 | bayesboot 0.2.2 7 | =================== 8 | 9 | * Fixed issue with unstated dependencies in the tests. 10 | 11 | bayesboot 0.2.1 12 | ======================= 13 | 14 | * Modified the tests so that they are compatible with upcoming version of the testthat package. 15 | 16 | bayesboot 0.2.0 17 | ======================= 18 | 19 | * Added print.bayesboot method. 20 | * bayesboot now warns when the posterior sample contains NAs, NaNs and NULLs. 21 | * plot.bayesboot now correctly resets graph parameters. 22 | * Removed bayesboot::hdi and instead depend on HDInterval::hdi . 23 | 24 | 25 | bayesboot 0.1.0 26 | ======================= 27 | 28 | * Initial release. 29 | -------------------------------------------------------------------------------- /R/bayesboot.R: -------------------------------------------------------------------------------- 1 | # Importing some of the base packages which are used in the functions below. 2 | #' @import stats 3 | #' @import grDevices 4 | #' @import graphics 5 | #' @import utils 6 | NULL 7 | 8 | #' Produce random draws from a uniform Dirichlet distribution 9 | #' 10 | #' \code{rudirichlet} produces \code{n} draws from a \code{d}-dimensional 11 | #' uniform Dirichlet distribution. Here "uniform" implies that any combination 12 | #' of values on the support of the distribution is equally likely, that is, the 13 | #' \eqn{\alpha} parameters to the Dirichlet distribution are all set to 1.0. 14 | #' 15 | #' In the context of the Bayesian bootstrap \code{rudirichlet} is used to 16 | #' produces the bootstrap weights. Therefore, \code{rudirichlet} can be used if 17 | #' you directly want to generate Bayesian bootstrap weights. 18 | #' 19 | #' 20 | #' @param n the number of draws. 21 | #' @param d the dimension of the Dirichlet distribution. 22 | #' 23 | #' @return An \code{n} by \code{d} matrix. 
24 | #' @export 25 | #' 26 | #' @examples 27 | #' set.seed(123) 28 | #' rudirichlet(2, 3) 29 | #' # Should produce the following matrix: 30 | #' # [,1] [,2] [,3] 31 | #' # [1,] 0.30681 0.2097 0.4834 32 | #' # [2,] 0.07811 0.1390 0.7829 33 | #' 34 | #' # The above could be seen as a sample of two Bayesian bootstrap weights for a 35 | #' # dataset of size three. 36 | rudirichlet <- function(n, d) { 37 | # Using the facts that you can transform gamma distributed draws into 38 | # Dirichlet draws and that rgamma(n, 1) <==> rexp(n, 1) 39 | # See here for explanation: 40 | # https://en.wikipedia.org/wiki/Dirichlet_distribution#Random_number_generation#Gamma_distribution 41 | rexp.mat <- matrix( rexp(d * n, 1) , nrow = n, ncol = d) 42 | dirichlet.weights <- rexp.mat / rowSums(rexp.mat) 43 | dirichlet.weights 44 | } 45 | 46 | # From the examples on the help page for base::is.integer 47 | is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) { 48 | abs(x - round(x)) < tol 49 | } 50 | 51 | 52 | #' The Bayesian bootstrap 53 | #' 54 | #' Performs a Bayesian bootstrap and returns a \code{data.frame} with a sample 55 | #' of size \code{R} representing the posterior distribution of the (possibly 56 | #' multivariate) summary \code{statistic}. 57 | #' 58 | #' The summary statistic is a function of the data that represents a feature of 59 | #' interest, where a typical statistic is the mean. In \code{bayesboot} it is 60 | #' most efficient to define the statistic as a function taking the data as the 61 | #' first argument and a vector of weights as the second argument. An example of 62 | #' such a function is \code{\link{weighted.mean}}. Indicate that you are using a 63 | #' statistic defined in this way by setting \code{use.weights = TRUE}. 64 | #' 65 | #' It is also possible to define the statistic as a function only taking data 66 | #' (and no weights) by having \code{use.weights = FALSE} (the default). This 67 | #' will, for each of the \code{R} Bayesian bootstrap draws, give a resampled 68 | #' version of the \code{data} of size \code{R2} to \code{statistic}. This will 69 | #' be much slower than using \code{use.weights = TRUE} but will work with a 70 | #' larger range of statistics (the \code{\link{median}}, for example) 71 | #' 72 | #' For more information regarding this implementation of the Bayesian bootstrap 73 | #' see the blog post 74 | #' \href{https://www.sumsar.net/blog/2015/07/easy-bayesian-bootstrap-in-r/}{Easy 75 | #' Bayesian Bootstrap in R}. For more information about the model behind the 76 | #' Bayesian bootstrap see the blog post 77 | #' \href{https://www.sumsar.net/blog/2015/04/the-non-parametric-bootstrap-as-a-bayesian-model/}{The 78 | #' Non-parametric Bootstrap as a Bayesian Model} and, of course, 79 | #' \href{https://projecteuclid.org/euclid.aos/1176345338}{the original Bayesian 80 | #' bootstrap paper by Rubin (1981)}. 81 | #' 82 | #' @note \itemize{ 83 | #' \item While \code{R} and \code{R2} are set to \code{4000} by 84 | #' default, that should not be taken to indicate that a sample of size 4000 is 85 | #' sufficient nor recommended. 86 | #' 87 | #' \item When using \code{use.weights = FALSE} it is important to use a summary 88 | #' statistic that does not depend on the sample size. That is, doubling the size 89 | #' of a dataset by cloning data should result in the same statistic as when 90 | #' using the original dataset. 
An example of a statistic that depends on the 91 | #' sample size is the sample standard deviation (that is, \code{\link{sd}}), and 92 | #' when using \code{bayesboot} it would make more sense to use the population 93 | #' standard deviation (as in the example below). } 94 | #' 95 | #' @param data Either a vector or a list, or a matrix or a data.frame with one 96 | #' datapoint per row. The format of \code{data} should be compatible with the 97 | #' first argument of \code{statistic} 98 | #' @param statistic A function implementing the summary statistic of interest 99 | #' where the first argument should take the data. If \code{use.weights = TRUE} 100 | #' then the second argument should take a vector of weights. 101 | #' @param R The size of the posterior sample from the Bayesian bootstrap. 102 | #' @param R2 When \code{use.weights = FALSE} this is the size of the resample of 103 | #' the data used to approximate the weighted statistic. 104 | #' @param use.weights When \code{TRUE} the data will be reweighted, like in the 105 | #' original Bayesian bootstrap. When \code{FALSE} (the default) the 106 | #' reweighting will be approximated by resampling the data. 107 | #' @param .progress The type of progress bar ("none", "text", "tk", and "win"). 108 | #' See the \code{.progress} argument to \code{\link[plyr]{adply}} in the plyr 109 | #' package. 110 | #' @param .parallel If \code{TRUE} enables parallel processing. See the 111 | #' \code{.parallel} argument to \code{\link[plyr]{adply}} in the plyr package. 112 | #' @param ... Other arguments passed on to \code{statistic} 113 | #' 114 | #' @return A \code{data.frame} with \code{R} rows, each row being a draw from 115 | #' the posterior distribution of the Bayesian bootstrap. The number of columns 116 | #' is decided by the length of the output from \code{statistic}. If 117 | #' \code{statistic} does not return a vector or data frame with named values 118 | #' then the columns will be given the names \code{V1}, \code{V2}, \code{V3}, 119 | #' etc. While the output is a \code{data.frame} it has subclass 120 | #' \code{bayesboot} which enables specialized \code{\link{summary}} and 121 | #' \code{\link{plot}} functions for the result of a \code{bayesboot} call. 122 | #' 123 | #' @examples 124 | #' 125 | #' ### A Bayesian bootstrap analysis of a mean ### 126 | #' 127 | #' # Heights of the last ten American presidents in cm (Kennedy to Obama). 128 | #' heights <- c(183, 192, 182, 183, 177, 185, 188, 188, 182, 185); 129 | #' b1 <- bayesboot(heights, mean) 130 | #' # But it's more efficient to use the a weighted statistic. 131 | #' b2 <- bayesboot(heights, weighted.mean, use.weights = TRUE) 132 | #' 133 | #' # The result of bayesboot can be plotted and summarized 134 | #' plot(b2) 135 | #' summary(b2) 136 | #' 137 | #' # It can also be easily post processed. 138 | #' # Here the probability that the mean is > 182 cm. 139 | #' mean( b2[,1] > 182) 140 | #' 141 | #' ### A Bayesian bootstrap analysis of a SD ### 142 | #' 143 | #' # When use.weights = FALSE it is important that the summary statistics 144 | #' # does not change as a function of sample size. This is the case with 145 | #' # the sample standard deviation, so here we have to implement a 146 | #' # function calculating the population standard deviation. 
147 | #' pop.sd <- function(x) { 148 | #' n <- length(x) 149 | #' sd(x) * sqrt( (n - 1) / n) 150 | #' } 151 | #' 152 | #' b3 <- bayesboot(heights, pop.sd) 153 | #' summary(b3) 154 | #' 155 | #' ### A Bayesian bootstrap analysis of a correlation coefficient ### 156 | #' 157 | #' # Data comparing two methods of measuring blood flow. 158 | #' # From Table 1 in Miller (1974) and used in an example 159 | #' # by Rubin (1981, p. 132). 160 | #' blood.flow <- data.frame( 161 | #' dye = c(1.15, 1.7, 1.42, 1.38, 2.80, 4.7, 4.8, 1.41, 3.9), 162 | #' efp = c(1.38, 1.72, 1.59, 1.47, 1.66, 3.45, 3.87, 1.31, 3.75)) 163 | #' 164 | #' # Using the weighted correlation (corr) from the boot package. 165 | #' library(boot) 166 | #' b4 <- bayesboot(blood.flow, corr, R = 1000, use.weights = TRUE) 167 | #' hist(b4[,1]) 168 | #' 169 | #' ### A Bayesian bootstrap analysis of lm coefficients ### 170 | #' 171 | #' # A custom function that returns the coefficients of 172 | #' # a weighted linear regression on the blood.flow data 173 | #' lm.coefs <- function(d, w) { 174 | #' coef( lm(efp ~ dye, data = d, weights = w) ) 175 | #' } 176 | #' 177 | #' b5 <- bayesboot(blood.flow, lm.coefs, R = 1000, use.weights = TRUE) 178 | #' 179 | #' # Plotting the marginal posteriors 180 | #' plot(b5) 181 | #' 182 | #' # Plotting a scatter of regression lines from the posterior 183 | #' plot(blood.flow) 184 | #' for(i in sample(nrow(b5), size = 20)) { 185 | #' abline(coef = b5[i, ], col = "grey") 186 | #' } 187 | #' @references 188 | #' Miller, R. G. (1974) The jackknife - a review. \emph{Biometrika}, 189 | #' \bold{61(1)}, 1--15. 190 | #' 191 | #' Rubin, D. B. (1981). The Bayesian bootstrap. \emph{The annals of statistics}, 192 | #' \bold{9(1)}, 130--134. 193 | #' @export 194 | bayesboot <- function(data, statistic, R = 4000, R2 = 4000, use.weights = FALSE, 195 | .progress = "none", .parallel = FALSE, ...) { 196 | call <- match.call() 197 | # Pick out the first part of statistic matching a legal variable name, 198 | # just to be used as a label when plotting later. 199 | statistic.label <- deparse(substitute(statistic)) 200 | match <- regexpr("^(\\w|\\.|_)*", statistic.label[1]) 201 | statistic.label <- regmatches(statistic.label[1], match) 202 | 203 | # Doing some error checks 204 | if(length(R) != 1 || !is.wholenumber(R) || R < 1) { 205 | stop("R should be a single whole number >= 1.") 206 | } 207 | 208 | if ((NROW(data) == 0) || is.null(NROW(data))) { 209 | stop("no data in call to 'bayesboot'") 210 | } 211 | 212 | if(use.weights) { 213 | if (length(formals(statistic)) < 2) { 214 | stop("If use.weights == TRUE then statistic should take a weight vector as the second argument.") 215 | } 216 | w <- rep(1 / NROW(data), NROW(data)) 217 | tryCatch(stat.result <- statistic(data, w, ...), 218 | error = function(e) { 219 | message("Applying the statistic on the original data and with uniform weights resulted in an error") 220 | stop(e) 221 | } 222 | ) 223 | # TODO: Should I add some more checks to stat.result? Like, that it contains no NA, values? 224 | # or should I maybe do these tests to the final posterior samples and issue a varning if 225 | # there are NAs, NULLs and similar? 
226 | 227 | } else { # use.weights == FALSE 228 | if(length(R2) != 1 || is.na(R2) || !is.wholenumber(R2) || R2 < 1) { 229 | stop("If use.weights == FALSE then R2 should be a single whole number >= 1.") 230 | } 231 | tryCatch(stat.result <- statistic(data, ...), 232 | error = function(e) { 233 | message("Applying the statistic on the original data resulted in an error") 234 | stop(e) 235 | } 236 | ) 237 | } 238 | 239 | if(! (is.atomic(stat.result) || is.data.frame(stat.result) || is.matrix(stat.result)) && 240 | NROW(stat.result) != 1) { 241 | stop(paste( 242 | "Applying the statistic on the original data should return a vector, or a", 243 | "data.frame or matrix with one row, but an object with the following", 244 | "structure was returned instead:\n", 245 | paste(capture.output(str(stat.result)), collapse = "\n") 246 | )) 247 | } 248 | 249 | dirichlet.weights <- rudirichlet(R, NROW(data)) 250 | 251 | if(use.weights) { 252 | boot.sample <- plyr::adply( 253 | dirichlet.weights, 1, .progress = .progress, .parallel = .parallel, .id = NULL, 254 | .fun = function(weights) { 255 | statistic(data, weights, ...) 256 | } 257 | ) 258 | 259 | } else { 260 | if(is.null(dim(data)) || length(dim(data)) < 2) { # data is a list type of object 261 | boot.sample <- plyr::adply( 262 | dirichlet.weights, 1, .progress = .progress, .parallel = .parallel, .id = NULL, 263 | .fun = function(weights) { 264 | data.sample <- sample(data, size = R2, replace = TRUE, prob = weights) 265 | statistic(data.sample, ...) 266 | } 267 | ) 268 | } else { # assume data can be subsetted like a matrix or data.frame 269 | boot.sample <- plyr::adply( 270 | dirichlet.weights, 1, .progress = .progress, .parallel = .parallel, .id = NULL, 271 | .fun = function(weights) { 272 | index.sample <- sample(nrow(data), size = R2, replace = TRUE, prob = weights) 273 | statistic(data[index.sample, ,drop = FALSE], ...) 274 | } 275 | ) 276 | } 277 | } 278 | class(boot.sample) <- c("bayesboot", class(boot.sample)) 279 | attr(boot.sample, "statistic.label") <- statistic.label 280 | attr(boot.sample, "call") <- call 281 | # Warn if boot.sample contains "non-values". 282 | col.should.warn <- plyr::laply(boot.sample, function(boot.col) { 283 | any(is.na(boot.col) |is.nan(boot.col) | is.null(boot.col)) 284 | }) 285 | if(any(col.should.warn)) { 286 | warning(paste( 287 | "The sample from bayesboot contains either NAs, NaNs or NULLs.", 288 | "Make sure that your statistic function only return actual values.")) 289 | } 290 | boot.sample 291 | } 292 | 293 | 294 | 295 | #' Summarize the result of \code{bayesboot} 296 | #' 297 | #' Summarizes the result of a call to \code{bayesboot} by calculating means, SDs, 298 | #' highest density intervals and quantiles of the posterior marginals. 299 | #' 300 | #' @param object The bayesboot object to summarize. 301 | #' @param cred.mass The probability mass to include in the highest density intervals. 302 | #' @param ... Not used. 303 | #' 304 | #' @return A data frame with three columns: (1) \bold{statistic} the name of the 305 | #' statistic, (2) \bold{measure} the name of the summarizing measure, and (3) 306 | #' \bold{value} the value of the summarizing measure. 307 | #' 308 | #' @seealso \code{\link[HDInterval]{hdi}} in the HDInterval package for directly calculating 309 | #' highest density intervals from \code{bayesboot} objects. 310 | #' 311 | #' @export 312 | summary.bayesboot <- function(object, cred.mass = 0.95, ...) 
{ 313 | bootsum <- plyr::ldply(seq_len(ncol(object)), function(i) { 314 | s <- object[,i] 315 | if(!is.numeric(s)) { 316 | warning(paste("The statistic", names(object)[i] , "was skipped as", 317 | "summary.bayesboot can't handle non-numeric statistics.")) 318 | return(data.frame()) 319 | } 320 | data.frame(statistic = names(object)[i], 321 | measure = c("mean", "sd", "hdi.low", "hdi.high","q2.5%", "q25%", "median" ,"q75%", "q97.5%"), 322 | value = c(mean(s), sd(s), HDInterval::hdi(s, cred.mass), quantile(s, c(0.025, 0.25, 0.5, 0.75, 0.975)))) 323 | }) 324 | attr(bootsum, "statistic.label") <- attr(object, "statistic.label") 325 | attr(bootsum, "call") <- attr(object, "call") 326 | attr(bootsum, "R") <- nrow(object) 327 | attr(bootsum, "cred.mass") <- cred.mass 328 | class(bootsum) <- c("summary.bayesboot", class(bootsum)) 329 | bootsum 330 | } 331 | 332 | #' Print the first number of draws from the Bayesian bootstrap 333 | #' 334 | #' @param x The bayesboot object to print. 335 | #' @param n The number of draws to print. 336 | #' @param ... Not used. 337 | #' @export 338 | print.bayesboot <- function(x, n = 10, ...) { 339 | cat(paste0("The first ", n," draws (out of ", nrow(x) ,") from the Bayesian bootstrap:\n")) 340 | cat("\n") 341 | print(as.data.frame(head(x, n))) 342 | cat(".. ...\n") 343 | cat("\n") 344 | cat("Use summary() to produce a summary of the posterior distribution.\n") 345 | invisible(x) 346 | } 347 | 348 | #' @method print summary.bayesboot 349 | #' @export 350 | print.summary.bayesboot <- function(x, ...) { 351 | stat.table <- plyr::ddply(x, "statistic", function(s) { 352 | stats <- s$value 353 | names(stats) <- s$measure 354 | stats 355 | }) 356 | cat("Bayesian bootstrap\n") 357 | cat("\n") 358 | cat("Number of posterior draws:", attr(x, "R") , "\n") 359 | cat("\n") 360 | if(nrow(x) > 0) { 361 | hdi.percentage <- paste0(round(attr(x, "cred.mass") * 100), "%") 362 | cat("Summary of the posterior (with", hdi.percentage,"Highest Density Intervals):\n") 363 | print(stat.table[,c("statistic","mean", "sd", "hdi.low", "hdi.high")], row.names = FALSE) 364 | cat("\n") 365 | cat("Quantiles:\n") 366 | print(stat.table[,c("statistic", "q2.5%", "q25%", "median" ,"q75%", "q97.5%")], row.names = FALSE) 367 | cat("\n") 368 | } 369 | cat("Call:\n", format(attr(x, "call"))) 370 | cat("\n") 371 | invisible(x) 372 | } 373 | 374 | #' Coerce to a \code{bayesboot} object 375 | #' 376 | #' This converts an object into a data frame and adds the class 377 | #' \code{bayesboot}. Doing this is only useful in the case you would want to use 378 | #' the \code{plot} and \code{summary} methods for \code{bayesboot} objects. 379 | #' 380 | #' @param object Any object that can be converted to a data frame. 381 | #' 382 | #' @return A \code{data.frame} with subclass \code{bayesboot}. 383 | #' @export 384 | as.bayesboot <- function(object) { 385 | object <- as.data.frame(object) 386 | class(object) <- c("bayesboot", class(object)) 387 | if(is.null(attr(object, "statistic.label"))) { 388 | attr(object, "statistic.label") <- "" 389 | } 390 | if(is.null(attr(object, "call"))) { 391 | attr(object, "call") <- "" 392 | } 393 | object 394 | } 395 | 396 | #' Plot the result of \code{bayesboot} 397 | #' 398 | #' Produces histograms showing the marginal posterior distributions from a 399 | #' \code{bayesboot} call. Uses the \code{\link{plotPost}} function to produce 400 | #' the individual histograms. 401 | #' 402 | #' @param x The bayesboot object to plot. 
403 | #' @param cred.mass the probability mass to include in credible intervals, or 404 | #' NULL to suppress plotting of credible intervals. 405 | #' @param plots.per.page The maximum numbers of plots per page. 406 | #' @param cex,cex.lab,... Further parameters passed on to 407 | #' \code{\link{plotPost}}. 408 | #' 409 | #' @export 410 | plot.bayesboot <- function(x, cred.mass = 0.95, plots.per.page = 3, cex = 1.2, cex.lab=1.3, ...) { 411 | old.devAskNewPage <- devAskNewPage() 412 | old.par <- par(mfrow = c(min(plots.per.page, ncol(x)) , 1) , mar = c(4.1, 4.1, 0.5, 4.1), mgp = c(2.5, 1, 0)) 413 | on.exit({ # revert graphical parameters 414 | par(old.par) 415 | devAskNewPage(old.devAskNewPage) 416 | }) 417 | n.plots <- 0 418 | for(i in seq_len(ncol(x))) { 419 | if(!is.numeric(x[, i])) { 420 | warning(paste("The statistic", names(x)[i] , "was skipped as", 421 | "plot.bayesboot can't handle non-numeric statistics.")) 422 | next 423 | } 424 | n.plots <- n.plots + 1 425 | if(n.plots > plots.per.page) { 426 | devAskNewPage(TRUE) 427 | } 428 | if(ncol(x) == 1 && names(x)[i] == "V1" && attr(x, "statistic.label") != "") { 429 | # There is only one statistic and it has an uninformative default name 430 | # so use the begining of the function call instead as a statistic, unless 431 | # it is empty. 432 | statistic_name <- attr(x, "statistic.label") 433 | } else { # use the column name 434 | statistic_name <- names(x)[i] 435 | } 436 | plotPost(x[, i], credMass = cred.mass, xlab = statistic_name, cex = cex, cex.lab = cex.lab, ...) 437 | } 438 | invisible(NULL) 439 | } 440 | 441 | 442 | -------------------------------------------------------------------------------- /R/plotPost.R: -------------------------------------------------------------------------------- 1 | #' Graphic display of a posterior probability distribution 2 | #' 3 | #' Plot the posterior probability distribution for a single parameter from a 4 | #' vector of samples, typically from an MCMC process, with appropriate summary 5 | #' statistics. 6 | #' 7 | #' The data are plotted either as a histogram (above) or, if \code{showCurve = 8 | #' TRUE}, as a fitted kernel density curve (below). Either the mean or the mode 9 | #' of the distribution is displayed, depending on the parameter \code{showMode.} 10 | #' The Highest Density Interval (HDI) is shown as a horizontal bar, with labels 11 | #' for the ends of the interval. 12 | #' 13 | #' \if{html}{\figure{plotPost1.jpg} } 14 | #' \if{latex}{\figure{plotPost1.jpg}{options: width=5cm}} 15 | #' \cr 16 | #' \cr 17 | #' \if{html}{\figure{plotPost2.jpg} } 18 | #' \if{latex}{\figure{plotPost2.jpg}{options: width=5cm}} 19 | #' 20 | #' If values for a ROPE are supplied, these are shown as dark red vertical 21 | #' dashed lines, together with the percentage of probability mass within the 22 | #' ROPE. If a comparison value (\code{compVal}) is supplied, this is shown as a 23 | #' vertical green dotted line, together with the probability mass below and 24 | #' above this value. 25 | #' 26 | #' @param paramSampleVec A vector of samples drawn from the target distribution. 27 | #' @param credMass the probability mass to include in credible intervals, or 28 | #' NULL to suppress plotting of credible intervals. 29 | #' @param compVal a value for comparison with those plotted. 30 | #' @param ROPE a two element vector, such as \code{c(-1, 1)}, specifying the 31 | #' limits of the Region Of Practical Equivalence. 
32 | #' @param HDItextPlace a value in [0,1] that controls the horizontal position of 33 | #' the labels at the ends of the HDI bar. 34 | #' @param showMode logical: if TRUE, the mode is displayed instead of the mean. 35 | #' @param showCurve logical: if TRUE, the posterior density will be represented 36 | #' by a kernel density function instead of a histogram. 37 | #' @param \dots graphical parameters and the \code{breaks} parameter for the 38 | #' histogram. 39 | #' @return Returns an object of class \code{histogram} invisibly. Used for its 40 | #' plotting side-effect. 41 | #' 42 | #' @note The origin of this function is 43 | #' \href{https://cran.r-project.org/package=BEST}{the BEST 44 | #' package} which is based on Kruschke(2015, 2013). 45 | #' 46 | #' @author John Kruschke, modified by Mike Meredith 47 | #' @seealso For details of the HDI calculation, see \code{\link[HDInterval]{hdi}}. 48 | #' @examples 49 | #' # Generate some data 50 | #' tst <- rnorm(1e5, 3, 1) 51 | #' plotPost(tst) 52 | #' plotPost(tst, col='wheat', border='magenta') 53 | #' plotPost(tst, credMass=0.8, ROPE=c(-1,1), xlab="Response variable") 54 | #' plotPost(tst, showMode=TRUE, showCurve=TRUE, compVal=5.5) 55 | #' 56 | #' # For integers: 57 | #' tst <- rpois(1e5, 12) 58 | #' plotPost(tst) 59 | #' 60 | #' # A severely bimodal distribution: 61 | #' tst2 <- c(rnorm(1e5), rnorm(5e4, 7)) 62 | #' plotPost(tst2) # A valid 95% CrI, but not HDI 63 | #' plotPost(tst2, showCurve=TRUE) # Correct 95% HDI 64 | #' @references 65 | #' Kruschke, J. K. (2015) \emph{Doing Bayesian data analysis, second 66 | #' edition: A tutorial with R, JAGS, and Stan.} Waltham, MA: Academic Press / 67 | #' Elsevier. 68 | #' 69 | #' Kruschke, J. K. (2013) Bayesian estimation supersedes the t test. 70 | #' \emph{Journal of Experimental Psychology: General}, \bold{142(2)}, 573. 71 | #' @export 72 | plotPost <- 73 | function(paramSampleVec, credMass = 0.95, compVal = NULL, ROPE = NULL, 74 | HDItextPlace = 0.7, showMode = FALSE, showCurve = FALSE, ...) { 75 | 76 | # Does a plot for a single parameter. Called by plot.BEST but also exported. 77 | # Returns a histogram object invisibly. 78 | # This stuff should be in the ... argument: 79 | # yaxt="n", ylab="", xlab="Parameter", main="", cex.lab=1.5, cex=1.4, 80 | # xlim=range(compVal, paramSampleVec), col="skyblue", border="white", 81 | # breaks=NULL 82 | 83 | # Deal with ... argument: 84 | dots <- list(...) 
85 | if(length(dots) == 1 && is.list(dots[[1]])) 86 | dots <- dots[[1]] 87 | defaultArgs <- list(xlab=deparse(substitute(paramSampleVec)), 88 | yaxt="n", ylab="", main="", cex.lab=1.5, 89 | cex=1.4, col="skyblue", border="white", bty="n", lwd=5, freq=FALSE, 90 | xlim=range(compVal, HDInterval::hdi(paramSampleVec, 0.99))) 91 | useArgs <- modifyList(defaultArgs, dots) 92 | 93 | # Get breaks argument 94 | breaks <- dots$breaks 95 | if (is.null(breaks)) { 96 | if (all(paramSampleVec == round(paramSampleVec))) { # all integers 97 | breaks <- seq(min(paramSampleVec), max(paramSampleVec) + 1) - 0.5 98 | } else { 99 | by <- diff(HDInterval::hdi(paramSampleVec))/18 100 | breaks <- unique(c( seq( from=min(paramSampleVec), to=max(paramSampleVec), 101 | by=by), max(paramSampleVec) )) 102 | } 103 | } 104 | histinfo <- hist(paramSampleVec, breaks=breaks, plot=FALSE) 105 | histinfo$xname <- useArgs$xlab 106 | 107 | oldpar <- par(xpd=TRUE) ; on.exit(par(oldpar)) 108 | 109 | if (showCurve) { 110 | densCurve <- density( paramSampleVec, adjust=2 ) 111 | selPlot <- names(useArgs) %in% 112 | c(names(as.list(args(plot.default))), names(par(no.readonly=TRUE))) 113 | plotArgs <- useArgs[selPlot] 114 | plotArgs$x <- densCurve$x 115 | plotArgs$y <- densCurve$y 116 | plotArgs$type <- "l" 117 | plotArgs$xpd <- FALSE 118 | do.call(plot, plotArgs, quote=TRUE) 119 | abline(h=0, col='grey', xpd=FALSE) 120 | # Display the HDI. 121 | if(!is.null(credMass)) { 122 | HDI <- HDInterval::hdi(densCurve, credMass, allowSplit=TRUE) 123 | ht <- attr(HDI, "height") 124 | segments(HDI[, 1], ht, HDI[, 2], ht, lwd=4, lend='butt') 125 | segments(HDI, 0, HDI, ht, lty=2) 126 | text( mean(HDI), ht, bquote(.(100*credMass) * "% HDI" ), 127 | adj=c(.5,-1.7), cex=useArgs$cex ) 128 | text( HDI, ht, bquote(.(signif(HDI, 3))), 129 | pos=3, cex=useArgs$cex ) 130 | } 131 | } else { 132 | plot.histogram.args.names <- c("freq", "density", "angle", "border", 133 | "main", "sub", "xlab", "ylab", "xlim", "ylim", "axes", "labels", 134 | "add") # plot.histogram not exported, so need to cheat! 135 | selPlot <- names(useArgs) %in% 136 | c(plot.histogram.args.names, names(par(no.readonly=TRUE))) 137 | plotArgs <- useArgs[selPlot] 138 | plotArgs$lwd <- 1 139 | plotArgs$x <- histinfo 140 | do.call(plot, plotArgs, quote=TRUE) 141 | # Display the HDI. 142 | if(!is.null(credMass)) { 143 | HDI <- HDInterval::hdi( paramSampleVec, credMass ) 144 | lines(HDI, c(0,0), lwd=4, lend='butt') 145 | text( mean(HDI), 0, bquote(.(100*credMass) * "% HDI" ), 146 | adj=c(.5,-1.7), cex=useArgs$cex ) 147 | text( HDI[1], 0, bquote(.(signif(HDI[1],3))), 148 | adj=c(HDItextPlace,-0.5), cex=useArgs$cex ) 149 | text( HDI[2], 0, bquote(.(signif(HDI[2],3))), 150 | adj=c(1.0-HDItextPlace,-0.5), cex=useArgs$cex ) 151 | } 152 | } 153 | 154 | 155 | # Display mean or mode: 156 | cenTendHt <- 0.9 * max(histinfo$density) 157 | if ( showMode==FALSE ) { 158 | meanParam <- mean( paramSampleVec ) 159 | text( meanParam, cenTendHt, 160 | bquote(mean==.(signif(meanParam,3))), adj=c(.5,0), cex=useArgs$cex ) 161 | } else { 162 | dres <- density( paramSampleVec ) 163 | modeParam <- dres$x[which.max(dres$y)] 164 | text( modeParam, cenTendHt, 165 | bquote(mode==.(signif(modeParam,3))), adj=c(.5,0), cex=useArgs$cex ) 166 | } 167 | # Display the comparison value. 
168 |   if ( !is.null( compVal ) ) {
169 |     cvHt <- 0.7 * max(histinfo$density)
170 |     cvCol <- "darkgreen"
171 |     pcgtCompVal <- round( 100 * sum( paramSampleVec > compVal )
172 |                           / length( paramSampleVec ) , 1 )
173 |     pcltCompVal <- 100 - pcgtCompVal
174 |     lines( c(compVal,compVal), c(0.96*cvHt,0),
175 |            lty="dotted", lwd=1, col=cvCol )
176 |     text( compVal, cvHt,
177 |           bquote( .(pcltCompVal)*"% < " *
178 |                   .(signif(compVal,3)) * " < "*.(pcgtCompVal)*"%" ),
179 |           adj=c(pcltCompVal/100,0), cex=0.8*useArgs$cex, col=cvCol )
180 |   }
181 |   # Display the ROPE.
182 |   if ( !is.null( ROPE ) ) {
183 |     ROPEtextHt <- 0.55 * max(histinfo$density)
184 |     ropeCol <- "darkred"
185 |     pcInROPE <- ( sum( paramSampleVec > ROPE[1] & paramSampleVec < ROPE[2] )
186 |                   / length( paramSampleVec ) )
187 |     lines( c(ROPE[1],ROPE[1]), c(0.96*ROPEtextHt,0), lty="dotted", lwd=2,
188 |            col=ropeCol )
189 |     lines( c(ROPE[2],ROPE[2]), c(0.96*ROPEtextHt,0), lty="dotted", lwd=2,
190 |            col=ropeCol)
191 |     text( mean(ROPE), ROPEtextHt,
192 |           bquote( .(round(100*pcInROPE))*"% in ROPE" ),
193 |           adj=c(.5,0), cex=1, col=ropeCol )
194 |   }
195 | 
196 |   return(invisible(histinfo))
197 | }
198 | 
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | output:
3 |   md_document:
4 |     variant: markdown_github
5 | ---
6 | 
7 | ```{r, echo = FALSE}
8 | knitr::opts_chunk$set(
9 |   collapse = TRUE,
10 |   comment = "##",
11 |   fig.path = "man/figures/README-"
12 | )
13 | set.seed(123)
14 | ```
15 | 
16 | `bayesboot`: Easy Bayesian Bootstrap in R
17 | ========================================
18 | 
19 | The `bayesboot` package implements a function `bayesboot` that performs the Bayesian bootstrap introduced by Rubin (1981). The implementation can handle both summary statistics that work on a weighted version of the data and summary statistics that work on a resampled data set.
20 | 
21 | `bayesboot` is available on CRAN and can be installed in the usual way:
22 | 
23 | ```{r eval=FALSE}
24 | install.packages("bayesboot")
25 | ```
26 | 
27 | A simple example
28 | ---------------------
29 | 
30 | Here is a Bayesian bootstrap analysis of the mean height of the last ten American presidents:
31 | 
32 | ```{r}
33 | # Heights of the last ten American presidents in cm (Kennedy to Obama).
34 | heights <- c(183, 192, 182, 183, 177, 185, 188, 188, 182, 185)
35 | 
36 | library(bayesboot)
37 | b1 <- bayesboot(heights, mean)
38 | ```
39 | 
40 | The resulting posterior distribution in `b1` can now be `plot`ted and `summary`ized:
41 | 
42 | ```{r president_summary, fig.height= 4, fig.width=5}
43 | summary(b1)
44 | plot(b1)
45 | ```
46 | 
47 | While it is possible to use a summary statistic that works on a resample of the original data, it is more efficient if it's possible to use a summary statistic that works on a *reweighting* of the original dataset. Instead of using `mean` above it would be better to use `weighted.mean` like this:
48 | 
49 | ```{r}
50 | b2 <- bayesboot(heights, weighted.mean, use.weights = TRUE)
51 | ```
52 | 
53 | A call to `bayesboot` always results in a `data.frame` with one column per dimension of the summary statistic. If the summary statistic does not return a named vector the columns will be called `V1`, `V2`, etc. The result of a `bayesboot` call can be further inspected and post processed. For example:
54 | 
55 | ```{r}
56 | # Given the model and the data, this is the probability that the mean
57 | # heights of American presidents is above the mean heights of
58 | # American males as given by www.cdc.gov/nchs/data/series/sr_11/sr11_252.pdf
59 | mean(c(b2[,1] > 175.9, TRUE, FALSE))
60 | ```
61 | 
62 | ### Comparing two groups
63 | 
64 | If we want to compare the means of two groups, we will have to call `bayesboot` twice with each dataset and then use the resulting samples to calculate the posterior difference. For example, let's say we have the heights of the opponents that lost to the presidents in `heights` the first time those presidents were elected. Now we are interested in comparing the mean height of American presidents with the mean height of presidential candidates that lost.
65 | 
66 | ```{r height_comparison, fig.height= 4, fig.width=5}
67 | # The heights of opponents of American presidents (first time they were elected).
68 | # From Richard Nixon to John McCain
69 | heights_opponents <- c(182, 180, 180, 183, 177, 173, 188, 185, 175)
70 | 
71 | # Running the Bayesian bootstrap for both datasets
72 | b_presidents <- bayesboot(heights, weighted.mean, use.weights = TRUE)
73 | b_opponents <- bayesboot(heights_opponents, weighted.mean, use.weights = TRUE)
74 | 
75 | # Calculating the posterior difference
76 | # (here converting back to a bayesboot object for pretty plotting)
77 | b_diff <- as.bayesboot(b_presidents - b_opponents)
78 | plot(b_diff)
79 | ```
80 | 
81 | So there is some evidence that losing opponents could be shorter. (Though, I must add that it is quite unclear what the purpose really is with analyzing the heights of presidents and opponents...)
82 | 
83 | A more advanced example
84 | ----------------------------
85 | 
86 | A slightly more complicated example: a Bayesian bootstrap analysis of LOESS regression applied to the `cars` dataset on the speed of cars and the resulting distance it takes to stop. The `loess` function returns, among other things, a vector of `fitted` *y* values, one value for each *x* value in the data. These *y* values define the smoothed LOESS line and are what you would usually plot after having fitted a LOESS. Now we want to use the Bayesian bootstrap to gauge the uncertainty in the LOESS line. As the `loess` function accepts weighted data, we'll simply create a function that takes the data with weights and returns the `fitted` *y* values. We'll then plug that function into `bayesboot`:
87 | 
88 | ```{r}
89 | boot_fn <- function(cars, weights) {
90 |   loess(dist ~ speed, cars, weights = weights)$fitted
91 | }
92 | 
93 | bb_loess <- bayesboot(cars, boot_fn, use.weights = TRUE)
94 | ```
95 | 
96 | To plot this takes a couple of lines more:
97 | 
98 | ```{r car_plot}
99 | # Plotting the data
100 | plot(cars$speed, cars$dist, pch = 20, col = "tomato4", xlab = "Car speed in mph",
101 |      ylab = "Stopping distance in ft", main = "Speed and Stopping distances of Cars")
102 | 
103 | # Plotting a scatter of Bootstrapped LOESS lines to represent the uncertainty.
104 | for(i in sample(nrow(bb_loess), 20)) { 105 | lines(cars$speed, bb_loess[i,], col = "gray") 106 | } 107 | # Finally plotting the posterior mean LOESS line 108 | lines(cars$speed, colMeans(bb_loess, na.rm = TRUE), type ="l", 109 | col = "tomato", lwd = 4) 110 | ``` 111 | 112 | 113 | More information 114 | ----------------------- 115 | 116 | For more information on the Bayesian bootstrap see [Rubin's (1981) original paper](https://projecteuclid.org/euclid.aos/1176345338) and my blog post [The Non-parametric Bootstrap as a Bayesian Model](https://www.sumsar.net/blog/2015/04/the-non-parametric-bootstrap-as-a-bayesian-model/). The implementation of `bayesboot` is similar to the function outlined in the blog post [Easy Bayesian Bootstrap in R](https://www.sumsar.net/blog/2015/07/easy-bayesian-bootstrap-in-r/), but the interface is slightly different. 117 | 118 | References 119 | ---------------- 120 | 121 | Rubin, D. B. (1981). The Bayesian bootstrap. *The annals of statistics*, 9(1), 130--134. [link to paper](https://projecteuclid.org/euclid.aos/1176345338) 122 | 123 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # `bayesboot`: Easy Bayesian Bootstrap in R 2 | 3 | The `bayesboot` package implements a function `bayesboot` that performs 4 | the Bayesian bootstrap introduced by Rubin (1981). The implementation 5 | can both handle summary statistics that works on a weighted version of 6 | the data or that works on a resampled data set. 7 | 8 | `bayesboot` is available on CRAN and can be installed in the usual way: 9 | 10 | ``` r 11 | install.packages("bayesboot") 12 | ``` 13 | 14 | ## A simple example 15 | 16 | Here is a Bayesian bootstrap analysis of the mean height of the last ten 17 | American presidents: 18 | 19 | ``` r 20 | # Heights of the last ten American presidents in cm (Kennedy to Obama). 21 | heights <- c(183, 192, 182, 183, 177, 185, 188, 188, 182, 185) 22 | 23 | library(bayesboot) 24 | b1 <- bayesboot(heights, mean) 25 | ``` 26 | 27 | The resulting posterior distribution in `b1` can now be `plot`ted and 28 | `summary`ized: 29 | 30 | ``` r 31 | summary(b1) 32 | ## Bayesian bootstrap 33 | ## 34 | ## Number of posterior draws: 4000 35 | ## 36 | ## Summary of the posterior (with 95% Highest Density Intervals): 37 | ## statistic mean sd hdi.low hdi.high 38 | ## V1 184.4979 1.154456 182.3328 186.909 39 | ## 40 | ## Quantiles: 41 | ## statistic q2.5% q25% median q75% q97.5% 42 | ## V1 182.2621 183.742 184.4664 185.2378 186.8737 43 | ## 44 | ## Call: 45 | ## bayesboot(data = heights, statistic = mean) 46 | plot(b1) 47 | ``` 48 | 49 | ![](man/figures/README-president_summary-1.png) 50 | 51 | While it is possible to use a summary statistic that works on a resample 52 | of the original data, it is more efficient if it’s possible to use a 53 | summary statistic that works on a *reweighting* of the original dataset. 54 | Instead of using `mean` above it would be better to use `weighted.mean` 55 | like this: 56 | 57 | ``` r 58 | b2 <- bayesboot(heights, weighted.mean, use.weights = TRUE) 59 | ``` 60 | 61 | The result of a call to `bayesboot` will always result in a `data.frame` 62 | with one column per dimension of the summary statistic. If the summary 63 | statistic does not return a named vector the columns will be called 64 | `V1`, `V2`, etc. The result of a `bayesboot` call can be further 65 | inspected and post processed. 
For example:
66 | 
67 | ``` r
68 | # Given the model and the data, this is the probability that the mean
69 | # heights of American presidents is above the mean heights of
70 | # American males as given by www.cdc.gov/nchs/data/series/sr_11/sr11_252.pdf
71 | mean(c(b2[,1] > 175.9, TRUE, FALSE))
72 | ## [1] 0.9997501
73 | ```
74 | 
75 | ### Comparing two groups
76 | 
77 | If we want to compare the means of two groups, we will have to call
78 | `bayesboot` twice with each dataset and then use the resulting samples
79 | to calculate the posterior difference. For example, let’s say we have
80 | the heights of the opponents that lost to the presidents in `heights` the
81 | first time those presidents were elected. Now we are interested in
82 | comparing the mean height of American presidents with the mean height of
83 | presidential candidates that lost.
84 | 
85 | ``` r
86 | # The heights of opponents of American presidents (first time they were elected).
87 | # From Richard Nixon to John McCain
88 | heights_opponents <- c(182, 180, 180, 183, 177, 173, 188, 185, 175)
89 | 
90 | # Running the Bayesian bootstrap for both datasets
91 | b_presidents <- bayesboot(heights, weighted.mean, use.weights = TRUE)
92 | b_opponents <- bayesboot(heights_opponents, weighted.mean, use.weights = TRUE)
93 | 
94 | # Calculating the posterior difference
95 | # (here converting back to a bayesboot object for pretty plotting)
96 | b_diff <- as.bayesboot(b_presidents - b_opponents)
97 | plot(b_diff)
98 | ```
99 | 
100 | ![](man/figures/README-height_comparison-1.png)
101 | 
102 | So there is some evidence that losing opponents could be shorter.
103 | (Though, I must add that it is quite unclear what the purpose really is
104 | with analyzing the heights of presidents and opponents…)
105 | 
106 | ## A more advanced example
107 | 
108 | A slightly more complicated example: a Bayesian bootstrap analysis of
109 | LOESS regression applied to the `cars` dataset on the speed of cars and
110 | the resulting distance it takes to stop. The `loess`
111 | function returns, among other things, a vector of `fitted` *y* values,
112 | one value for each *x* value in the data. These *y* values define the
113 | smoothed LOESS line and are what you would usually plot after having
114 | fitted a LOESS. Now we want to use the Bayesian bootstrap to gauge the
115 | uncertainty in the LOESS line. As the `loess` function accepts weighted
116 | data, we’ll simply create a function that takes the data with weights
117 | and returns the `fitted` *y* values. We’ll then plug that function into
118 | `bayesboot`:
119 | 
120 | ``` r
121 | boot_fn <- function(cars, weights) {
122 |   loess(dist ~ speed, cars, weights = weights)$fitted
123 | }
124 | 
125 | bb_loess <- bayesboot(cars, boot_fn, use.weights = TRUE)
126 | ```
127 | 
128 | To plot this takes a couple of lines more:
129 | 
130 | ``` r
131 | # Plotting the data
132 | plot(cars$speed, cars$dist, pch = 20, col = "tomato4", xlab = "Car speed in mph",
133 |      ylab = "Stopping distance in ft", main = "Speed and Stopping distances of Cars")
134 | 
135 | # Plotting a scatter of Bootstrapped LOESS lines to represent the uncertainty.
136 | for(i in sample(nrow(bb_loess), 20)) { 137 | lines(cars$speed, bb_loess[i,], col = "gray") 138 | } 139 | # Finally plotting the posterior mean LOESS line 140 | lines(cars$speed, colMeans(bb_loess, na.rm = TRUE), type ="l", 141 | col = "tomato", lwd = 4) 142 | ``` 143 | 144 | ![](man/figures/README-car_plot-1.png) 145 | 146 | ## More information 147 | 148 | For more information on the Bayesian bootstrap see [Rubin’s (1981) 149 | original paper](https://projecteuclid.org/euclid.aos/1176345338) and my 150 | blog post [The Non-parametric Bootstrap as a Bayesian 151 | Model](https://www.sumsar.net/blog/2015/04/the-non-parametric-bootstrap-as-a-bayesian-model/). 152 | The implementation of `bayesboot` is similar to the function outlined in 153 | the blog post [Easy Bayesian Bootstrap in 154 | R](https://www.sumsar.net/blog/2015/07/easy-bayesian-bootstrap-in-r/), 155 | but the interface is slightly different. 156 | 157 | ## References 158 | 159 | Rubin, D. B. (1981). The Bayesian bootstrap. *The annals of statistics*, 160 | 9(1), 130–134. [link to 161 | paper](https://projecteuclid.org/euclid.aos/1176345338) 162 | -------------------------------------------------------------------------------- /bayesboot.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageCheckArgs: --as-cran 22 | PackageRoxygenize: rd,collate,namespace 23 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## R CMD check results 2 | 3 | 0 errors | 0 warnings | 0 notes 4 | 5 | Note: This previous NOTE, has now been resolved: 6 | 7 | > Please provide package anchors for all Rd \link{} targets not in the 8 | > package itself and the base packages. 9 | 10 | ## Downstream dependencies 11 | 12 | This package currently has no downstream dependencies 13 | -------------------------------------------------------------------------------- /man/as.bayesboot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bayesboot.R 3 | \name{as.bayesboot} 4 | \alias{as.bayesboot} 5 | \title{Coerce to a \code{bayesboot} object} 6 | \usage{ 7 | as.bayesboot(object) 8 | } 9 | \arguments{ 10 | \item{object}{Any object that can be converted to a data frame.} 11 | } 12 | \value{ 13 | A \code{data.frame} with subclass \code{bayesboot}. 14 | } 15 | \description{ 16 | This converts an object into a data frame and adds the class 17 | \code{bayesboot}. Doing this is only useful in the case you would want to use 18 | the \code{plot} and \code{summary} methods for \code{bayesboot} objects. 
19 | } 20 | -------------------------------------------------------------------------------- /man/bayesboot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bayesboot.R 3 | \name{bayesboot} 4 | \alias{bayesboot} 5 | \title{The Bayesian bootstrap} 6 | \usage{ 7 | bayesboot( 8 | data, 9 | statistic, 10 | R = 4000, 11 | R2 = 4000, 12 | use.weights = FALSE, 13 | .progress = "none", 14 | .parallel = FALSE, 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{data}{Either a vector or a list, or a matrix or a data.frame with one 20 | datapoint per row. The format of \code{data} should be compatible with the 21 | first argument of \code{statistic}} 22 | 23 | \item{statistic}{A function implementing the summary statistic of interest 24 | where the first argument should take the data. If \code{use.weights = TRUE} 25 | then the second argument should take a vector of weights.} 26 | 27 | \item{R}{The size of the posterior sample from the Bayesian bootstrap.} 28 | 29 | \item{R2}{When \code{use.weights = FALSE} this is the size of the resample of 30 | the data used to approximate the weighted statistic.} 31 | 32 | \item{use.weights}{When \code{TRUE} the data will be reweighted, like in the 33 | original Bayesian bootstrap. When \code{FALSE} (the default) the 34 | reweighting will be approximated by resampling the data.} 35 | 36 | \item{.progress}{The type of progress bar ("none", "text", "tk", and "win"). 37 | See the \code{.progress} argument to \code{\link[plyr]{adply}} in the plyr 38 | package.} 39 | 40 | \item{.parallel}{If \code{TRUE} enables parallel processing. See the 41 | \code{.parallel} argument to \code{\link[plyr]{adply}} in the plyr package.} 42 | 43 | \item{...}{Other arguments passed on to \code{statistic}} 44 | } 45 | \value{ 46 | A \code{data.frame} with \code{R} rows, each row being a draw from 47 | the posterior distribution of the Bayesian bootstrap. The number of columns 48 | is decided by the length of the output from \code{statistic}. If 49 | \code{statistic} does not return a vector or data frame with named values 50 | then the columns will be given the names \code{V1}, \code{V2}, \code{V3}, 51 | etc. While the output is a \code{data.frame} it has subclass 52 | \code{bayesboot} which enables specialized \code{\link{summary}} and 53 | \code{\link{plot}} functions for the result of a \code{bayesboot} call. 54 | } 55 | \description{ 56 | Performs a Bayesian bootstrap and returns a \code{data.frame} with a sample 57 | of size \code{R} representing the posterior distribution of the (possibly 58 | multivariate) summary \code{statistic}. 59 | } 60 | \details{ 61 | The summary statistic is a function of the data that represents a feature of 62 | interest, where a typical statistic is the mean. In \code{bayesboot} it is 63 | most efficient to define the statistic as a function taking the data as the 64 | first argument and a vector of weights as the second argument. An example of 65 | such a function is \code{\link{weighted.mean}}. Indicate that you are using a 66 | statistic defined in this way by setting \code{use.weights = TRUE}. 67 | 68 | It is also possible to define the statistic as a function only taking data 69 | (and no weights) by having \code{use.weights = FALSE} (the default). This 70 | will, for each of the \code{R} Bayesian bootstrap draws, give a resampled 71 | version of the \code{data} of size \code{R2} to \code{statistic}. 
This will 72 | be much slower than using \code{use.weights = TRUE} but will work with a 73 | larger range of statistics (the \code{\link{median}}, for example) 74 | 75 | For more information regarding this implementation of the Bayesian bootstrap 76 | see the blog post 77 | \href{https://www.sumsar.net/blog/2015/07/easy-bayesian-bootstrap-in-r/}{Easy 78 | Bayesian Bootstrap in R}. For more information about the model behind the 79 | Bayesian bootstrap see the blog post 80 | \href{https://www.sumsar.net/blog/2015/04/the-non-parametric-bootstrap-as-a-bayesian-model/}{The 81 | Non-parametric Bootstrap as a Bayesian Model} and, of course, 82 | \href{https://projecteuclid.org/euclid.aos/1176345338}{the original Bayesian 83 | bootstrap paper by Rubin (1981)}. 84 | } 85 | \note{ 86 | \itemize{ 87 | \item While \code{R} and \code{R2} are set to \code{4000} by 88 | default, that should not be taken to indicate that a sample of size 4000 is 89 | sufficient nor recommended. 90 | 91 | \item When using \code{use.weights = FALSE} it is important to use a summary 92 | statistic that does not depend on the sample size. That is, doubling the size 93 | of a dataset by cloning data should result in the same statistic as when 94 | using the original dataset. An example of a statistic that depends on the 95 | sample size is the sample standard deviation (that is, \code{\link{sd}}), and 96 | when using \code{bayesboot} it would make more sense to use the population 97 | standard deviation (as in the example below). } 98 | } 99 | \examples{ 100 | 101 | ### A Bayesian bootstrap analysis of a mean ### 102 | 103 | # Heights of the last ten American presidents in cm (Kennedy to Obama). 104 | heights <- c(183, 192, 182, 183, 177, 185, 188, 188, 182, 185); 105 | b1 <- bayesboot(heights, mean) 106 | # But it's more efficient to use the a weighted statistic. 107 | b2 <- bayesboot(heights, weighted.mean, use.weights = TRUE) 108 | 109 | # The result of bayesboot can be plotted and summarized 110 | plot(b2) 111 | summary(b2) 112 | 113 | # It can also be easily post processed. 114 | # Here the probability that the mean is > 182 cm. 115 | mean( b2[,1] > 182) 116 | 117 | ### A Bayesian bootstrap analysis of a SD ### 118 | 119 | # When use.weights = FALSE it is important that the summary statistics 120 | # does not change as a function of sample size. This is the case with 121 | # the sample standard deviation, so here we have to implement a 122 | # function calculating the population standard deviation. 123 | pop.sd <- function(x) { 124 | n <- length(x) 125 | sd(x) * sqrt( (n - 1) / n) 126 | } 127 | 128 | b3 <- bayesboot(heights, pop.sd) 129 | summary(b3) 130 | 131 | ### A Bayesian bootstrap analysis of a correlation coefficient ### 132 | 133 | # Data comparing two methods of measuring blood flow. 134 | # From Table 1 in Miller (1974) and used in an example 135 | # by Rubin (1981, p. 132). 136 | blood.flow <- data.frame( 137 | dye = c(1.15, 1.7, 1.42, 1.38, 2.80, 4.7, 4.8, 1.41, 3.9), 138 | efp = c(1.38, 1.72, 1.59, 1.47, 1.66, 3.45, 3.87, 1.31, 3.75)) 139 | 140 | # Using the weighted correlation (corr) from the boot package. 
141 | library(boot) 142 | b4 <- bayesboot(blood.flow, corr, R = 1000, use.weights = TRUE) 143 | hist(b4[,1]) 144 | 145 | ### A Bayesian bootstrap analysis of lm coefficients ### 146 | 147 | # A custom function that returns the coefficients of 148 | # a weighted linear regression on the blood.flow data 149 | lm.coefs <- function(d, w) { 150 | coef( lm(efp ~ dye, data = d, weights = w) ) 151 | } 152 | 153 | b5 <- bayesboot(blood.flow, lm.coefs, R = 1000, use.weights = TRUE) 154 | 155 | # Plotting the marginal posteriors 156 | plot(b5) 157 | 158 | # Plotting a scatter of regression lines from the posterior 159 | plot(blood.flow) 160 | for(i in sample(nrow(b5), size = 20)) { 161 | abline(coef = b5[i, ], col = "grey") 162 | } 163 | } 164 | \references{ 165 | Miller, R. G. (1974) The jackknife - a review. \emph{Biometrika}, 166 | \bold{61(1)}, 1--15. 167 | 168 | Rubin, D. B. (1981). The Bayesian bootstrap. \emph{The annals of statistics}, 169 | \bold{9(1)}, 130--134. 170 | } 171 | -------------------------------------------------------------------------------- /man/figures/README-car_plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasmusab/bayesboot/514fec369f21468be5476c251c7d4d381f92357f/man/figures/README-car_plot-1.png -------------------------------------------------------------------------------- /man/figures/README-height_comparison-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasmusab/bayesboot/514fec369f21468be5476c251c7d4d381f92357f/man/figures/README-height_comparison-1.png -------------------------------------------------------------------------------- /man/figures/README-president_summary-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasmusab/bayesboot/514fec369f21468be5476c251c7d4d381f92357f/man/figures/README-president_summary-1.png -------------------------------------------------------------------------------- /man/figures/plotPost1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasmusab/bayesboot/514fec369f21468be5476c251c7d4d381f92357f/man/figures/plotPost1.jpg -------------------------------------------------------------------------------- /man/figures/plotPost2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasmusab/bayesboot/514fec369f21468be5476c251c7d4d381f92357f/man/figures/plotPost2.jpg -------------------------------------------------------------------------------- /man/plot.bayesboot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bayesboot.R 3 | \name{plot.bayesboot} 4 | \alias{plot.bayesboot} 5 | \title{Plot the result of \code{bayesboot}} 6 | \usage{ 7 | \method{plot}{bayesboot}(x, cred.mass = 0.95, plots.per.page = 3, cex = 1.2, cex.lab = 1.3, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{The bayesboot object to plot.} 11 | 12 | \item{cred.mass}{the probability mass to include in credible intervals, or 13 | NULL to suppress plotting of credible intervals.} 14 | 15 | \item{plots.per.page}{The maximum number of plots per page.} 16 | 17 | \item{cex, cex.lab, ...}{Further parameters passed on to 18 | \code{\link{plotPost}}.} 19 | } 20 | \description{ 21 | Produces histograms showing the marginal posterior distributions from a 22 | \code{bayesboot} call. Uses the \code{\link{plotPost}} function to produce 23 | the individual histograms. 24 | } 25 | -------------------------------------------------------------------------------- /man/plotPost.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotPost.R 3 | \name{plotPost} 4 | \alias{plotPost} 5 | \title{Graphic display of a posterior probability distribution} 6 | \usage{ 7 | plotPost( 8 | paramSampleVec, 9 | credMass = 0.95, 10 | compVal = NULL, 11 | ROPE = NULL, 12 | HDItextPlace = 0.7, 13 | showMode = FALSE, 14 | showCurve = FALSE, 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{paramSampleVec}{A vector of samples drawn from the target distribution.} 20 | 21 | \item{credMass}{the probability mass to include in credible intervals, or 22 | NULL to suppress plotting of credible intervals.} 23 | 24 | \item{compVal}{a value for comparison with those plotted.} 25 | 26 | \item{ROPE}{a two-element vector, such as \code{c(-1, 1)}, specifying the 27 | limits of the Region Of Practical Equivalence.} 28 | 29 | \item{HDItextPlace}{a value in [0,1] that controls the horizontal position of 30 | the labels at the ends of the HDI bar.} 31 | 32 | \item{showMode}{logical: if TRUE, the mode is displayed instead of the mean.} 33 | 34 | \item{showCurve}{logical: if TRUE, the posterior density will be represented 35 | by a kernel density function instead of a histogram.} 36 | 37 | \item{\dots}{graphical parameters and the \code{breaks} parameter for the 38 | histogram.} 39 | } 40 | \value{ 41 | Returns an object of class \code{histogram} invisibly. Used for its 42 | plotting side-effect. 43 | } 44 | \description{ 45 | Plot the posterior probability distribution for a single parameter from a 46 | vector of samples, typically from an MCMC process, with appropriate summary 47 | statistics. 48 | } 49 | \details{ 50 | The data are plotted either as a histogram (above) or, if \code{showCurve = 51 | TRUE}, as a fitted kernel density curve (below). Either the mean or the mode 52 | of the distribution is displayed, depending on the parameter \code{showMode}. 53 | The Highest Density Interval (HDI) is shown as a horizontal bar, with labels 54 | for the ends of the interval. 55 | 56 | \if{html}{\figure{plotPost1.jpg} } 57 | \if{latex}{\figure{plotPost1.jpg}{options: width=5cm}} 58 | \cr 59 | \cr 60 | \if{html}{\figure{plotPost2.jpg} } 61 | \if{latex}{\figure{plotPost2.jpg}{options: width=5cm}} 62 | 63 | If values for a ROPE are supplied, these are shown as dark red vertical 64 | dashed lines, together with the percentage of probability mass within the 65 | ROPE. If a comparison value (\code{compVal}) is supplied, this is shown as a 66 | vertical green dotted line, together with the probability mass below and 67 | above this value. 68 | } 69 | \note{ 70 | The origin of this function is 71 | \href{https://cran.r-project.org/package=BEST}{the BEST 72 | package}, which is based on Kruschke (2015, 2013).
73 | } 74 | \examples{ 75 | # Generate some data 76 | tst <- rnorm(1e5, 3, 1) 77 | plotPost(tst) 78 | plotPost(tst, col='wheat', border='magenta') 79 | plotPost(tst, credMass=0.8, ROPE=c(-1,1), xlab="Response variable") 80 | plotPost(tst, showMode=TRUE, showCurve=TRUE, compVal=5.5) 81 | 82 | # For integers: 83 | tst <- rpois(1e5, 12) 84 | plotPost(tst) 85 | 86 | # A severely bimodal distribution: 87 | tst2 <- c(rnorm(1e5), rnorm(5e4, 7)) 88 | plotPost(tst2) # A valid 95\% CrI, but not HDI 89 | plotPost(tst2, showCurve=TRUE) # Correct 95\% HDI 90 | } 91 | \references{ 92 | Kruschke, J. K. (2015) \emph{Doing Bayesian data analysis, second 93 | edition: A tutorial with R, JAGS, and Stan.} Waltham, MA: Academic Press / 94 | Elsevier. 95 | 96 | Kruschke, J. K. (2013) Bayesian estimation supersedes the t test. 97 | \emph{Journal of Experimental Psychology: General}, \bold{142(2)}, 573. 98 | } 99 | \seealso{ 100 | For details of the HDI calculation, see \code{\link[HDInterval]{hdi}}. 101 | } 102 | \author{ 103 | John Kruschke, modified by Mike Meredith 104 | } 105 | -------------------------------------------------------------------------------- /man/print.bayesboot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bayesboot.R 3 | \name{print.bayesboot} 4 | \alias{print.bayesboot} 5 | \title{Print the first \code{n} draws from the Bayesian bootstrap} 6 | \usage{ 7 | \method{print}{bayesboot}(x, n = 10, ...) 8 | } 9 | \arguments{ 10 | \item{x}{The bayesboot object to print.} 11 | 12 | \item{n}{The number of draws to print.} 13 | 14 | \item{...}{Not used.} 15 | } 16 | \description{ 17 | Print the first \code{n} draws from the Bayesian bootstrap. 18 | } 19 | -------------------------------------------------------------------------------- /man/rudirichlet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bayesboot.R 3 | \name{rudirichlet} 4 | \alias{rudirichlet} 5 | \title{Produce random draws from a uniform Dirichlet distribution} 6 | \usage{ 7 | rudirichlet(n, d) 8 | } 9 | \arguments{ 10 | \item{n}{the number of draws.} 11 | 12 | \item{d}{the dimension of the Dirichlet distribution.} 13 | } 14 | \value{ 15 | An \code{n} by \code{d} matrix. 16 | } 17 | \description{ 18 | \code{rudirichlet} produces \code{n} draws from a \code{d}-dimensional 19 | uniform Dirichlet distribution. Here "uniform" implies that any combination 20 | of values on the support of the distribution is equally likely, that is, the 21 | \eqn{\alpha} parameters to the Dirichlet distribution are all set to 1.0. 22 | } 23 | \details{ 24 | In the context of the Bayesian bootstrap \code{rudirichlet} is used to 25 | produce the bootstrap weights. Therefore, \code{rudirichlet} can be used if 26 | you directly want to generate Bayesian bootstrap weights. 27 | } 28 | \examples{ 29 | set.seed(123) 30 | rudirichlet(2, 3) 31 | # Should produce the following matrix: 32 | # [,1] [,2] [,3] 33 | # [1,] 0.30681 0.2097 0.4834 34 | # [2,] 0.07811 0.1390 0.7829 35 | 36 | # The above could be seen as a sample of two Bayesian bootstrap weights for a 37 | # dataset of size three.
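
# For example, a single draw of weights can be plugged straight into a
# weighted statistic. A minimal sketch, using some made-up data:
x <- c(2.4, 3.1, 5.7)
weighted.mean(x, rudirichlet(1, length(x))[1, ])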
38 | } 39 | -------------------------------------------------------------------------------- /man/summary.bayesboot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bayesboot.R 3 | \name{summary.bayesboot} 4 | \alias{summary.bayesboot} 5 | \title{Summarize the result of \code{bayesboot}} 6 | \usage{ 7 | \method{summary}{bayesboot}(object, cred.mass = 0.95, ...) 8 | } 9 | \arguments{ 10 | \item{object}{The bayesboot object to summarize.} 11 | 12 | \item{cred.mass}{The probability mass to include in the highest density intervals.} 13 | 14 | \item{...}{Not used.} 15 | } 16 | \value{ 17 | A data frame with three columns: (1) \bold{statistic} the name of the 18 | statistic, (2) \bold{measure} the name of the summarizing measure, and (3) 19 | \bold{value} the value of the summarizing measure. 20 | } 21 | \description{ 22 | Summarizes the result of a call to \code{bayesboot} by calculating means, SDs, 23 | highest density intervals and quantiles of the posterior marginals. 24 | } 25 | \seealso{ 26 | \code{\link[HDInterval]{hdi}} in the HDInterval package for directly calculating 27 | highest density intervals from \code{bayesboot} objects. 28 | } 29 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | Sys.setenv("R_TESTS" = "") 2 | 3 | library(testthat) 4 | library(bayesboot) 5 | 6 | test_check("bayesboot") 7 | -------------------------------------------------------------------------------- /tests/testthat/Rplots.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasmusab/bayesboot/514fec369f21468be5476c251c7d4d381f92357f/tests/testthat/Rplots.pdf -------------------------------------------------------------------------------- /tests/testthat/test-bayesboot-deterministic.R: -------------------------------------------------------------------------------- 1 | context("Deterministic Bayesian bootstrap tests") 2 | library(doParallel) 3 | set.seed(123) 4 | 5 | test_that("rudirichlet produces a valid output", { 6 | rand_mat <- rudirichlet(10, 15) 7 | expect_true(all(rand_mat >= 0 & rand_mat <= 1)) 8 | expect_equivalent(rowSums(rand_mat), rep(1, 10)) 9 | }) 10 | 11 | # TODO: Why does this pass when using test(), but not when 12 | # checking the package? 
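# The test below runs bayesboot with both resampling-based and weight-based
# statistics, on vectors, data frames, and list data, including a statistic
# that returns NAs, and checks the class and dimensions of the results.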
13 | test_that("bayesboot produces a valid output", { 14 | x <- rnorm(10) 15 | 16 | b1 <- bayesboot(x, mean, R = 100, R2 = 90, use.weights = FALSE) 17 | expect_equal(class(b1), c("bayesboot", "data.frame")) 18 | expect_equal(nrow(b1), 100) 19 | expect_equal(ncol(b1), 1) 20 | 21 | b2 <- bayesboot(x, weighted.mean, R = 50, R2 = NULL, use.weights = TRUE) 22 | expect_equal(class(b2), c("bayesboot", "data.frame")) 23 | expect_equal(nrow(b2), 50) 24 | expect_equal(ncol(b2), 1) 25 | 26 | d <- data.frame(x = 1:10, y = rnorm(10)) 27 | 28 | boot_stat <- function(d) { 29 | coef(lm(y ~ x, data = d)) 30 | } 31 | b3 <- bayesboot(d, boot_stat, R = 75, R2 = 1000, use.weights = FALSE) 32 | expect_equal(class(b3), c("bayesboot", "data.frame")) 33 | expect_equal(nrow(b3), 75) 34 | expect_equal(ncol(b3), 2) 35 | 36 | boot_stat <- function(d, w) { 37 | coef(lm(y ~ x, data = d, weights = w)) 38 | } 39 | b4 <- bayesboot(d, boot_stat, R = 130, use.weights = TRUE) 40 | expect_equal(class(b4), c("bayesboot", "data.frame")) 41 | expect_equal(nrow(b4), 130) 42 | expect_equal(ncol(b4), 2) 43 | 44 | # A less typical bootstrap analysis where the data are character strings in a list 45 | # and the statistic is the most common answer. 46 | data_list <- list("Yes", "Yes", "No", "Yes", "No", "Yes", "Maybe") 47 | boot_stat <- function(d) { 48 | t <- table(as.character(d)) 49 | c(most_common_answer = names(t)[which.max(t)]) 50 | } 51 | b5 <- bayesboot(data_list, boot_stat, R = 50, R2 = 20) 52 | expect_equal(class(b5), c("bayesboot", "data.frame")) 53 | expect_equal(nrow(b5), 50) 54 | expect_equal(ncol(b5), 1) 55 | 56 | # Another strange bootstrap with a statistic that outputs NAs. 57 | d <- data.frame(x = 1:15, y = c(1, 2, 3, 4, NA)) 58 | expect_warning({ 59 | b6 <- bayesboot(d, use.weights = TRUE, R = 100, function(d, w) { 60 | c(weighted.mean(d$x, w), weighted.mean(d$y, w)) 61 | }) 62 | }) 63 | 64 | expect_output(print(summary(b1)), ".") 65 | expect_output(print(summary(b2)), ".") 66 | expect_output(print(summary(b3)), ".") 67 | expect_output(print(summary(b4)), ".") 68 | expect_warning(summary(b5)) 69 | expect_true({ 70 | plot(b1) 71 | plot(b2) 72 | plot(b3) 73 | plot(b4) 74 | TRUE 75 | }) 76 | expect_warning(plot(b5)) 77 | }) 78 | 79 | test_that("bayesboot can do parallel processing", { 80 | library(doParallel) 81 | library(foreach) 82 | x <- rnorm(10) 83 | registerDoParallel(cores = 2) 84 | b1 <- bayesboot(x, mean, R = 1000, R2 = 1000, .parallel = TRUE) 85 | expect_equal(class(b1), c("bayesboot", "data.frame")) 86 | expect_equal(nrow(b1), 1000) 87 | expect_equal(ncol(b1), 1) 88 | stopImplicitCluster() 89 | registerDoParallel(cores = 1) 90 | stopImplicitCluster() 91 | 92 | }) 93 | 94 | -------------------------------------------------------------------------------- /tests/testthat/test-bayesboot-non-deterministic.R: -------------------------------------------------------------------------------- 1 | # This file contains some tests that are non-deterministic and that might 2 | # occasionally fail, even if everything is OK. But they shouldn't fail too often... 3 | 4 | context("Non-deterministic Bayesian bootstrap tests") 5 | 6 | x <- runif(5) 7 | blood.flow <- data.frame( 8 | dye = c(1.15, 1.7, 1.42, 1.38, 2.80, 4.7, 4.8, 1.41, 3.9), 9 | efp = c(1.38, 1.72, 1.59, 1.47, 1.66, 3.45, 3.87, 1.31, 3.75)) 10 | 11 | test_that("The weight-based and the resampling-based bayesboot give the same results", { 12 | skip_on_cran() # As these tests might occasionally fail.
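# With many resamples per draw (R2 = 1000), the resampling-based statistic
# should closely approximate the weight-based one, so their posterior
# quantiles are expected to agree within a small tolerance.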
13 | b1 <- bayesboot(x, mean, R = 10000, R2 = 1000) 14 | b2 <- bayesboot(x, weighted.mean, R = 10000) 15 | q1 <- quantile(b1$V1, c(0.1, 0.5, 0.9)) 16 | q2 <- quantile(b2$V1, c(0.1, 0.5, 0.9)) 17 | # Check that a few posterior quantiles are roughly the same 18 | expect_true(all(abs(q1 - q2) < 0.01)) 19 | }) 20 | 21 | test_that("bayesboot replicates the correlation analysis in Rubin (1981)", { 22 | skip_on_cran() # As these tests might occasionally fail. 23 | library(boot) 24 | target_q <- c("0.1" = 0.8962, "0.5" = 0.9519, "0.9" = 0.9788) # quantiles from Rubin's (1981) analysis 25 | b3 <- bayesboot(blood.flow, boot::corr, R = 10000, use.weights = TRUE) 26 | b4 <- bayesboot(blood.flow, function(x) { cor(x[,1], x[,2]) }, R = 10000, R2 = 1000) 27 | q3 <- quantile(b3$V1, c(0.1, 0.5, 0.9)) 28 | q4 <- quantile(b4$V1, c(0.1, 0.5, 0.9)) 29 | expect_true(all(abs(q3 - target_q) < 0.005)) 30 | expect_true(all(abs(q4 - target_q) < 0.005)) 31 | }) 32 | --------------------------------------------------------------------------------